sources: add org.osbuild.files source
This source adds support for downloaded files. The files are indexed by their content hash, and the only option is their URL. The main use case for this will be downloading rpms, allowing depsolving to be done outside of osbuild, network access to be restricted, and downloaded rpms to be reused between runs. Each source is now passed two additional arguments, a cache directory and an output directory. Both are in the source's namespace, and the source is responsible for managing them. Each directory may contain contents from previous runs, but neither is ever guaranteed to do so. Downloaded contents may be saved to the cache and reused between runs, and the requested content should be written to the output dir. If secrets are used, the source must only ever write contents to the output that correspond to the available secrets (rather than contents from the cache from previous runs). Each stage is passed an additional argument, a sources directory. The directory is read-only, and contains a subdirectory named after each used source, which will contain the requested contents when the `Get()` call returns (if the source uses this functionality). Based on a patch by Lars Karlitski. Signed-off-by: Tom Gundersen <teg@jklm.no>
This commit is contained in:
parent
794ec97bf3
commit
7817ae5e8b
38 changed files with 348 additions and 10 deletions
108
sources/org.osbuild.files
Executable file
108
sources/org.osbuild.files
Executable file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import concurrent.futures
|
||||
import itertools
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
|
||||
def verify_checksum(filename, checksum):
    """Check that the contents of *filename* match *checksum*.

    *checksum* has the form ``"<algorithm>:<hex digest>"``, where the
    algorithm is one of md5, sha1, sha256, sha384 or sha512.

    Returns True if the digest of the file equals the expected digest (hex
    digits are compared case-insensitively), and False on a mismatch or if
    the file cannot be read.

    Raises RuntimeError if the algorithm is not supported.
    """
    # Imported here so the function does not depend on the file-level imports.
    import hashlib

    algorithm, expected = checksum.split(":", 1)
    if algorithm not in ("md5", "sha1", "sha256", "sha384", "sha512"):
        raise RuntimeError(f"unsupported checksum algorithm: {algorithm}")

    # Hash in-process instead of piping a "<digest> <filename>" line into
    # `<algorithm>sum -c`: that line format cannot represent every filename
    # (newlines, backslashes) and requires the external tools to be present.
    digest = hashlib.new(algorithm)
    try:
        with open(filename, "rb") as f:
            # Read in chunks so large downloads are never held in memory.
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
    except OSError:
        # An unreadable or missing file simply fails verification.
        return False

    return digest.hexdigest() == expected.lower()
|
||||
|
||||
|
||||
def fetch(url, checksum, directory):
    """Download *url* into *directory* as a file named *checksum*.

    Returns immediately if that file already exists. Otherwise the file is
    downloaded with curl (up to three attempts), verified with
    verify_checksum() and then renamed into place.

    Raises RuntimeError if every download attempt fails or if the downloaded
    file does not match *checksum*.
    """
    target = f"{directory}/{checksum}"

    # Invariant: every file in @directory is named after its (verified)
    # checksum, so an existing file needs no further work.
    if os.path.isfile(target):
        return

    curl_command = [
        "curl",
        "--silent",
        "--show-error",
        "--fail",
        "--location",
        "--output", checksum,
        url,
    ]

    # The unverified download lives in a temporary subdirectory of
    # @directory, which avoids copying across block devices when it is
    # finally moved into place.
    with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=directory) as tmpdir:
        # Some mirrors are broken sometimes, and curl does not retry on 404,
        # so retry by hand. any() stops at the first successful attempt.
        downloaded = any(
            subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False).returncode == 0
            for _ in range(3)
        )
        if not downloaded:
            raise RuntimeError(f"error downloading {url}")

        unverified = f"{tmpdir}/{checksum}"
        if not verify_checksum(unverified, checksum):
            raise RuntimeError(f"checksum mismatch: {checksum} {url}")

        # Verified: move the file into place. If a concurrent download of the
        # same file won the race, ignore the error; both files are guaranteed
        # to have the same contents.
        try:
            os.rename(unverified, target)
        except FileExistsError:
            pass
|
||||
|
||||
|
||||
def main(options, checksums, cache, output):
    """Fetch every requested file into the cache and copy it to the output.

    options:   source options; its "urls" entry maps each checksum to a URL.
    checksums: checksums of the requested files.
    cache:     directory where downloaded files are kept.
    output:    directory that receives a copy of each requested file.

    Writes a JSON object to stdout: empty on success, otherwise carrying an
    "error" message. Returns 0 on success and 1 on failure.
    """
    urls = options.get("urls", {})

    os.makedirs(cache, exist_ok=True)
    os.makedirs(output, exist_ok=True)

    # Resolve every URL before starting any workers, so an unknown checksum
    # fails fast without creating the process pool.
    requested_urls = []
    for checksum in checksums:
        try:
            requested_urls.append(urls[checksum])
        except KeyError:
            json.dump({"error": f"unknown file: {checksum}"}, sys.stdout)
            return 1

    with concurrent.futures.ProcessPoolExecutor(max_workers=10) as executor:
        results = executor.map(fetch, requested_urls, checksums, itertools.repeat(cache))

        # Draining the iterator re-raises any exception raised by fetch().
        try:
            for _ in results:
                pass
        except RuntimeError as e:
            json.dump({"error": e.args[0]}, sys.stdout)
            return 1

    for checksum in checksums:
        try:
            subprocess.run(
                [
                    "cp",
                    "--reflink=auto",
                    f"{cache}/{checksum}",
                    f"{output}/{checksum}",
                ],
                check=True,
            )
        except FileExistsError:
            continue
        except Exception as e:
            # Python 3 exceptions have no `.message` attribute; str(e) gives
            # the text and keeps this handler from raising AttributeError.
            json.dump({"error": str(e)}, sys.stdout)
            return 1

    json.dump({}, sys.stdout)
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The request arrives as a single JSON object on stdin; main()'s return
    # value becomes the process exit status.
    request = json.load(sys.stdin)
    sys.exit(main(
        request["options"],
        request["checksums"],
        request["cache"],
        request["output"],
    ))
|
||||
Loading…
Add table
Add a link
Reference in a new issue