All sources fetch various types of `items`, the specific nature of which is dependent on the source type, but they are all identifyable by a opaque identifier. In order for osbuild to check that all the inputs that a stage needs are are indeed contained in the manifest description, osbuild must learn what ids are fetched by what source. This is done by standarzing the common "items" part, i.e. the "id" -> "options for that id" mapping that is common to all sources. For the version 1 of the format, extract the files and ostree the item information from the respective options. Adapt the sources (files, ostree) so that they use the new items information, but also fall back to the old style; the latter is needed since the sources tests still uses the SourceServer.
253 lines
7.6 KiB
Python
Executable file
253 lines
7.6 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Source for downloading files from URLs.
|
|
|
|
The files are indexed by their content hash. Can download files
|
|
that require secrets. The only secret provider currently supported
|
|
is `org.osbuild.rhsm` for downloading Red Hat content that requires
|
|
a subscriptions.
|
|
|
|
Internally use curl to download the files; the files are cached in
|
|
an internal cache. Multiple parallel connections are used to speed
|
|
up the download.
|
|
"""
|
|
|
|
|
|
import concurrent.futures
|
|
import glob
|
|
import itertools
|
|
import json
|
|
import math
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
|
|
|
|
SCHEMA = """
|
|
"additionalProperties": false,
|
|
"properties": {
|
|
"urls": {
|
|
"description": "The files to fetch indexed their content checksum",
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"patternProperties": {
|
|
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{5,64}": {
|
|
"oneOf": [{
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
}, {
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": ["url"],
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
"secrets": {
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": ["name"],
|
|
"properties": {
|
|
"name": {
|
|
"type": "string",
|
|
"description": "Name of the secrets provider."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
|
|
|
|
def verify_checksum(filename, checksum):
|
|
algorithm, checksum = checksum.split(":", 1)
|
|
if algorithm not in ("md5", "sha1", "sha256", "sha384", "sha512"):
|
|
raise RuntimeError(f"unsupported checksum algorithm: {algorithm}")
|
|
|
|
ret = subprocess.run(
|
|
[f"{algorithm}sum", "-c"],
|
|
input=f"{checksum} {filename}",
|
|
stdout=subprocess.DEVNULL,
|
|
encoding="utf-8",
|
|
check=False
|
|
)
|
|
|
|
return ret.returncode == 0
|
|
|
|
|
|
def fetch(url, checksum, directory):
|
|
# Invariant: all files in @directory must be named after their (verified) checksum.
|
|
if os.path.isfile(f"{directory}/{checksum}"):
|
|
return
|
|
|
|
secrets = url.get("secrets")
|
|
url_path = url.get("url")
|
|
# Download to a temporary directory until we have verified the checksum. Use a
|
|
# subdirectory, so we avoid copying across block devices.
|
|
with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=directory) as tmpdir:
|
|
# some mirrors are sometimes broken. retry manually, because we could be
|
|
# redirected to a different, working, one on retry.
|
|
start_time = time.monotonic()
|
|
return_code = 0
|
|
for _ in range(20):
|
|
elapsed_time = time.monotonic() - start_time
|
|
if elapsed_time >= 300:
|
|
continue
|
|
curl_command = [
|
|
"curl",
|
|
"--silent",
|
|
"--max-time", f"{int(math.ceil(300 - elapsed_time))}",
|
|
"--connect-timeout", "60",
|
|
"--fail",
|
|
"--location",
|
|
"--output", checksum,
|
|
]
|
|
if secrets:
|
|
if secrets.get('ssl_ca_cert'):
|
|
curl_command.extend(["--cacert", secrets.get('ssl_ca_cert')])
|
|
if secrets.get('ssl_client_cert'):
|
|
curl_command.extend(["--cert", secrets.get('ssl_client_cert')])
|
|
if secrets.get('ssl_client_key'):
|
|
curl_command.extend(["--key", secrets.get('ssl_client_key')])
|
|
# url must follow options
|
|
curl_command.append(url_path)
|
|
|
|
curl = subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False)
|
|
return_code = curl.returncode
|
|
if return_code == 0:
|
|
break
|
|
else:
|
|
raise RuntimeError(f"curl: error downloading {url}: error code {return_code}")
|
|
|
|
if not verify_checksum(f"{tmpdir}/{checksum}", checksum):
|
|
raise RuntimeError(f"checksum mismatch: {checksum} {url}")
|
|
|
|
# The checksum has been verified, move the file into place. in case we race
|
|
# another download of the same file, we simply ignore the error as their
|
|
# contents are guaranteed to be the same.
|
|
try:
|
|
os.rename(f"{tmpdir}/{checksum}", f"{directory}/{checksum}")
|
|
except FileExistsError:
|
|
pass
|
|
|
|
|
|
def get_rhsm_secrets():
|
|
rhsm_secrets = {
|
|
'ssl_ca_cert': "/etc/rhsm/ca/redhat-uep.pem",
|
|
'ssl_client_key': "",
|
|
'ssl_client_cert': ""
|
|
}
|
|
|
|
keys = glob.glob("/etc/pki/entitlement/*-key.pem")
|
|
for key in keys:
|
|
# The key and cert have the same prefix
|
|
cert = key.rstrip("-key.pem") + ".pem"
|
|
# The key is only valid if it has a matching cert
|
|
if os.path.exists(cert):
|
|
rhsm_secrets['ssl_client_key'] = key
|
|
rhsm_secrets['ssl_client_cert'] = cert
|
|
return rhsm_secrets
|
|
|
|
raise RuntimeError("no matching rhsm key and cert")
|
|
|
|
|
|
def download(checksums, urls, cache):
|
|
with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
|
|
requested_urls = []
|
|
rhsm_secrets = None
|
|
|
|
for checksum in checksums:
|
|
try:
|
|
url = urls[checksum]
|
|
except KeyError:
|
|
json.dump({"error": f"unknown file: {checksum}"}, sys.stdout)
|
|
return 1
|
|
|
|
if isinstance(url, dict):
|
|
# check if url needs rhsm secrets
|
|
if url.get("secrets", {}).get("name") == "org.osbuild.rhsm":
|
|
# rhsm secrets only need to be retrieved once and can then be reused
|
|
if rhsm_secrets is None:
|
|
try:
|
|
rhsm_secrets = get_rhsm_secrets()
|
|
except RuntimeError as e:
|
|
json.dump({"error": e.args[0]}, sys.stdout)
|
|
return 1
|
|
url["secrets"] = rhsm_secrets
|
|
else:
|
|
url = {"url": url}
|
|
|
|
requested_urls.append(url)
|
|
|
|
results = executor.map(fetch, requested_urls, checksums, itertools.repeat(cache))
|
|
|
|
try:
|
|
for _ in results:
|
|
pass
|
|
except RuntimeError as e:
|
|
json.dump({"error": e.args[0]}, sys.stdout)
|
|
return 1
|
|
|
|
return 0
|
|
|
|
|
|
def export(checksums, cache, output):
|
|
for checksum in checksums:
|
|
try:
|
|
subprocess.run(
|
|
[
|
|
"cp",
|
|
"--reflink=auto",
|
|
f"{cache}/{checksum}",
|
|
f"{output}/{checksum}",
|
|
],
|
|
check=True,
|
|
)
|
|
except subprocess.CalledProcessError as e:
|
|
json.dump({"error": e.output}, sys.stdout)
|
|
return 1
|
|
|
|
json.dump({}, sys.stdout)
|
|
return 0
|
|
|
|
|
|
def main(urls, options, checksums, cache, output):
|
|
download_only = not output
|
|
|
|
if not urls:
|
|
urls = options.get("urls", [])
|
|
|
|
if urls:
|
|
if not checksums and download_only:
|
|
checksums = [k for k, _ in urls.items()]
|
|
|
|
os.makedirs(cache, exist_ok=True)
|
|
res = download(checksums, urls, cache)
|
|
if res != 0:
|
|
return res
|
|
|
|
if download_only:
|
|
json.dump({}, sys.stdout)
|
|
return 0
|
|
|
|
os.makedirs(output, exist_ok=True)
|
|
res = export(checksums, cache, output)
|
|
|
|
return res
|
|
|
|
|
|
if __name__ == '__main__':
|
|
args = json.load(sys.stdin)
|
|
r = main(args["items"],
|
|
args["options"],
|
|
args["checksums"],
|
|
args["cache"],
|
|
args.get("output"))
|
|
sys.exit(r)
|