debian-forge/sources/org.osbuild.files
David Rheinsberg 707ff8c988 sources: keep try-except block small
We used to have a try-except block to catch URL requests that are not in
`urls`. This block has since then grown way bigger than it should be. We
may now accidentally catch KeyError exceptions from lots of other
places.

This commit extracts the accessor of `urls[checksum]` and saves the
result in a local variable and makes the remainder use that variable.
2020-05-28 11:06:05 +02:00

163 lines
5.3 KiB
Python
Executable file

#!/usr/bin/python3
import concurrent.futures
import glob
import itertools
import json
import os
import subprocess
import sys
import tempfile
def verify_checksum(filename, checksum):
algorithm, checksum = checksum.split(":", 1)
if algorithm not in ("md5", "sha1", "sha256", "sha384", "sha512"):
raise RuntimeError(f"unsupported checksum algorithm: {algorithm}")
ret = subprocess.run(
[f"{algorithm}sum", "-c"],
input=f"{checksum} {filename}",
stdout=subprocess.DEVNULL,
encoding="utf-8",
check=False
)
return ret.returncode == 0
def fetch(url, checksum, directory):
# Invariant: all files in @directory must be named after their (verified) checksum.
if os.path.isfile(f"{directory}/{checksum}"):
return
secrets = url.get("secrets")
url_path = url.get("url")
# Download to a temporary directory until we have verified the checksum. Use a
# subdirectory, so we avoid copying accross block devices.
with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=directory) as tmpdir:
# some mirrors are broken sometimes. retry manually, because curl doesn't on 404
for i in range(10):
curl_command = [
"curl",
"--silent",
"--max-time", f"{30 + i*15}",
"--connect-timeout", "10",
"--show-error",
"--fail",
"--location",
"--output", checksum,
]
if secrets:
if secrets.get('ssl_ca_cert'):
curl_command.extend(["--cacert", secrets.get('ssl_ca_cert')])
if secrets.get('ssl_client_cert'):
curl_command.extend(["--cert", secrets.get('ssl_client_cert')])
if secrets.get('ssl_client_key'):
curl_command.extend(["--key", secrets.get('ssl_client_key')])
# url must follow options
curl_command.append(url_path)
curl = subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False)
if curl.returncode == 0:
break
else:
raise RuntimeError(f"error downloading {url}")
if not verify_checksum(f"{tmpdir}/{checksum}", checksum):
raise RuntimeError(f"checksum mismatch: {checksum} {url}")
# The checksum has been verified, move the file into place. in case we race
# another download of the same file, we simply ignore the error as their
# contents are guaranteed to be the same.
try:
os.rename(f"{tmpdir}/{checksum}", f"{directory}/{checksum}")
except FileExistsError:
pass
def get_rhsm_secrets():
rhsm_secrets = {
'ssl_ca_cert': "/etc/rhsm/ca/redhat-uep.pem",
'ssl_client_key': "",
'ssl_client_cert': ""
}
keys = glob.glob("/etc/pki/entitlement/*-key.pem")
for key in keys:
# The key and cert have the same prefix
cert = key.rstrip("-key.pem") + ".pem"
# The key is only valid if it has a matching cert
if os.path.exists(cert):
rhsm_secrets['ssl_client_key'] = key
rhsm_secrets['ssl_client_cert'] = cert
return rhsm_secrets
raise RuntimeError("no matching rhsm key and cert")
def main(options, checksums, cache, output):
urls = options.get("urls", {})
os.makedirs(cache, exist_ok=True)
os.makedirs(output, exist_ok=True)
with concurrent.futures.ProcessPoolExecutor(max_workers=15) as executor:
requested_urls = []
rhsm_secrets = None
for checksum in checksums:
try:
url = urls[checksum]
except KeyError:
json.dump({"error": f"unknown file: {checksum}"}, sys.stdout)
return 1
if isinstance(url, dict):
# check if url needs rhsm secrets
if url.get("secrets", {}).get("name") == "org.osbuild.rhsm":
# rhsm secrets only need to be retrieved once and can then be reused
if rhsm_secrets is None:
try:
rhsm_secrets = get_rhsm_secrets()
except RuntimeError as e:
json.dump({"error": e.args[0]}, sys.stdout)
return 1
url["secrets"] = rhsm_secrets
else:
url = {"url": url}
requested_urls.append(url)
results = executor.map(fetch, requested_urls, checksums, itertools.repeat(cache))
try:
for _ in results:
pass
except RuntimeError as e:
json.dump({"error": e.args[0]}, sys.stdout)
return 1
for checksum in checksums:
try:
subprocess.run(
[
"cp",
"--reflink=auto",
f"{cache}/{checksum}",
f"{output}/{checksum}",
],
check=True,
)
except subprocess.CalledProcessError as e:
json.dump({"error": e.output}, sys.stdout)
return 1
json.dump({}, sys.stdout)
return 0
if __name__ == '__main__':
args = json.load(sys.stdin)
r = main(args["options"], args["checksums"], args["cache"], args["output"])
sys.exit(r)