The previous version covered too few use cases, more specifically a single subscription. That is of course not the case for many hosts, so osbuild needs to understand subscriptions. When running org.osbuild.curl source, read the /etc/yum.repos.d/redhat.repo file and load the system subscriptions from there. While processing each url, guess which subscription is tied to the url and use the CA certificate, client certificate, and client key associated with this subscription. It must be done this way because the depsolving and fetching of RPMs may be performed on different hosts and the subscription credentials are different in such case. More detailed description of why this approach was chosen is available in osbuild-composer git: https://github.com/osbuild/osbuild-composer/pull/1405
199 lines
5.9 KiB
Python
Executable file
199 lines
5.9 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Source for downloading files from URLs.
|
|
|
|
The files are indexed by their content hash. Can download files
|
|
that require secrets. The only secret provider currently supported
|
|
is `org.osbuild.rhsm` for downloading Red Hat content that requires
|
|
a subscriptions.
|
|
|
|
Internally use curl to download the files; the files are cached in
|
|
an internal cache. Multiple parallel connections are used to speed
|
|
up the download.
|
|
"""
|
|
|
|
|
|
import concurrent.futures
|
|
import itertools
|
|
import json
|
|
import math
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
|
|
from typing import Dict
|
|
|
|
from osbuild.util.checksum import verify_file
|
|
from osbuild.util.rhsm import Subscriptions
|
|
|
|
|
|
SCHEMA = """
|
|
"additionalProperties": false,
|
|
"definitions": {
|
|
"item": {
|
|
"description": "The files to fetch indexed their content checksum",
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"patternProperties": {
|
|
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
|
|
"oneOf": [
|
|
{
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
{
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"url"
|
|
],
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
"secrets": {
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"name"
|
|
],
|
|
"properties": {
|
|
"name": {
|
|
"type": "string",
|
|
"description": "Name of the secrets provider."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"properties": {
|
|
"items": {"$ref": "#/definitions/item"},
|
|
"urls": {"$ref": "#/definitions/item"}
|
|
},
|
|
"oneOf": [{
|
|
"required": ["items"]
|
|
}, {
|
|
"required": ["urls"]
|
|
}]
|
|
"""
|
|
|
|
|
|
def fetch(url, checksum, directory):
|
|
# Invariant: all files in @directory must be named after their (verified) checksum.
|
|
if os.path.isfile(f"{directory}/{checksum}"):
|
|
return
|
|
|
|
secrets = url.get("secrets")
|
|
url_path = url.get("url")
|
|
# Download to a temporary directory until we have verified the checksum. Use a
|
|
# subdirectory, so we avoid copying across block devices.
|
|
with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=directory) as tmpdir:
|
|
# some mirrors are sometimes broken. retry manually, because we could be
|
|
# redirected to a different, working, one on retry.
|
|
start_time = time.monotonic()
|
|
return_code = 0
|
|
for _ in range(20):
|
|
elapsed_time = time.monotonic() - start_time
|
|
if elapsed_time >= 300:
|
|
continue
|
|
curl_command = [
|
|
"curl",
|
|
"--silent",
|
|
"--max-time", f"{int(math.ceil(300 - elapsed_time))}",
|
|
"--connect-timeout", "60",
|
|
"--fail",
|
|
"--location",
|
|
"--output", checksum,
|
|
]
|
|
if secrets:
|
|
if secrets.get('ssl_ca_cert'):
|
|
curl_command.extend(["--cacert", secrets.get('ssl_ca_cert')])
|
|
if secrets.get('ssl_client_cert'):
|
|
curl_command.extend(["--cert", secrets.get('ssl_client_cert')])
|
|
if secrets.get('ssl_client_key'):
|
|
curl_command.extend(["--key", secrets.get('ssl_client_key')])
|
|
# url must follow options
|
|
curl_command.append(url_path)
|
|
|
|
curl = subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False)
|
|
return_code = curl.returncode
|
|
if return_code == 0:
|
|
break
|
|
else:
|
|
raise RuntimeError(f"curl: error downloading {url}: error code {return_code}")
|
|
|
|
if not verify_file(f"{tmpdir}/{checksum}", checksum):
|
|
raise RuntimeError(f"checksum mismatch: {checksum} {url}")
|
|
|
|
# The checksum has been verified, move the file into place. in case we race
|
|
# another download of the same file, we simply ignore the error as their
|
|
# contents are guaranteed to be the same.
|
|
try:
|
|
os.rename(f"{tmpdir}/{checksum}", f"{directory}/{checksum}")
|
|
except FileExistsError:
|
|
pass
|
|
|
|
|
|
def download(items, cache):
|
|
with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
|
|
requested_urls = []
|
|
subscriptions = None
|
|
|
|
for url in items.values():
|
|
|
|
if not isinstance(url, dict):
|
|
url = {"url": url}
|
|
|
|
# check if url needs rhsm secrets
|
|
if url.get("secrets", {}).get("name") == "org.osbuild.rhsm":
|
|
try:
|
|
# rhsm secrets only need to be retrieved once and can then be reused
|
|
if subscriptions is None:
|
|
subscriptions = Subscriptions.from_host_system()
|
|
url["secrets"] = subscriptions.get_secrets(url.get("url"))
|
|
except RuntimeError as e:
|
|
json.dump({"error": e.args[0]}, sys.stdout)
|
|
return 1
|
|
|
|
requested_urls.append(url)
|
|
|
|
results = executor.map(fetch, requested_urls, items.keys(), itertools.repeat(cache))
|
|
|
|
try:
|
|
for _ in results:
|
|
pass
|
|
except RuntimeError as e:
|
|
json.dump({"error": e.args[0]}, sys.stdout)
|
|
return 1
|
|
|
|
return 0
|
|
|
|
|
|
def main(items: Dict, cache: str):
|
|
cache = os.path.join(cache, "org.osbuild.files")
|
|
|
|
if not items:
|
|
json.dump({}, sys.stdout)
|
|
return 0
|
|
|
|
os.makedirs(cache, exist_ok=True)
|
|
res = download(items, cache)
|
|
if res != 0:
|
|
return res
|
|
|
|
json.dump({}, sys.stdout)
|
|
return 0
|
|
|
|
|
|
if __name__ == '__main__':
|
|
args = json.load(sys.stdin)
|
|
r = main(args["items"], args["cache"])
|
|
sys.exit(r)
|