Some RPMs might be very large, and limiting the total download time might lead to a failed build even in cases where downloading is making progress. Instead, set a minimum download speed (1000 bytes per second). If the minimum is not surpassed for 30 seconds in a row, the download fails and is retried. This follows the logic employed by DNF. Adjust the number of retries to 10 and the connection timeout to 30, in order to match what DNF does. One difference is that DNF does 10 retries across all downloads, whereas we do it per download; this could be changed in a follow-up. Old: - a download taking more than 5 minutes is unconditionally aborted New: - slow but working downloads will never be aborted - downloads will be stalled for at most five minutes in total before being aborted - time spent making progress does not count towards the five minutes Signed-off-by: Tom Gundersen <teg@jklm.no>
181 lines
5.6 KiB
Python
Executable file
181 lines
5.6 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Source for downloading files from URLs.
|
|
|
|
The files are indexed by their content hash. Can download files
|
|
that require secrets. The only secret provider currently supported
|
|
is `org.osbuild.rhsm` for downloading Red Hat content that requires
|
|
a subscription.
|
|
|
|
Internally use curl to download the files; the files are cached in
|
|
an internal cache. Multiple parallel connections are used to speed
|
|
up the download.
|
|
"""
|
|
|
|
|
|
import concurrent.futures
|
|
import itertools
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
|
|
from osbuild import sources
|
|
|
|
from osbuild.util.checksum import verify_file
|
|
from osbuild.util.rhsm import Subscriptions
|
|
|
|
|
|
SCHEMA = """
|
|
"additionalProperties": false,
|
|
"definitions": {
|
|
"item": {
|
|
"description": "The files to fetch indexed their content checksum",
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"patternProperties": {
|
|
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
|
|
"oneOf": [
|
|
{
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
{
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"url"
|
|
],
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
"secrets": {
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"name"
|
|
],
|
|
"properties": {
|
|
"name": {
|
|
"type": "string",
|
|
"description": "Name of the secrets provider."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"properties": {
|
|
"items": {"$ref": "#/definitions/item"},
|
|
"urls": {"$ref": "#/definitions/item"}
|
|
},
|
|
"oneOf": [{
|
|
"required": ["items"]
|
|
}, {
|
|
"required": ["urls"]
|
|
}]
|
|
"""
|
|
|
|
|
|
def fetch(url, checksum, directory):
    """Download a single file and store it in @directory, named @checksum.

    @url is a descriptor dict with at least "url" and optionally "secrets"
    (ssl_ca_cert / ssl_client_cert / ssl_client_key paths passed to curl).
    The file is downloaded into a temporary sub-directory, its checksum
    verified, and only then moved into place.

    Raises RuntimeError if all download attempts fail or the checksum
    does not match.
    """
    secrets = url.get("secrets")
    url_path = url.get("url")
    # Download to a temporary directory until we have verified the checksum. Use a
    # subdirectory, so we avoid copying across block devices.
    with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=directory) as tmpdir:
        # The curl invocation is identical for every retry, so build it once,
        # outside the retry loop. --speed-limit 1000 makes curl abort (and us
        # retry) a transfer that stays below 1000 bytes/s for curl's default
        # 30-second speed window: slow-but-progressing downloads are never
        # killed, mirroring DNF's behavior.
        curl_command = [
            "curl",
            "--silent",
            "--speed-limit", "1000",
            "--connect-timeout", "30",
            "--fail",
            "--location",
            "--output", checksum,
        ]
        if secrets:
            if secrets.get('ssl_ca_cert'):
                curl_command.extend(["--cacert", secrets.get('ssl_ca_cert')])
            if secrets.get('ssl_client_cert'):
                curl_command.extend(["--cert", secrets.get('ssl_client_cert')])
            if secrets.get('ssl_client_key'):
                curl_command.extend(["--key", secrets.get('ssl_client_key')])
        # url must follow options
        curl_command.append(url_path)

        # some mirrors are sometimes broken. retry manually, because we could be
        # redirected to a different, working, one on retry.
        return_code = 0
        for _ in range(10):
            curl = subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False)
            return_code = curl.returncode
            if return_code == 0:
                break
        else:
            # Report only the plain URL, not the whole descriptor, so secret
            # material (client cert/key paths) never ends up in error logs.
            raise RuntimeError(f"curl: error downloading {url_path}: error code {return_code}")

        if not verify_file(f"{tmpdir}/{checksum}", checksum):
            raise RuntimeError(f"checksum mismatch: {checksum} {url_path}")

        # The checksum has been verified, move the file into place. in case we race
        # another download of the same file, we simply ignore the error as their
        # contents are guaranteed to be the same.
        try:
            os.rename(f"{tmpdir}/{checksum}", f"{directory}/{checksum}")
        except FileExistsError:
            pass
|
|
|
|
|
|
def download(items, cache):
    """Fetch every file in @items that is not already present in @cache.

    @items maps "<algorithm>:<digest>" checksums to URLs (plain strings or
    descriptor dicts). Files already cached under their checksum are
    skipped. RHSM secrets are resolved once and reused. Up to four
    downloads run in parallel; any exception raised by a worker is
    re-raised here.
    """
    pending_urls = []
    pending_checksums = []
    rhsm_secrets = None

    for checksum, url in items.items():
        # Invariant: all files in @directory must be named after their (verified) checksum.
        # Check this before secrets so that if everything is pre-downloaded we don't need secrets
        if os.path.isfile(f"{cache}/{checksum}"):
            continue

        if not isinstance(url, dict):
            url = {"url": url}

        # check if url needs rhsm secrets
        if url.get("secrets", {}).get("name") == "org.osbuild.rhsm":
            # rhsm secrets only need to be retrieved once and can then be reused
            if rhsm_secrets is None:
                rhsm_secrets = Subscriptions.from_host_system()
            url["secrets"] = rhsm_secrets.get_secrets(url.get("url"))

        pending_urls.append(url)
        pending_checksums.append(checksum)

    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        # Drain the result iterator so worker exceptions propagate to the caller.
        for _ in executor.map(fetch, pending_urls, pending_checksums, itertools.repeat(cache)):
            pass
|
|
|
|
|
|
class CurlSource(sources.SourceService):
    """Source service that fetches files with curl into the osbuild cache."""

    def download(self, items, cache, _options):
        # Every file lives in a dedicated sub-directory of the shared cache.
        files_cache = os.path.join(cache, "org.osbuild.files")
        os.makedirs(files_cache, exist_ok=True)

        download(items, files_cache)
|
|
|
|
|
|
def main():
    """Build the source service from the CLI arguments and run it."""
    CurlSource.from_args(sys.argv[1:]).main()


if __name__ == '__main__':
    main()
|