214 lines
7.6 KiB
Python
Executable file
214 lines
7.6 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Source for downloading files from URLs.
|
|
|
|
The files are indexed by their content hash. It can download files
|
|
that require secrets. The secret providers currently supported are:
|
|
|
|
- `org.osbuild.rhsm` for downloading Red Hat content that requires
|
|
a subscription.
|
|
- `org.osbuild.mtls` for downloading content that requires client
|
|
certificates. The paths to the key and cert should be set in the
|
|
environment in OSBUILD_SOURCES_CURL_SSL_CLIENT_KEY,
|
|
OSBUILD_SOURCES_CURL_SSL_CLIENT_CERT, and optionally
|
|
OSBUILD_SOURCES_CURL_SSL_CA_CERT.
|
|
|
|
It uses curl to download the files; the files are cached in an
|
|
internal cache. Multiple parallel connections are used to speed
|
|
up the download.
|
|
"""
|
|
|
|
import concurrent.futures
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import urllib.parse
|
|
from typing import Dict
|
|
|
|
from osbuild import sources
|
|
from osbuild.util.checksum import verify_file
|
|
from osbuild.util.rhsm import Subscriptions
|
|
|
|
SCHEMA = """
|
|
"additionalProperties": false,
|
|
"definitions": {
|
|
"item": {
|
|
"description": "The files to fetch indexed their content checksum",
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"patternProperties": {
|
|
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
|
|
"oneOf": [
|
|
{
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
{
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"url"
|
|
],
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"description": "URL to download the file from."
|
|
},
|
|
"insecure": {
|
|
"type": "boolean",
|
|
"description": "Skip the verification step for secure connections and proceed without checking",
|
|
"default": false
|
|
},
|
|
"secrets": {
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"required": [
|
|
"name"
|
|
],
|
|
"properties": {
|
|
"name": {
|
|
"type": "string",
|
|
"description": "Name of the secrets provider."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"properties": {
|
|
"items": {"$ref": "#/definitions/item"},
|
|
"urls": {"$ref": "#/definitions/item"}
|
|
},
|
|
"oneOf": [{
|
|
"required": ["items"]
|
|
}, {
|
|
"required": ["urls"]
|
|
}]
|
|
"""
|
|
|
|
|
|
class CurlSource(sources.SourceService):
    """Source service that downloads files with curl.

    Every file is identified by its content checksum. A download is written
    to a temporary directory below `self.cache`, verified against that
    checksum and only then moved to `<cache>/<checksum>`.
    """

    content_type = "org.osbuild.files"

    # Upper bound on the number of concurrent downloads: two per CPU.
    max_workers = 2 * (os.cpu_count() or 1)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Host subscriptions are looked up lazily by `amend_secrets` and
        # then reused for every following item.
        self.subscriptions = None

    def amend_secrets(self, checksum, desc_or_url):
        """Resolve the secrets provider named in an item into real secrets.

        `desc_or_url` is either a plain URL string or a descriptor dict with
        a `url` key and optional `insecure` / `secrets` keys. A plain URL is
        normalized into a descriptor dict.

        The caller's dict is never modified: the resolved secrets are stored
        in a shallow copy, so the provider reference in the original
        descriptor stays intact.

        Returns the tuple `(checksum, desc)`.

        Raises RuntimeError if the `org.osbuild.mtls` provider is requested
        but the client key or certificate is not set in the environment.
        """
        if isinstance(desc_or_url, dict):
            # Shallow copy: only the top-level "secrets" key is replaced below.
            desc = dict(desc_or_url)
        else:
            desc = {"url": desc_or_url}

        provider = desc.get("secrets", {}).get("name")

        if provider == "org.osbuild.rhsm":
            # rhsm secrets only need to be retrieved once and can then be reused
            if self.subscriptions is None:
                self.subscriptions = Subscriptions.from_host_system()
            desc["secrets"] = self.subscriptions.get_secrets(desc.get("url"))
        elif provider == "org.osbuild.mtls":
            key = os.getenv("OSBUILD_SOURCES_CURL_SSL_CLIENT_KEY")
            cert = os.getenv("OSBUILD_SOURCES_CURL_SSL_CLIENT_CERT")
            if not (key and cert):
                raise RuntimeError(f"mtls secrets required but key ({key}) or cert ({cert}) not defined")
            desc["secrets"] = {
                'ssl_ca_cert': os.getenv("OSBUILD_SOURCES_CURL_SSL_CA_CERT"),
                'ssl_client_cert': cert,
                'ssl_client_key': key,
            }

        return checksum, desc

    @staticmethod
    def _quote_url(url: str) -> str:
        """Return `url` with its path component percent-encoded.

        Only the path is passed through `urllib.parse.quote`; scheme, host,
        query and fragment are left as they are.

        NOTE(review): '%' is not in quote()'s default safe set, so a path
        that is already percent-encoded gets encoded a second time
        ("%20" becomes "%2520") -- confirm that callers always pass
        unencoded paths.
        """
        purl = urllib.parse.urlparse(url)
        path = urllib.parse.quote(purl.path)
        quoted = purl._replace(path=path)
        return quoted.geturl()

    @staticmethod
    def _build_curl_command(checksum, url, secrets=None, insecure=False, proxy=None):
        """Return the curl argument list that downloads `url` into a file named `checksum`.

        The output file name is relative, so the command has to be run with
        the desired target directory as its working directory.
        """
        command = [
            "curl",
            "--silent",
            "--speed-limit", "1000",
            "--connect-timeout", "30",
            "--fail",
            "--location",
            "--output", checksum,
        ]
        if proxy:
            command.extend(["--proxy", proxy])
        if secrets:
            # Only pass the TLS options for which a value is actually set.
            tls_options = (
                ("--cacert", "ssl_ca_cert"),
                ("--cert", "ssl_client_cert"),
                ("--key", "ssl_client_key"),
            )
            for option, name in tls_options:
                value = secrets.get(name)
                if value:
                    command.extend([option, value])

        if insecure:
            command.append("--insecure")

        # url must follow options
        command.append(url)
        return command

    def fetch_all(self, items: Dict) -> None:
        """Download every item that is not yet in the cache, in parallel.

        `items` maps checksums to a URL string or a descriptor dict. Secrets
        are resolved for all missing items before the first download starts.
        An exception raised by a download propagates to the caller.
        """
        pending = [
            self.amend_secrets(checksum, desc)
            for checksum, desc in items.items()
            if not self.exists(checksum, desc)  # discards items already in cache
        ]
        if not pending:
            # Everything is cached already; no need to start a thread pool.
            return

        checksums, descs = zip(*pending)
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Consume the result iterator so that exceptions raised by a
            # worker are re-raised here.
            for _ in executor.map(self.fetch_one, checksums, descs):
                pass

    def fetch_one(self, checksum, desc):
        """Download a single file, verify it and move it into the cache.

        `desc` is a descriptor dict as returned by `amend_secrets`. The
        proxy is taken from the OSBUILD_SOURCES_CURL_PROXY environment
        variable, if set.

        Raises RuntimeError if curl still fails after ten attempts or if the
        downloaded file does not match `checksum`.
        """
        url = self._quote_url(desc.get("url"))
        # The command line is the same for every attempt, so build it once.
        curl_command = self._build_curl_command(
            checksum,
            url,
            secrets=desc.get("secrets"),
            insecure=desc.get("insecure"),
            proxy=os.getenv("OSBUILD_SOURCES_CURL_PROXY"),
        )
        # Download to a temporary sub cache until we have verified the checksum. Use a
        # subdirectory, so we avoid copying across block devices.
        with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=self.cache) as tmpdir:
            # some mirrors are sometimes broken. retry manually, because we could be
            # redirected to a different, working, one on retry.
            return_code = 0
            for _ in range(10):
                curl = subprocess.run(curl_command, encoding="utf-8", cwd=tmpdir, check=False)
                return_code = curl.returncode
                if return_code == 0:
                    break
            else:
                # Reached only when no attempt succeeded.
                raise RuntimeError(f"curl: error downloading {url}: error code {return_code}")

            if not verify_file(f"{tmpdir}/{checksum}", checksum):
                raise RuntimeError(f"checksum mismatch: {checksum} {url}")

            # The checksum has been verified, move the file into place. in case we race
            # another download of the same file, we simply ignore the error as their
            # contents are guaranteed to be the same.
            try:
                os.rename(f"{tmpdir}/{checksum}", f"{self.cache}/{checksum}")
            except FileExistsError:
                pass
            # Workaround the lack of structured progress reporting from
            # stages/sources. It generates messages of the form
            #   "message": "source/org.osbuild.curl (org.osbuild.curl): Downloaded https://rpmrepo.osbuild.org/v2/mirror/public/f38/f38-x86_64-fedora-20230413/Packages/f/fonts-srpm-macros-2.0.5-11.fc38.noarch.rpm\n
            #
            # Without it just a long pause with no progress while curl
            # downloads.
            print(f"Downloaded {url}")
|
|
|
|
|
|
def main():
    """Create the curl source service from the command-line arguments and run it."""
    CurlSource.from_args(sys.argv[1:]).main()
|
|
|
|
|
|
# Start the service only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|