From 46db834dee455e3657bf328c610572d24472bc48 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Thu, 11 Jul 2024 16:58:40 +0200 Subject: [PATCH] sources(curl): use json like output inside of custom record When using `--write-out` we are not using %{json} because older curl (7.76) will write {"http_connect":000} which python cannot parse. So we had a custom `--write-out` with `\1xc` as "record" separators between the fields. This is a bit old-school and not very extensible so Achilleas had the idea to still use json but "define" our own subset via the variables that curl provides. This commit does that. --- sources/org.osbuild.curl | 37 ++++++++++++++++---------------- sources/test/test_curl_source.py | 32 +++++++++++++++++++++++++-- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/sources/org.osbuild.curl b/sources/org.osbuild.curl index c858ff61..8071dcba 100755 --- a/sources/org.osbuild.curl +++ b/sources/org.osbuild.curl @@ -20,6 +20,7 @@ up the download. import concurrent.futures import contextlib +import json import os import pathlib import platform @@ -96,6 +97,11 @@ SCHEMA = """ }] """ +# We are not just using %{json} here because older curl (7.76) will +# write {"http_connect":000} which python cannot parse so we write our +# own json subset +CURL_WRITE_OUT = r'\{\"url\": \"%{url}\"\, \"filename_effective\": \"%{filename_effective}\", \"exitcode\": %{exitcode}, \"errormsg\": \"%{errormsg}\" \}\n' + def curl_has_parallel_downloads(): """ @@ -131,9 +137,11 @@ def _quote_url(url: str) -> str: return quoted.geturl() -def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[Tuple[str, Dict]]): +def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[Tuple[str, Dict]], parallel=False): with open(config_path, "w", encoding="utf8") as fp: # global options + if parallel: + fp.write("parallel\n") fp.write(textwrap.dedent(f"""\ user-agent = "osbuild (Linux.{platform.machine()}; https://osbuild.org/)" silent @@ -142,6 +150,11 @@ def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[ fail location """)) + if parallel: + fp.write(textwrap.dedent(f"""\ + write-out = "{CURL_WRITE_OUT}" + """)) + proxy = os.getenv("OSBUILD_SOURCES_CURL_PROXY") if proxy: fp.write(f'proxy = "{proxy}"\n') @@ -171,18 +184,11 @@ def gen_curl_download_config(config_path: pathlib.Path, chksum_desc_tuple: List[ def try_parse_curl_line(line): - line = line.strip() - print(line) - if not line.startswith("osbuild-dl\x1c"): - print(f"WARNING: unexpected prefix in {line}", file=sys.stderr) + try: + return json.loads(line.strip()) + except Exception as e: # pylint: disable=broad-exception-caught + print(f"WARNING: cannot json parse {line} {e}", file=sys.stderr) return None - _, url, filename, exitcode, errormsg = line.split("\x1c") - return { - "url": url, - "filename_effective": filename, - "exitcode": int(exitcode), - "errormsg": errormsg, - } def validate_and_move_to_targetdir(tmpdir, targetdir, checksum, origin): @@ -203,17 +209,12 @@ def validate_and_move_to_targetdir(tmpdir, targetdir, checksum, origin): def fetch_many_new_curl(tmpdir, targetdir, dl_pairs): curl_config_path = f"{tmpdir}/curl-config.txt" - gen_curl_download_config(curl_config_path, dl_pairs) + gen_curl_download_config(curl_config_path, dl_pairs, parallel=True) curl_command = [ "curl", "--config", curl_config_path, # this adds a bunch of noise but might be nice for debug? # "--show-error", - "--parallel", - # this will write out a "record" for each finished download - # Not using %{json} here because older curl (7.76) will write - # {"http_connect":000} which python cannot parse - "--write-out", "osbuild-dl\x1c%{url}\x1c%{filename_effective}\x1c%{exitcode}\x1cerror: %{errormsg}\n", ] with contextlib.ExitStack() as cm: curl_p = subprocess.Popen(curl_command, encoding="utf-8", cwd=tmpdir, stdout=subprocess.PIPE) diff --git a/sources/test/test_curl_source.py b/sources/test/test_curl_source.py index 5a591a9e..3a508e06 100644 --- a/sources/test/test_curl_source.py +++ b/sources/test/test_curl_source.py @@ -241,9 +241,15 @@ TEST_SOURCE_PAIRS_GEN_DOWNLOAD_CONFIG = [ ] -def test_curl_gen_download_config(tmp_path, sources_module): +def test_curl_gen_download_config_old_curl(tmp_path, sources_module): config_path = tmp_path / "curl-config.txt" - sources_module.gen_curl_download_config(config_path, TEST_SOURCE_PAIRS_GEN_DOWNLOAD_CONFIG) + sources_module.gen_curl_download_config(config_path, [( + # sha256("0") + "sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9", + { + "url": "http://example.com/file/0", + }, + )]) assert config_path.exists() assert config_path.read_text(encoding="utf8") == textwrap.dedent(f"""\ @@ -258,6 +264,28 @@ def test_curl_gen_download_config(tmp_path, sources_module): output = "sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9" no-insecure + """) + + +def test_curl_gen_download_config_parallel(tmp_path, sources_module): + config_path = tmp_path / "curl-config.txt" + sources_module.gen_curl_download_config(config_path, TEST_SOURCE_PAIRS_GEN_DOWNLOAD_CONFIG, parallel=True) + + assert config_path.exists() + assert config_path.read_text(encoding="utf8") == textwrap.dedent(f"""\ + parallel + user-agent = "osbuild (Linux.{platform.machine()}; https://osbuild.org/)" + silent + speed-limit = 1000 + connect-timeout = 30 + fail + location + write-out = "{sources_module.CURL_WRITE_OUT}" + + url = "http://example.com/file/0" + output = "sha256:5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91b46729d73a27fb57e9" + no-insecure + url = "http://example.com/file/1" output = "sha256:6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" insecure