sources(curl): manually keep track of failed URLs
This commit keeps track of individual errors as curl will only report the last download operation's success/failure via its exit code. There is "--fail-early" but the downside of that is that it aborts all in-progress downloads too.
This commit is contained in:
parent
a50dbb14c2
commit
a229d46b1e
2 changed files with 16 additions and 8 deletions
|
|
@ -229,6 +229,7 @@ def fetch_many_new_curl(tmpdir, targetdir, dl_pairs):
|
||||||
curl_p = subprocess.Popen(curl_command, encoding="utf-8", cwd=tmpdir, stdout=subprocess.PIPE)
|
curl_p = subprocess.Popen(curl_command, encoding="utf-8", cwd=tmpdir, stdout=subprocess.PIPE)
|
||||||
# ensure that curl is killed even if an unexpected exit happens
|
# ensure that curl is killed even if an unexpected exit happens
|
||||||
cm.callback(curl_p.kill)
|
cm.callback(curl_p.kill)
|
||||||
|
errors = []
|
||||||
while True:
|
while True:
|
||||||
line = curl_p.stdout.readline()
|
line = curl_p.stdout.readline()
|
||||||
# empty line means eof/process finished
|
# empty line means eof/process finished
|
||||||
|
|
@ -238,10 +239,13 @@ def fetch_many_new_curl(tmpdir, targetdir, dl_pairs):
|
||||||
if not dl_details:
|
if not dl_details:
|
||||||
continue
|
continue
|
||||||
url = dl_details['url']
|
url = dl_details['url']
|
||||||
# ignore individual download errors, the overall exit status will
|
# Keep track of individual errors as curl will only report
|
||||||
# reflect them and the caller can retry
|
# the last download operation success/failure via the global
|
||||||
|
# exit code. There is "--fail-early" but the downside of that
|
||||||
|
# is that it aborts all in-progress downloads too.
|
||||||
if dl_details["exitcode"] != 0:
|
if dl_details["exitcode"] != 0:
|
||||||
print(f"WARNING: failed to download {url}: {dl_details['errormsg']}", file=sys.stderr)
|
print(f"WARNING: failed to download {url}: {dl_details['errormsg']}", file=sys.stderr)
|
||||||
|
errors.append(f'{url}: error code {dl_details["exitcode"]}')
|
||||||
continue
|
continue
|
||||||
# the way downloads are setup the filename is the expected hash
|
# the way downloads are setup the filename is the expected hash
|
||||||
# so validate now and move into place
|
# so validate now and move into place
|
||||||
|
|
@ -260,7 +264,10 @@ def fetch_many_new_curl(tmpdir, targetdir, dl_pairs):
|
||||||
print(f"Downloaded {url}")
|
print(f"Downloaded {url}")
|
||||||
# return overall download status (this will be an error if any
|
# return overall download status (this will be an error if any
|
||||||
# transfer failed)
|
# transfer failed)
|
||||||
return curl_p.wait()
|
curl_exit_code = curl_p.wait()
|
||||||
|
if not errors and curl_exit_code > 0:
|
||||||
|
errors.append("curl exited non-zero but reported no errors")
|
||||||
|
return errors
|
||||||
|
|
||||||
|
|
||||||
class CurlSource(sources.SourceService):
|
class CurlSource(sources.SourceService):
|
||||||
|
|
@ -320,12 +327,12 @@ class CurlSource(sources.SourceService):
|
||||||
# redirected to a different, working, one on retry.
|
# redirected to a different, working, one on retry.
|
||||||
return_code = 0
|
return_code = 0
|
||||||
for _ in range(NR_RETRYS):
|
for _ in range(NR_RETRYS):
|
||||||
return_code = fetch_many_new_curl(tmpdir, self.cache, dl_pairs)
|
errors = fetch_many_new_curl(tmpdir, self.cache, dl_pairs)
|
||||||
if return_code == 0:
|
if not errors:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
failed_urls = ",".join([itm[1]["url"] for itm in dl_pairs])
|
details = ",".join(errors)
|
||||||
raise RuntimeError(f"curl: error downloading {failed_urls}: error code {return_code}")
|
raise RuntimeError(f"curl: error downloading {details}")
|
||||||
|
|
||||||
if len(dl_pairs) > 0:
|
if len(dl_pairs) > 0:
|
||||||
raise RuntimeError(f"curl: finished with return_code {return_code} but {dl_pairs} left to download")
|
raise RuntimeError(f"curl: finished with return_code {return_code} but {dl_pairs} left to download")
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,8 @@ def test_curl_download_many_fail(curl_parallel):
|
||||||
}
|
}
|
||||||
with pytest.raises(RuntimeError) as exp:
|
with pytest.raises(RuntimeError) as exp:
|
||||||
curl_parallel.fetch_all(TEST_SOURCES)
|
curl_parallel.fetch_all(TEST_SOURCES)
|
||||||
assert str(exp.value) == 'curl: error downloading http://localhost:9876/random-not-exists: error code 7'
|
assert str(exp.value).startswith("curl: error downloading http://localhost:9876/random-not-exists")
|
||||||
|
assert 'http://localhost:9876/random-not-exists: error code 7' in str(exp.value)
|
||||||
|
|
||||||
|
|
||||||
def make_test_sources(fake_httpd_root, port, n_files, start_n=0, cacert=""):
|
def make_test_sources(fake_httpd_root, port, n_files, start_n=0, cacert=""):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue