sources/curl: quote URL paths before downloading

Some package versions [1] can contain carets and other characters that
curl doesn't like.  These need to be URL encoded.

Interestingly, the documented way of replacing components in a parsed
URL from urllib in Python is by calling the (seemingly private)
`_replace()` method [2].

[1] https://docs.fedoraproject.org/en-US/packaging-guidelines/Versioning/#_snapshots
[2] https://docs.python.org/3/library/urllib.parse.html#url-parsing
This commit is contained in:
Achilleas Koutsou 2022-08-31 20:02:21 +02:00 committed by Tom Gundersen
parent 1782f2fea9
commit f699720dbd

View file

@ -17,6 +17,7 @@ import os
import subprocess
import sys
import tempfile
import urllib.parse
from osbuild import sources
@ -110,10 +111,17 @@ class CurlSource(sources.SourceService):
return checksum, url
@staticmethod
def _quote_url(url: str) -> str:
    """Return *url* with its path component percent-encoded.

    Only the path is touched; scheme, netloc, params, query and fragment
    are passed through unchanged. Characters curl rejects in a path (for
    example the caret in some package versions) are percent-encoded.

    Percent-escapes already present in the path (``%`` followed by two hex
    digits) are preserved as-is so that an already-encoded URL is not
    encoded a second time (``%5E`` must not become ``%255E``). A ``%`` that
    does not start a valid escape is encoded as ``%25``.
    """
    # Local import: only this helper needs the regex module.
    import re

    purl = urllib.parse.urlparse(url)
    # Escape stray '%' first, so every '%' left in the path begins a valid
    # escape sequence and can safely be treated as "already quoted" below.
    path = re.sub(r"%(?![0-9A-Fa-f]{2})", "%25", purl.path)
    # Keep '/' (path separator) and '%' (existing escapes) untouched.
    path = urllib.parse.quote(path, safe="/%")
    # ParseResult is a namedtuple; _replace() is the documented way to
    # swap out a single component.
    quoted = purl._replace(path=path)
    return quoted.geturl()
def fetch_one(self, checksum, desc):
secrets = desc.get("secrets")
insecure = desc.get("insecure")
url = desc.get("url")
url = self._quote_url(desc.get("url"))
# Download to a temporary sub cache until we have verified the checksum. Use a
# subdirectory, so we avoid copying across block devices.
with tempfile.TemporaryDirectory(prefix="osbuild-unverified-file-", dir=self.cache) as tmpdir: