diff --git a/osbuild.spec b/osbuild.spec
index c59774a5..17290c58 100644
--- a/osbuild.spec
+++ b/osbuild.spec
@@ -210,6 +210,10 @@ install -p -m 0755 data/10-osbuild-inhibitor.rules %{buildroot}%{_udevrulesdir}
 mkdir -p %{buildroot}%{_libexecdir}
 install -p -m 0755 tools/osbuild-depsolve-dnf %{buildroot}%{_libexecdir}/osbuild-depsolve-dnf
 
+# Configure the solver for dnf
+mkdir -p %{buildroot}%{pkgdir}
+install -p -m 0644 tools/solver-dnf.json %{buildroot}%{pkgdir}/solver.json
+
 %check
 exit 0
 # We have some integration tests, but those require running a VM, so that would
@@ -284,6 +288,7 @@ fi
 
 %files depsolve-dnf
 %{_libexecdir}/osbuild-depsolve-dnf
+%{pkgdir}/solver.json
 
 %changelog
 * Mon Aug 19 2019 Miro Hrončok - 1-3
diff --git a/osbuild/solver/__init__.py b/osbuild/solver/__init__.py
new file mode 100755
index 00000000..27b3c3e7
--- /dev/null
+++ b/osbuild/solver/__init__.py
@@ -0,0 +1,78 @@
+import abc
+import os
+import urllib.error
+import urllib.parse
+import urllib.request
+
+
+class Solver(abc.ABC):
+    @abc.abstractmethod
+    def dump(self):
+        pass
+
+    @abc.abstractmethod
+    def depsolve(self, arguments):
+        pass
+
+    @abc.abstractmethod
+    def search(self, args):
+        pass
+
+
+class SolverBase(Solver):
+    # put any shared helpers in here
+    pass
+
+
+class SolverException(Exception):
+    pass
+
+
+class GPGKeyReadError(SolverException):
+    pass
+
+
+class TransactionError(SolverException):
+    pass
+
+
+class RepoError(SolverException):
+    pass
+
+
+class MarkingError(SolverException):
+    pass
+
+
+class DepsolveError(SolverException):
+    pass
+
+
+def modify_rootdir_path(path, root_dir):
+    if path and root_dir:
+        # if the root_dir is set, we need to translate the key path to be under this directory
+        return os.path.join(root_dir, path.lstrip("/"))
+    return path
+
+
+def read_keys(paths, root_dir=None):
+    keys = []
+    for path in paths:
+        url = urllib.parse.urlparse(path)
+        if url.scheme == "file":
+            path = url.path
+            path = modify_rootdir_path(path, root_dir)
+
try: + with open(path, mode="r", encoding="utf-8") as keyfile: + keys.append(keyfile.read()) + except Exception as e: + raise GPGKeyReadError(f"error loading gpg key from {path}: {e}") from e + elif url.scheme in ["http", "https"]: + try: + resp = urllib.request.urlopen(urllib.request.Request(path)) + keys.append(resp.read().decode()) + except urllib.error.URLError as e: + raise GPGKeyReadError(f"error reading remote gpg key at {path}: {e}") from e + else: + raise GPGKeyReadError(f"unknown url scheme for gpg key: {url.scheme} ({path})") + return keys diff --git a/osbuild/solver/dnf.py b/osbuild/solver/dnf.py new file mode 100755 index 00000000..3c96f8d1 --- /dev/null +++ b/osbuild/solver/dnf.py @@ -0,0 +1,312 @@ +import os +import os.path +import tempfile +from datetime import datetime +from typing import List + +import dnf +import hawkey + +from osbuild.solver import DepsolveError, MarkingError, RepoError, SolverBase, modify_rootdir_path, read_keys + + +class DNF(SolverBase): + def __init__(self, request, persistdir, cache_dir): + arch = request["arch"] + releasever = request.get("releasever") + module_platform_id = request["module_platform_id"] + proxy = request.get("proxy") + + arguments = request["arguments"] + repos = arguments.get("repos", []) + root_dir = arguments.get("root_dir") + + self.base = dnf.Base() + + # Enable fastestmirror to ensure we choose the fastest mirrors for + # downloading metadata (when depsolving) and downloading packages. + self.base.conf.fastestmirror = True + + # We use the same cachedir for multiple architectures. Unfortunately, + # this is something that doesn't work well in certain situations + # with zchunk: + # Imagine that we already have cache for arch1. Then, we use dnf-json + # to depsolve for arch2. If ZChunk is enabled and available (that's + # the case for Fedora), dnf will try to download only differences + # between arch1 and arch2 metadata. 
But, as these are completely + # different, dnf must basically redownload everything. + # For downloding deltas, zchunk uses HTTP range requests. Unfortunately, + # if the mirror doesn't support multi range requests, then zchunk will + # download one small segment per a request. Because we need to update + # the whole metadata (10s of MB), this can be extremely slow in some cases. + # I think that we can come up with a better fix but let's just disable + # zchunk for now. As we are already downloading a lot of data when + # building images, I don't care if we download even more. + self.base.conf.zchunk = False + + # Set the rest of the dnf configuration. + self.base.conf.module_platform_id = module_platform_id + self.base.conf.config_file_path = "/dev/null" + self.base.conf.persistdir = persistdir + self.base.conf.cachedir = cache_dir + self.base.conf.substitutions['arch'] = arch + self.base.conf.substitutions['basearch'] = dnf.rpm.basearch(arch) + self.base.conf.substitutions['releasever'] = releasever + if hasattr(self.base.conf, "optional_metadata_types"): + # the attribute doesn't exist on older versions of dnf; ignore the option when not available + self.base.conf.optional_metadata_types.extend(arguments.get("optional-metadata", [])) + if proxy: + self.base.conf.proxy = proxy + + try: + req_repo_ids = set() + for repo in repos: + self.base.repos.add(self._dnfrepo(repo, self.base.conf)) + # collect repo IDs from the request to separate them from the ones loaded from a root_dir + req_repo_ids.add(repo["id"]) + + if root_dir: + # This sets the varsdir to ("{root_dir}/etc/yum/vars/", "{root_dir}/etc/dnf/vars/") for custom variable + # substitution (e.g. 
CentOS Stream 9's $stream variable) + self.base.conf.substitutions.update_from_etc(root_dir) + + repos_dir = os.path.join(root_dir, "etc/yum.repos.d") + self.base.conf.reposdir = repos_dir + self.base.read_all_repos() + for repo_id, repo_config in self.base.repos.items(): + if repo_id not in req_repo_ids: + repo_config.sslcacert = modify_rootdir_path(repo_config.sslcacert, root_dir) + repo_config.sslclientcert = modify_rootdir_path(repo_config.sslclientcert, root_dir) + repo_config.sslclientkey = modify_rootdir_path(repo_config.sslclientkey, root_dir) + + self.base.fill_sack(load_system_repo=False) + except dnf.exceptions.Error as e: + raise RepoError from e + + # pylint: disable=too-many-branches + @staticmethod + def _dnfrepo(desc, parent_conf=None): + """Makes a dnf.repo.Repo out of a JSON repository description""" + + repo = dnf.repo.Repo(desc["id"], parent_conf) + + if "name" in desc: + repo.name = desc["name"] + + # at least one is required + if "baseurl" in desc: + repo.baseurl = desc["baseurl"] + elif "metalink" in desc: + repo.metalink = desc["metalink"] + elif "mirrorlist" in desc: + repo.mirrorlist = desc["mirrorlist"] + else: + raise ValueError("missing either `baseurl`, `metalink`, or `mirrorlist` in repo") + + repo.sslverify = desc.get("sslverify", True) + if "sslcacert" in desc: + repo.sslcacert = desc["sslcacert"] + if "sslclientkey" in desc: + repo.sslclientkey = desc["sslclientkey"] + if "sslclientcert" in desc: + repo.sslclientcert = desc["sslclientcert"] + + if "gpgcheck" in desc: + repo.gpgcheck = desc["gpgcheck"] + if "repo_gpgcheck" in desc: + repo.repo_gpgcheck = desc["repo_gpgcheck"] + if "gpgkey" in desc: + repo.gpgkey = [desc["gpgkey"]] + if "gpgkeys" in desc: + # gpgkeys can contain a full key, or it can be a URL + # dnf expects urls, so write the key to a temporary location and add the file:// + # path to repo.gpgkey + keydir = os.path.join(parent_conf.persistdir, "gpgkeys") + if not os.path.exists(keydir): + os.makedirs(keydir, 
mode=0o700, exist_ok=True) + + for key in desc["gpgkeys"]: + if key.startswith("-----BEGIN PGP PUBLIC KEY BLOCK-----"): + # Not using with because it needs to be a valid file for the duration. It + # is inside the temporary persistdir so will be cleaned up on exit. + # pylint: disable=consider-using-with + keyfile = tempfile.NamedTemporaryFile(dir=keydir, delete=False) + keyfile.write(key.encode("utf-8")) + repo.gpgkey.append(f"file://{keyfile.name}") + keyfile.close() + else: + repo.gpgkey.append(key) + + # In dnf, the default metadata expiration time is 48 hours. However, + # some repositories never expire the metadata, and others expire it much + # sooner than that. We therefore allow this to be configured. If nothing + # is provided we error on the side of checking if we should invalidate + # the cache. If cache invalidation is not necessary, the overhead of + # checking is in the hundreds of milliseconds. In order to avoid this + # overhead accumulating for API calls that consist of several dnf calls, + # we set the expiration to a short time period, rather than 0. + repo.metadata_expire = desc.get("metadata_expire", "20s") + + # This option if True disables modularization filtering. Effectively + # disabling modularity for given repository. 
+ if "module_hotfixes" in desc: + repo.module_hotfixes = desc["module_hotfixes"] + + return repo + + @staticmethod + def _timestamp_to_rfc3339(timestamp): + return datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ') + + def dump(self): + packages = [] + for package in self.base.sack.query().available(): + packages.append({ + "name": package.name, + "summary": package.summary, + "description": package.description, + "url": package.url, + "repo_id": package.repoid, + "epoch": package.epoch, + "version": package.version, + "release": package.release, + "arch": package.arch, + "buildtime": self._timestamp_to_rfc3339(package.buildtime), + "license": package.license + }) + return packages + + def search(self, args): + """ Perform a search on the available packages + + args contains a "search" dict with parameters to use for searching. + "packages" list of package name globs to search for + "latest" is a boolean that will return only the latest NEVRA instead + of all matching builds in the metadata. + + eg. + + "search": { + "latest": false, + "packages": ["tmux", "vim*", "*ssh*"] + }, + """ + pkg_globs = args.get("packages", []) + + packages = [] + + # NOTE: Build query one piece at a time, don't pass all to filterm at the same + # time. + available = self.base.sack.query().available() + for name in pkg_globs: + # If the package name glob has * in it, use glob. 
+ # If it has *name* use substr + # If it has neither use exact match + if "*" in name: + if name[0] != "*" or name[-1] != "*": + q = available.filter(name__glob=name) + else: + q = available.filter(name__substr=name.replace("*", "")) + else: + q = available.filter(name__eq=name) + + if args.get("latest", False): + q = q.latest() + + for package in q: + packages.append({ + "name": package.name, + "summary": package.summary, + "description": package.description, + "url": package.url, + "repo_id": package.repoid, + "epoch": package.epoch, + "version": package.version, + "release": package.release, + "arch": package.arch, + "buildtime": self._timestamp_to_rfc3339(package.buildtime), + "license": package.license + }) + return packages + + def depsolve(self, arguments): + # # Return an empty list when 'transactions' key is missing or when it is None + transactions = arguments.get("transactions") or [] + # collect repo IDs from the request so we know whether to translate gpg key paths + request_repo_ids = set(repo["id"] for repo in arguments.get("repos", [])) + root_dir = arguments.get("root_dir") + last_transaction: List = [] + + for transaction in transactions: + self.base.reset(goal=True) + self.base.sack.reset_excludes() + + self.base.conf.install_weak_deps = transaction.get("install_weak_deps", False) + + try: + # set the packages from the last transaction as installed + for installed_pkg in last_transaction: + self.base.package_install(installed_pkg, strict=True) + + # depsolve the current transaction + self.base.install_specs( + transaction.get("package-specs"), + transaction.get("exclude-specs"), + reponame=transaction.get("repo-ids"), + ) + except dnf.exceptions.Error as e: + raise MarkingError from e + + try: + self.base.resolve() + except dnf.exceptions.Error as e: + raise DepsolveError from e + + # store the current transaction result + last_transaction.clear() + for tsi in self.base.transaction: + # Avoid using the install_set() helper, as it does not 
guarantee + # a stable order + if tsi.action not in dnf.transaction.FORWARD_ACTIONS: + continue + last_transaction.append(tsi.pkg) + + packages = [] + pkg_repos = {} + for package in last_transaction: + packages.append({ + "name": package.name, + "epoch": package.epoch, + "version": package.version, + "release": package.release, + "arch": package.arch, + "repo_id": package.repoid, + "path": package.relativepath, + "remote_location": package.remote_location(), + "checksum": f"{hawkey.chksum_name(package.chksum[0])}:{package.chksum[1].hex()}", + }) + # collect repository objects by id to create the 'repositories' collection for the response + pkgrepo = package.repo + pkg_repos[pkgrepo.id] = pkgrepo + + repositories = {} # full repository configs for the response + for repo in pkg_repos.values(): + repositories[repo.id] = { + "id": repo.id, + "name": repo.name, + "baseurl": list(repo.baseurl) if repo.baseurl else None, + "metalink": repo.metalink, + "mirrorlist": repo.mirrorlist, + "gpgcheck": repo.gpgcheck, + "repo_gpgcheck": repo.repo_gpgcheck, + "gpgkeys": read_keys(repo.gpgkey, root_dir if repo.id not in request_repo_ids else None), + "sslverify": bool(repo.sslverify), + "sslcacert": repo.sslcacert, + "sslclientkey": repo.sslclientkey, + "sslclientcert": repo.sslclientcert, + } + response = { + "packages": packages, + "repos": repositories, + } + return response diff --git a/tools/osbuild-depsolve-dnf5 b/osbuild/solver/dnf5.py similarity index 72% rename from tools/osbuild-depsolve-dnf5 rename to osbuild/solver/dnf5.py index 7d240b64..75c20cf1 100755 --- a/tools/osbuild-depsolve-dnf5 +++ b/osbuild/solver/dnf5.py @@ -1,20 +1,6 @@ -#!/usr/bin/python3 -# pylint: disable=invalid-name - -""" -A JSON-based interface for depsolving using DNF5. - -Reads a request through stdin and prints the result to stdout. -In case of error, a structured error is printed to stdout as well. 
-""" -import json import os -import sys +import os.path import tempfile -import traceback -import urllib.error -import urllib.parse -import urllib.request from datetime import datetime from typing import List @@ -24,40 +10,7 @@ from libdnf5.common import QueryCmp_CONTAINS as CONTAINS from libdnf5.common import QueryCmp_EQ as EQ from libdnf5.common import QueryCmp_GLOB as GLOB - -# XXX - Temporarily lifted from dnf.rpm module # pylint: disable=fixme -def _invert(dct): - return {v: k for k in dct for v in dct[k]} - - -_BASEARCH_MAP = _invert({ - 'aarch64': ('aarch64',), - 'alpha': ('alpha', 'alphaev4', 'alphaev45', 'alphaev5', 'alphaev56', - 'alphaev6', 'alphaev67', 'alphaev68', 'alphaev7', 'alphapca56'), - 'arm': ('armv5tejl', 'armv5tel', 'armv5tl', 'armv6l', 'armv7l', 'armv8l'), - 'armhfp': ('armv6hl', 'armv7hl', 'armv7hnl', 'armv8hl'), - 'i386': ('i386', 'athlon', 'geode', 'i386', 'i486', 'i586', 'i686'), - 'ia64': ('ia64',), - 'mips': ('mips',), - 'mipsel': ('mipsel',), - 'mips64': ('mips64',), - 'mips64el': ('mips64el',), - 'loongarch64': ('loongarch64',), - 'noarch': ('noarch',), - 'ppc': ('ppc',), - 'ppc64': ('ppc64', 'ppc64iseries', 'ppc64p7', 'ppc64pseries'), - 'ppc64le': ('ppc64le',), - 'riscv32': ('riscv32',), - 'riscv64': ('riscv64',), - 'riscv128': ('riscv128',), - 's390': ('s390',), - 's390x': ('s390x',), - 'sh3': ('sh3',), - 'sh4': ('sh4', 'sh4a'), - 'sparc': ('sparc', 'sparc64', 'sparc64v', 'sparcv8', 'sparcv9', - 'sparcv9v'), - 'x86_64': ('x86_64', 'amd64', 'ia32e'), -}) +from osbuild.solver import DepsolveError, RepoError, SolverBase, modify_rootdir_path, read_keys def remote_location(package, schemes=("http", "ftp", "file", "https")): @@ -73,15 +26,19 @@ def remote_location(package, schemes=("http", "ftp", "file", "https")): return urls[0] -class TransactionError(Exception): - pass +def get_string_option(option): + # option.get_value() causes an error if it's unset for string values, so check if it's empty first + if option.empty(): + return None + 
return option.get_value() -class RepoError(Exception): - pass +# XXX - Temporarily lifted from dnf.rpm module # pylint: disable=fixme +def _invert(dct): + return {v: k for k in dct for v in dct[k]} -class Solver(): +class DNF5(SolverBase): """Solver implements package related actions These include depsolving a package set, searching for packages, and dumping a list @@ -115,7 +72,7 @@ class Solver(): # Base is the correct place to set substitutions, not per-repo. # See https://github.com/rpm-software-management/dnf5/issues/1248 self.base.get_vars().set("arch", arch) - self.base.get_vars().set("basearch", _BASEARCH_MAP[arch]) + self.base.get_vars().set("basearch", self._BASEARCH_MAP[arch]) if releasever: self.base.get_vars().set('releasever', releasever) if proxy: @@ -197,10 +154,38 @@ class Solver(): repo_iter.next() self.base.get_repo_sack().update_and_load_enabled_repos(load_system=False) - except RuntimeError as e: raise RepoError(e) from e + _BASEARCH_MAP = _invert({ + 'aarch64': ('aarch64',), + 'alpha': ('alpha', 'alphaev4', 'alphaev45', 'alphaev5', 'alphaev56', + 'alphaev6', 'alphaev67', 'alphaev68', 'alphaev7', 'alphapca56'), + 'arm': ('armv5tejl', 'armv5tel', 'armv5tl', 'armv6l', 'armv7l', 'armv8l'), + 'armhfp': ('armv6hl', 'armv7hl', 'armv7hnl', 'armv8hl'), + 'i386': ('i386', 'athlon', 'geode', 'i386', 'i486', 'i586', 'i686'), + 'ia64': ('ia64',), + 'mips': ('mips',), + 'mipsel': ('mipsel',), + 'mips64': ('mips64',), + 'mips64el': ('mips64el',), + 'loongarch64': ('loongarch64',), + 'noarch': ('noarch',), + 'ppc': ('ppc',), + 'ppc64': ('ppc64', 'ppc64iseries', 'ppc64p7', 'ppc64pseries'), + 'ppc64le': ('ppc64le',), + 'riscv32': ('riscv32',), + 'riscv64': ('riscv64',), + 'riscv128': ('riscv128',), + 's390': ('s390',), + 's390x': ('s390x',), + 'sh3': ('sh3',), + 'sh4': ('sh4', 'sh4a'), + 'sparc': ('sparc', 'sparc64', 'sparc64v', 'sparcv8', 'sparcv9', + 'sparcv9v'), + 'x86_64': ('x86_64', 'amd64', 'ia32e'), + }) + # pylint: disable=too-many-branches def 
_dnfrepo(self, desc, exclude_pkgs=None): """Makes a dnf.repo.Repo out of a JSON repository description""" @@ -391,7 +376,7 @@ class Solver(): goal.add_install(pkg, settings) transaction = goal.resolve() if transaction.get_problems() != NO_PROBLEM: - raise TransactionError("\n".join(transaction.get_resolve_logs_as_strings())) + raise DepsolveError("\n".join(transaction.get_resolve_logs_as_strings())) # store the current transaction result last_transaction.clear() @@ -403,7 +388,7 @@ class Solver(): # Something went wrong, but no error was generated by goal.resolve() if len(transactions) > 0 and len(last_transaction) == 0: - raise TransactionError("Empty transaction results") + raise DepsolveError("Empty transaction results") packages = [] pkg_repos = {} @@ -448,169 +433,3 @@ class Solver(): "repos": repositories, } return response - - -def get_string_option(option): - # option.get_value() causes an error if it's unset for string values, so check if it's empty first - if option.empty(): - return None - return option.get_value() - - -class GPGKeyReadError(Exception): - pass - - -def modify_rootdir_path(path, root_dir): - if path and root_dir: - # if the root_dir is set, we need to translate the key path to be under this directory - return os.path.join(root_dir, path.lstrip("/")) - return path - - -def read_keys(paths, root_dir=None): - keys = [] - for path in paths: - url = urllib.parse.urlparse(path) - if url.scheme == "file": - path = url.path - path = modify_rootdir_path(path, root_dir) - try: - with open(path, mode="r", encoding="utf-8") as keyfile: - keys.append(keyfile.read()) - except Exception as e: - raise GPGKeyReadError(f"error loading gpg key from {path}: {e}") from e - elif url.scheme in ["http", "https"]: - try: - resp = urllib.request.urlopen(urllib.request.Request(path)) - keys.append(resp.read().decode()) - except urllib.error.URLError as e: - raise GPGKeyReadError(f"error reading remote gpg key at {path}: {e}") from e - else: - raise 
GPGKeyReadError(f"unknown url scheme for gpg key: {url.scheme} ({path})") - return keys - - -def setup_cachedir(request): - arch = request["arch"] - # If dnf-json is run as a service, we don't want users to be able to set the cache - cache_dir = os.environ.get("OVERWRITE_CACHE_DIR", "") - if cache_dir: - cache_dir = os.path.join(cache_dir, arch) - else: - cache_dir = request.get("cachedir", "") - - if not cache_dir: - return "", {"kind": "Error", "reason": "No cache dir set"} - - return cache_dir, None - - -def solve(request, cache_dir): - command = request["command"] - arguments = request["arguments"] - with tempfile.TemporaryDirectory() as persistdir: - try: - solver = Solver(request, persistdir, cache_dir) - if command == "dump": - result = solver.dump() - elif command == "depsolve": - result = solver.depsolve(arguments) - elif command == "search": - result = solver.search(arguments.get("search", {})) - except TransactionError as e: - printe("error depsolve") - return None, { - "kind": "DepsolveError", - "reason": f"There was a problem with depsolving: {e}" - } - except RepoError as e: - printe("error repository setup") - return None, { - "kind": "RepoError", - "reason": f"There was a problem reading a repository: {e}" - } - except Exception as e: # pylint: disable=broad-exception-caught - printe("error traceback") - return None, { - "kind": type(e).__name__, - "reason": str(e), - "traceback": traceback.format_exc() - } - return result, None - - -def printe(*msg): - print(*msg, file=sys.stderr) - - -def fail(err): - printe(f"{err['kind']}: {err['reason']}") - print(json.dumps(err)) - sys.exit(1) - - -def respond(result): - print(json.dumps(result)) - - -# pylint: disable=too-many-return-statements -def validate_request(request): - command = request.get("command") - valid_cmds = ("depsolve", "dump", "search") - if command not in valid_cmds: - return { - "kind": "InvalidRequest", - "reason": f"invalid command '{command}': must be one of {', '.join(valid_cmds)}" - 
} - - if not request.get("arch"): - return { - "kind": "InvalidRequest", - "reason": "no 'arch' specified" - } - - if not request.get("module_platform_id"): - return { - "kind": "InvalidRequest", - "reason": "no 'module_platform_id' specified" - } - if not request.get("releasever"): - return { - "kind": "InvalidRequest", - "reason": "no 'releasever' specified" - } - arguments = request.get("arguments") - if not arguments: - return { - "kind": "InvalidRequest", - "reason": "empty 'arguments'" - } - - if not arguments.get("repos") and not arguments.get("root_dir"): - return { - "kind": "InvalidRequest", - "reason": "no 'repos' or 'root_dir' specified" - } - - return None - - -def main(): - request = json.load(sys.stdin) - err = validate_request(request) - if err: - fail(err) - - cachedir, err = setup_cachedir(request) - if err: - fail(err) - result, err = solve(request, cachedir) - if err: - fail(err) - else: - respond(result) - - -if __name__ == "__main__": - main() diff --git a/setup.py b/setup.py index d68ebc69..84cf7917 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setuptools.setup( name="osbuild", version="125", description="A build system for OS images", - packages=["osbuild", "osbuild.formats", "osbuild.util"], + packages=["osbuild", "osbuild.formats", "osbuild.util","osbuild.solver"], license='Apache-2.0', install_requires=[ "jsonschema", diff --git a/tools/osbuild-depsolve-dnf b/tools/osbuild-depsolve-dnf index 92293c00..1beb6ffc 100755 --- a/tools/osbuild-depsolve-dnf +++ b/tools/osbuild-depsolve-dnf @@ -9,344 +9,32 @@ In case of error, a structured error is printed to stdout as well. 
""" import json import os +import os.path import sys import tempfile -import urllib.error -import urllib.parse -import urllib.request -from datetime import datetime -from typing import List -import dnf -import hawkey +from osbuild.solver import GPGKeyReadError, MarkingError, DepsolveError, RepoError - -class Solver(): - - def __init__(self, request, persistdir, cache_dir): - arch = request["arch"] - releasever = request.get("releasever") - module_platform_id = request["module_platform_id"] - proxy = request.get("proxy") - - arguments = request["arguments"] - repos = arguments.get("repos", []) - root_dir = arguments.get("root_dir") - - self.base = dnf.Base() - - # Enable fastestmirror to ensure we choose the fastest mirrors for - # downloading metadata (when depsolving) and downloading packages. - self.base.conf.fastestmirror = True - - # We use the same cachedir for multiple architectures. Unfortunately, - # this is something that doesn't work well in certain situations - # with zchunk: - # Imagine that we already have cache for arch1. Then, we use dnf-json - # to depsolve for arch2. If ZChunk is enabled and available (that's - # the case for Fedora), dnf will try to download only differences - # between arch1 and arch2 metadata. But, as these are completely - # different, dnf must basically redownload everything. - # For downloding deltas, zchunk uses HTTP range requests. Unfortunately, - # if the mirror doesn't support multi range requests, then zchunk will - # download one small segment per a request. Because we need to update - # the whole metadata (10s of MB), this can be extremely slow in some cases. - # I think that we can come up with a better fix but let's just disable - # zchunk for now. As we are already downloading a lot of data when - # building images, I don't care if we download even more. - self.base.conf.zchunk = False - - # Set the rest of the dnf configuration. 
- self.base.conf.module_platform_id = module_platform_id - self.base.conf.config_file_path = "/dev/null" - self.base.conf.persistdir = persistdir - self.base.conf.cachedir = cache_dir - self.base.conf.substitutions['arch'] = arch - self.base.conf.substitutions['basearch'] = dnf.rpm.basearch(arch) - self.base.conf.substitutions['releasever'] = releasever - - if hasattr(self.base.conf, "optional_metadata_types"): - # the attribute doesn't exist on older versions of dnf; ignore the option when not available - self.base.conf.optional_metadata_types.extend(arguments.get("optional-metadata", [])) - - if proxy: - self.base.conf.proxy = proxy - - req_repo_ids = set() - for repo in repos: - self.base.repos.add(self._dnfrepo(repo, self.base.conf)) - # collect repo IDs from the request to separate them from the ones loaded from a root_dir - req_repo_ids.add(repo["id"]) - - if root_dir: - # This sets the varsdir to ("{root_dir}/etc/yum/vars/", "{root_dir}/etc/dnf/vars/") for custom variable - # substitution (e.g. 
CentOS Stream 9's $stream variable) - self.base.conf.substitutions.update_from_etc(root_dir) - - repos_dir = os.path.join(root_dir, "etc/yum.repos.d") - self.base.conf.reposdir = repos_dir - self.base.read_all_repos() - for repo_id, repo_config in self.base.repos.items(): - if repo_id not in req_repo_ids: - repo_config.sslcacert = modify_rootdir_path(repo_config.sslcacert, root_dir) - repo_config.sslclientcert = modify_rootdir_path(repo_config.sslclientcert, root_dir) - repo_config.sslclientkey = modify_rootdir_path(repo_config.sslclientkey, root_dir) - - self.base.fill_sack(load_system_repo=False) - - # pylint: disable=too-many-branches - @staticmethod - def _dnfrepo(desc, parent_conf=None): - """Makes a dnf.repo.Repo out of a JSON repository description""" - - repo = dnf.repo.Repo(desc["id"], parent_conf) - - if "name" in desc: - repo.name = desc["name"] - - # at least one is required - if "baseurl" in desc: - repo.baseurl = desc["baseurl"] - elif "metalink" in desc: - repo.metalink = desc["metalink"] - elif "mirrorlist" in desc: - repo.mirrorlist = desc["mirrorlist"] - else: - raise ValueError("missing either `baseurl`, `metalink`, or `mirrorlist` in repo") - - repo.sslverify = desc.get("sslverify", True) - if "sslcacert" in desc: - repo.sslcacert = desc["sslcacert"] - if "sslclientkey" in desc: - repo.sslclientkey = desc["sslclientkey"] - if "sslclientcert" in desc: - repo.sslclientcert = desc["sslclientcert"] - - if "gpgcheck" in desc: - repo.gpgcheck = desc["gpgcheck"] - if "repo_gpgcheck" in desc: - repo.repo_gpgcheck = desc["repo_gpgcheck"] - if "gpgkey" in desc: - repo.gpgkey = [desc["gpgkey"]] - if "gpgkeys" in desc: - # gpgkeys can contain a full key, or it can be a URL - # dnf expects urls, so write the key to a temporary location and add the file:// - # path to repo.gpgkey - keydir = os.path.join(parent_conf.persistdir, "gpgkeys") - if not os.path.exists(keydir): - os.makedirs(keydir, mode=0o700, exist_ok=True) - - for key in desc["gpgkeys"]: - if 
key.startswith("-----BEGIN PGP PUBLIC KEY BLOCK-----"): - # Not using with because it needs to be a valid file for the duration. It - # is inside the temporary persistdir so will be cleaned up on exit. - # pylint: disable=consider-using-with - keyfile = tempfile.NamedTemporaryFile(dir=keydir, delete=False) - keyfile.write(key.encode("utf-8")) - repo.gpgkey.append(f"file://{keyfile.name}") - keyfile.close() - else: - repo.gpgkey.append(key) - - # In dnf, the default metadata expiration time is 48 hours. However, - # some repositories never expire the metadata, and others expire it much - # sooner than that. We therefore allow this to be configured. If nothing - # is provided we error on the side of checking if we should invalidate - # the cache. If cache invalidation is not necessary, the overhead of - # checking is in the hundreds of milliseconds. In order to avoid this - # overhead accumulating for API calls that consist of several dnf calls, - # we set the expiration to a short time period, rather than 0. - repo.metadata_expire = desc.get("metadata_expire", "20s") - - # This option if True disables modularization filtering. Effectively - # disabling modularity for given repository. 
- if "module_hotfixes" in desc: - repo.module_hotfixes = desc["module_hotfixes"] - - return repo - - @staticmethod - def _timestamp_to_rfc3339(timestamp): - return datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ') - - def dump(self): - packages = [] - for package in self.base.sack.query().available(): - packages.append({ - "name": package.name, - "summary": package.summary, - "description": package.description, - "url": package.url, - "repo_id": package.repoid, - "epoch": package.epoch, - "version": package.version, - "release": package.release, - "arch": package.arch, - "buildtime": self._timestamp_to_rfc3339(package.buildtime), - "license": package.license - }) - return packages - - def search(self, args): - """ Perform a search on the available packages - - args contains a "search" dict with parameters to use for searching. - "packages" list of package name globs to search for - "latest" is a boolean that will return only the latest NEVRA instead - of all matching builds in the metadata. - - eg. - - "search": { - "latest": false, - "packages": ["tmux", "vim*", "*ssh*"] - }, - """ - pkg_globs = args.get("packages", []) - - packages = [] - - # NOTE: Build query one piece at a time, don't pass all to filterm at the same - # time. - available = self.base.sack.query().available() - for name in pkg_globs: - # If the package name glob has * in it, use glob. 
- # If it has *name* use substr - # If it has neither use exact match - if "*" in name: - if name[0] != "*" or name[-1] != "*": - q = available.filter(name__glob=name) - else: - q = available.filter(name__substr=name.replace("*", "")) - else: - q = available.filter(name__eq=name) - - if args.get("latest", False): - q = q.latest() - - for package in q: - packages.append({ - "name": package.name, - "summary": package.summary, - "description": package.description, - "url": package.url, - "repo_id": package.repoid, - "epoch": package.epoch, - "version": package.version, - "release": package.release, - "arch": package.arch, - "buildtime": self._timestamp_to_rfc3339(package.buildtime), - "license": package.license - }) - return packages - - def depsolve(self, arguments): - transactions = arguments.get("transactions", []) - # collect repo IDs from the request so we know whether to translate gpg key paths - request_repo_ids = set(repo["id"] for repo in arguments.get("repos", [])) - root_dir = arguments.get("root_dir") - last_transaction: List = [] - - for transaction in transactions: - self.base.reset(goal=True) - self.base.sack.reset_excludes() - - self.base.conf.install_weak_deps = transaction.get("install_weak_deps", False) - - # set the packages from the last transaction as installed - for installed_pkg in last_transaction: - self.base.package_install(installed_pkg, strict=True) - - # depsolve the current transaction - self.base.install_specs( - transaction.get("package-specs"), - transaction.get("exclude-specs"), - reponame=transaction.get("repo-ids"), - ) - self.base.resolve() - - # store the current transaction result - last_transaction.clear() - for tsi in self.base.transaction: - # Avoid using the install_set() helper, as it does not guarantee - # a stable order - if tsi.action not in dnf.transaction.FORWARD_ACTIONS: - continue - last_transaction.append(tsi.pkg) - - packages = [] - pkg_repos = {} - for package in last_transaction: - packages.append({ - "name": 
package.name, - "epoch": package.epoch, - "version": package.version, - "release": package.release, - "arch": package.arch, - "repo_id": package.repoid, - "path": package.relativepath, - "remote_location": package.remote_location(), - "checksum": f"{hawkey.chksum_name(package.chksum[0])}:{package.chksum[1].hex()}", - }) - # collect repository objects by id to create the 'repositories' collection for the response - pkgrepo = package.repo - pkg_repos[pkgrepo.id] = pkgrepo - - repositories = {} # full repository configs for the response - for repo in pkg_repos.values(): - repositories[repo.id] = { - "id": repo.id, - "name": repo.name, - "baseurl": list(repo.baseurl) if repo.baseurl else None, - "metalink": repo.metalink, - "mirrorlist": repo.mirrorlist, - "gpgcheck": repo.gpgcheck, - "repo_gpgcheck": repo.repo_gpgcheck, - "gpgkeys": read_keys(repo.gpgkey, root_dir if repo.id not in request_repo_ids else None), - "sslverify": bool(repo.sslverify), - "sslcacert": repo.sslcacert, - "sslclientkey": repo.sslclientkey, - "sslclientcert": repo.sslclientcert, - } - response = { - "packages": packages, - "repos": repositories, - } - return response - - -class GPGKeyReadError(Exception): +# Load the solver configuration +config = {"use_dnf5": False} +config_path = os.environ.get("OSBUILD_SOLVER_CONFIG") or "/usr/lib/osbuild/solver.json" +try: + with open(config_path, encoding="utf-8") as f: + config = json.load(f) +except FileNotFoundError: pass - -def modify_rootdir_path(path, root_dir): - if path and root_dir: - # if the root_dir is set, we need to translate the key path to be under this directory - return os.path.join(root_dir, path.lstrip("/")) - return path +if config.get("use_dnf5", False): + from osbuild.solver.dnf5 import DNF5 as Solver +else: + from osbuild.solver.dnf import DNF as Solver -def read_keys(paths, root_dir=None): - keys = [] - for path in paths: - url = urllib.parse.urlparse(path) - if url.scheme == "file": - path = url.path - path = 
modify_rootdir_path(path, root_dir) - try: - with open(path, mode="r", encoding="utf-8") as keyfile: - keys.append(keyfile.read()) - except Exception as e: - raise GPGKeyReadError(f"error loading gpg key from {path}: {e}") from e - elif url.scheme in ["http", "https"]: - try: - resp = urllib.request.urlopen(urllib.request.Request(path)) - keys.append(resp.read().decode()) - except urllib.error.URLError as e: - raise GPGKeyReadError(f"error reading remote gpg key at {path}: {e}") from e - else: - raise GPGKeyReadError(f"unknown url scheme for gpg key: {url.scheme} ({path})") - return keys +def get_string_option(option): + # option.get_value() causes an error if it's unset for string values, so check if it's empty first + if option.empty(): + return None + return option.get_value() def setup_cachedir(request): @@ -369,22 +57,32 @@ def solve(request, cache_dir): arguments = request["arguments"] with tempfile.TemporaryDirectory() as persistdir: + solver = Solver(request, persistdir, cache_dir) try: - solver = Solver(request, persistdir, cache_dir) if command == "dump": result = solver.dump() elif command == "depsolve": result = solver.depsolve(arguments) elif command == "search": result = solver.search(arguments.get("search", {})) - - except dnf.exceptions.MarkingErrors as e: + except GPGKeyReadError as e: + printe("error reading gpgkey") + return None, { + "kind": type(e).__name__, + "reason": str(e) + } + except RepoError as e: + return None, { + "kind": "RepoError", + "reason": f"There was a problem reading a repository: {e}" + } + except MarkingError as e: printe("error install_specs") return None, { "kind": "MarkingErrors", "reason": f"Error occurred when marking packages for installation: {e}" } - except dnf.exceptions.DepsolveError as e: + except DepsolveError as e: printe("error depsolve") # collect list of packages for error pkgs = [] @@ -394,23 +92,15 @@ def solve(request, cache_dir): "kind": "DepsolveError", "reason": f"There was a problem depsolving {', 
'.join(pkgs)}: {e}" } - except dnf.exceptions.RepoError as e: - return None, { - "kind": "RepoError", - "reason": f"There was a problem reading a repository: {e}" - } - except dnf.exceptions.Error as e: - printe("error repository setup") + except Exception as e: # pylint: disable=broad-exception-caught + printe("error traceback") + import traceback return None, { "kind": type(e).__name__, - "reason": str(e) - } - except GPGKeyReadError as e: - printe("error reading gpgkey") - return None, { - "kind": type(e).__name__, - "reason": str(e) + "reason": str(e), + "traceback": traceback.format_exc() } + return result, None diff --git a/tools/solver-dnf.json b/tools/solver-dnf.json new file mode 100644 index 00000000..43fe3b97 --- /dev/null +++ b/tools/solver-dnf.json @@ -0,0 +1,3 @@ +{ + "use_dnf5": false +} diff --git a/tools/solver-dnf5.json b/tools/solver-dnf5.json new file mode 100644 index 00000000..642ed8d5 --- /dev/null +++ b/tools/solver-dnf5.json @@ -0,0 +1,3 @@ +{ + "use_dnf5": true +}