#!/usr/bin/python3
# pylint: disable=invalid-name

"""
A JSON-based interface for depsolving using DNF.

Reads a request through stdin and prints the result to stdout.
In case of error, a structured error is printed to stdout as well.
"""
import json
import os
import sys
import tempfile
from datetime import datetime

import dnf
import hawkey


class Solver():
    # pylint: disable=too-many-arguments
    def __init__(self, repos, module_platform_id, persistdir, cachedir, arch):
        self.base = dnf.Base()

        # Enable fastestmirror to ensure we choose the fastest mirrors for
        # downloading metadata (when depsolving) and downloading packages.
        self.base.conf.fastestmirror = True

        # We use the same cachedir for multiple architectures. Unfortunately,
        # this is something that doesn't work well in certain situations
        # with zchunk:
        # Imagine that we already have the cache for arch1. Then, we use
        # dnf-json to depsolve for arch2. If zchunk is enabled and available
        # (that's the case for Fedora), dnf will try to download only the
        # differences between the arch1 and arch2 metadata. But as these are
        # completely different, dnf must basically redownload everything.
        # For downloading deltas, zchunk uses HTTP range requests.
        # Unfortunately, if the mirror doesn't support multi-range requests,
        # zchunk downloads one small segment per request. Because we need to
        # update the whole metadata (tens of MB), this can be extremely slow
        # in some cases. I think we can come up with a better fix, but let's
        # just disable zchunk for now. As we are already downloading a lot of
        # data when building images, I don't care if we download even more.
        self.base.conf.zchunk = False

        # Set the rest of the dnf configuration.
        self.base.conf.module_platform_id = module_platform_id
        self.base.conf.config_file_path = "/dev/null"
        self.base.conf.persistdir = persistdir
        self.base.conf.cachedir = cachedir
        self.base.conf.substitutions['arch'] = arch
        self.base.conf.substitutions['basearch'] = dnf.rpm.basearch(arch)

        for repo in repos:
            self.base.repos.add(self._dnfrepo(repo, self.base.conf))

        self.base.fill_sack(load_system_repo=False)

    @staticmethod
    def _dnfrepo(desc, parent_conf=None):
        """Makes a dnf.repo.Repo out of a JSON repository description"""
        repo = dnf.repo.Repo(desc["id"], parent_conf)

        if "name" in desc:
            repo.name = desc["name"]

        # A repository description must provide exactly one source of
        # metadata: a baseurl, a metalink, or a mirrorlist.
        if "baseurl" in desc:
            repo.baseurl = desc["baseurl"]
        elif "metalink" in desc:
            repo.metalink = desc["metalink"]
        elif "mirrorlist" in desc:
            repo.mirrorlist = desc["mirrorlist"]
        else:
            # Raise a real error instead of `assert False`, which would be
            # silently skipped when Python runs with optimizations enabled.
            raise ValueError(
                "repo description must contain one of 'baseurl', 'metalink', or 'mirrorlist'"
            )

        if desc.get("ignoressl", False):
            repo.sslverify = False
        if "sslcacert" in desc:
            repo.sslcacert = desc["sslcacert"]
        if "sslclientkey" in desc:
            repo.sslclientkey = desc["sslclientkey"]
        if "sslclientcert" in desc:
            repo.sslclientcert = desc["sslclientcert"]

        # In dnf, the default metadata expiration time is 48 hours. However,
        # some repositories never expire their metadata, and others expire it
        # much sooner than that. We therefore allow this to be configured. If
        # nothing is provided, we err on the side of checking whether we
        # should invalidate the cache. If cache invalidation is not necessary,
        # the overhead of checking is in the hundreds of milliseconds. To
        # avoid this overhead accumulating across API calls that consist of
        # several dnf calls, we set the expiration to a short time period
        # rather than 0.
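        #
        # For illustration, a description overriding the default might look
        # like this (the id, URL, and expiration value are hypothetical):
        #
        #   {"id": "fedora", "baseurl": "https://example.com/repo",
        #    "metadata_expire": "1h"}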
        repo.metadata_expire = desc.get("metadata_expire", "20s")

        return repo

    @staticmethod
    def _timestamp_to_rfc3339(timestamp):
        return datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ')

    def dump(self):
        """Returns a list of all available packages with their metadata."""
        packages = []
        for package in self.base.sack.query().available():
            packages.append({
                "name": package.name,
                "summary": package.summary,
                "description": package.description,
                "url": package.url,
                "repo_id": package.repoid,
                "epoch": package.epoch,
                "version": package.version,
                "release": package.release,
                "arch": package.arch,
                "buildtime": self._timestamp_to_rfc3339(package.buildtime),
                "license": package.license
            })
        return packages

    def depsolve(self, transactions):
        """Depsolves each transaction in order, treating the result of the
        previous transaction as already installed, and returns the package
        set of the final transaction."""
        last_transaction = []

        for idx, transaction in enumerate(transactions):
            self.base.reset(goal=True)
            self.base.sack.reset_excludes()

            # don't install weak deps for any transaction after the first one
            if idx > 0:
                self.base.conf.install_weak_deps = False

            # set the packages from the last transaction as installed
            for installed_pkg in last_transaction:
                self.base.package_install(installed_pkg, strict=True)

            # depsolve the current transaction
            self.base.install_specs(
                transaction.get("package-specs"),
                transaction.get("exclude-specs"),
                reponame=transaction.get("repo-ids"),
            )
            self.base.resolve()

            # store the current transaction result
            last_transaction.clear()
            for tsi in self.base.transaction:
                # Avoid using the install_set() helper, as it does not
                # guarantee a stable order.
                if tsi.action not in dnf.transaction.FORWARD_ACTIONS:
                    continue
                last_transaction.append(tsi.pkg)

        dependencies = []
        for package in last_transaction:
            dependencies.append({
                "name": package.name,
                "epoch": package.epoch,
                "version": package.version,
                "release": package.release,
                "arch": package.arch,
                "repo_id": package.repoid,
                "path": package.relativepath,
                "remote_location": package.remote_location(),
                "checksum": (
                    f"{hawkey.chksum_name(package.chksum[0])}:"
                    f"{package.chksum[1].hex()}"
                )
            })
        return dependencies


def setup_cachedir(request):
    arch = request["arch"]
    # If dnf-json is run as a service, we don't want users to be able to set
    # the cache directory, so the environment variable takes precedence over
    # the value in the request.
    cache_dir = os.environ.get("OVERWRITE_CACHE_DIR", "")
    if cache_dir:
        cache_dir = os.path.join(cache_dir, arch)
    else:
        cache_dir = request.get("cachedir", "")

    if not cache_dir:
        return "", {"kind": "Error", "reason": "No cache dir set"}

    return cache_dir, None


def solve(request, cache_dir):
    command = request["command"]
    arch = request["arch"]
    module_platform_id = request["module_platform_id"]
    arguments = request["arguments"]

    transactions = arguments.get("transactions")
    with tempfile.TemporaryDirectory() as persistdir:
        try:
            solver = Solver(
                arguments["repos"],
                module_platform_id,
                persistdir,
                cache_dir,
                arch
            )
            # validate_request() guarantees that `command` is one of these
            # two values, so `result` is always bound when we reach the
            # return statement below.
            if command == "dump":
                result = solver.dump()
            elif command == "depsolve":
                result = solver.depsolve(transactions)
        except dnf.exceptions.MarkingErrors as e:
            printe("error install_specs")
            return None, {
                "kind": "MarkingErrors",
                "reason": f"Error occurred when marking packages for installation: {e}"
            }
        except dnf.exceptions.DepsolveError as e:
            printe("error depsolve")
            # collect the list of requested packages for the error message
            pkgs = []
            for t in transactions:
                pkgs.extend(t["package-specs"])
            return None, {
                "kind": "DepsolveError",
                "reason": f"There was a problem depsolving {', '.join(pkgs)}: {e}"
            }
        except dnf.exceptions.RepoError as e:
            return None, {
                "kind": "RepoError",
                "reason": f"There was a problem reading a repository: {e}"
            }
        except dnf.exceptions.Error as e:
            printe("error repository setup")
            return None, {
                "kind": type(e).__name__,
                "reason": str(e)
            }
    return result, None
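
# For reference, a depsolve request read from stdin might look like the
# following. The repository id, metalink URL, platform id, and package specs
# are illustrative only; the keys are the ones consumed by validate_request(),
# setup_cachedir(), and solve() above:
#
#   {
#     "command": "depsolve",
#     "arch": "x86_64",
#     "module_platform_id": "platform:f34",
#     "cachedir": "/var/cache/dnf-json",
#     "arguments": {
#       "repos": [
#         {"id": "fedora", "metalink": "https://example.com/metalink"}
#       ],
#       "transactions": [
#         {"package-specs": ["@core", "kernel"], "exclude-specs": []}
#       ]
#     }
#   }
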
def printe(*msg):
    print(*msg, file=sys.stderr)


def fail(err):
    printe(f"{err['kind']}: {err['reason']}")
    print(json.dumps(err))
    sys.exit(1)


def respond(result):
    print(json.dumps(result))


def validate_request(request):
    command = request.get("command")
    valid_cmds = ("depsolve", "dump")
    if command not in valid_cmds:
        return {
            "kind": "InvalidRequest",
            "reason": f"invalid command '{command}': must be one of {', '.join(valid_cmds)}"
        }

    if not request.get("arch"):
        return {
            "kind": "InvalidRequest",
            "reason": "no 'arch' specified"
        }
    if not request.get("module_platform_id"):
        return {
            "kind": "InvalidRequest",
            "reason": "no 'module_platform_id' specified"
        }
    arguments = request.get("arguments")
    if not arguments:
        return {
            "kind": "InvalidRequest",
            "reason": "empty 'arguments'"
        }
    if not arguments.get("repos"):
        return {
            "kind": "InvalidRequest",
            "reason": "no 'repos' specified"
        }

    return None


def main():
    request = json.load(sys.stdin)
    err = validate_request(request)
    if err:
        fail(err)

    cachedir, err = setup_cachedir(request)
    if err:
        fail(err)

    result, err = solve(request, cachedir)
    if err:
        fail(err)
    else:
        respond(result)


if __name__ == "__main__":
    main()
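
# Example invocation, reading a request like the one sketched above from a
# hypothetical request.json and printing the result (or a structured error)
# to stdout:
#
#   ./dnf-json < request.json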