Defined a Hash() method on rpmmd.RepoConfig that calculates a SHA-256 ID
for a repository based on its configuration. Identical configurations
should produce the same ID. The Name and ImageTypeTags of a repository
aren't taken into account. These attributes affect a repository's
functional configuration.
This ID lets us change the way we handle repository configurations in a
few places:
- Preparing the depsolve job arguments is simpler since we have
predictable IDs for the repository configurations. We don't need to
rely on the index of a RepoConfig in a list to identify or access it,
which prevented us from building a list of all repository
configurations, since we needed them to be placed in the list in a
certain order.
- Associating packages from the depsolve result with the repository
configuration (in depsToRPMMD) no longer relies on an ID string
converted from and back to an integer index. Repositories define
their own IDs.
- Tests are a bit messier now but the changes simplify the main code, so
it's an acceptable trade-off.
- Fixtures need to change based on the repository configuration for
the test.
- We need to calculate the ID for the repository configuration for
the temporary file server URL.
333 lines
11 KiB
Python
Executable file
333 lines
11 KiB
Python
Executable file
#!/usr/bin/python3
|
|
# pylint: disable=invalid-name
|
|
|
|
"""
|
|
A JSON-based interface for depsolving using DNF.
|
|
|
|
Reads a request through stdin and prints the result to stdout.
|
|
In case of error, a structured error is printed to stdout as well.
|
|
"""
|
|
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from datetime import datetime
|
|
|
|
import dnf
|
|
import hawkey
|
|
|
|
|
|
class Solver():
|
|
|
|
# pylint: disable=too-many-arguments
|
|
def __init__(self, repos, module_platform_id, persistdir, cachedir, arch):
|
|
self.base = dnf.Base()
|
|
|
|
# Enable fastestmirror to ensure we choose the fastest mirrors for
|
|
# downloading metadata (when depsolving) and downloading packages.
|
|
self.base.conf.fastestmirror = True
|
|
|
|
# We use the same cachedir for multiple architectures. Unfortunately,
|
|
# this is something that doesn't work well in certain situations
|
|
# with zchunk:
|
|
# Imagine that we already have cache for arch1. Then, we use dnf-json
|
|
# to depsolve for arch2. If ZChunk is enabled and available (that's
|
|
# the case for Fedora), dnf will try to download only differences
|
|
# between arch1 and arch2 metadata. But, as these are completely
|
|
# different, dnf must basically redownload everything.
|
|
# For downloding deltas, zchunk uses HTTP range requests. Unfortunately,
|
|
# if the mirror doesn't support multi range requests, then zchunk will
|
|
# download one small segment per a request. Because we need to update
|
|
# the whole metadata (10s of MB), this can be extremely slow in some cases.
|
|
# I think that we can come up with a better fix but let's just disable
|
|
# zchunk for now. As we are already downloading a lot of data when
|
|
# building images, I don't care if we download even more.
|
|
self.base.conf.zchunk = False
|
|
|
|
# Set the rest of the dnf configuration.
|
|
self.base.conf.module_platform_id = module_platform_id
|
|
self.base.conf.config_file_path = "/dev/null"
|
|
self.base.conf.persistdir = persistdir
|
|
self.base.conf.cachedir = cachedir
|
|
self.base.conf.substitutions['arch'] = arch
|
|
self.base.conf.substitutions['basearch'] = dnf.rpm.basearch(arch)
|
|
|
|
for repo in repos:
|
|
self.base.repos.add(self._dnfrepo(repo, self.base.conf))
|
|
self.base.fill_sack(load_system_repo=False)
|
|
|
|
@staticmethod
|
|
def _dnfrepo(desc, parent_conf=None):
|
|
"""Makes a dnf.repo.Repo out of a JSON repository description"""
|
|
|
|
repo = dnf.repo.Repo(desc["id"], parent_conf)
|
|
|
|
if "name" in desc:
|
|
repo.name = desc["name"]
|
|
if "baseurl" in desc:
|
|
repo.baseurl = desc["baseurl"]
|
|
elif "metalink" in desc:
|
|
repo.metalink = desc["metalink"]
|
|
elif "mirrorlist" in desc:
|
|
repo.mirrorlist = desc["mirrorlist"]
|
|
else:
|
|
assert False
|
|
|
|
if desc.get("ignoressl", False):
|
|
repo.sslverify = False
|
|
if "sslcacert" in desc:
|
|
repo.sslcacert = desc["sslcacert"]
|
|
if "sslclientkey" in desc:
|
|
repo.sslclientkey = desc["sslclientkey"]
|
|
if "sslclientcert" in desc:
|
|
repo.sslclientcert = desc["sslclientcert"]
|
|
|
|
# In dnf, the default metadata expiration time is 48 hours. However,
|
|
# some repositories never expire the metadata, and others expire it much
|
|
# sooner than that. We therefore allow this to be configured. If nothing
|
|
# is provided we error on the side of checking if we should invalidate
|
|
# the cache. If cache invalidation is not necessary, the overhead of
|
|
# checking is in the hundreds of milliseconds. In order to avoid this
|
|
# overhead accumulating for API calls that consist of several dnf calls,
|
|
# we set the expiration to a short time period, rather than 0.
|
|
repo.metadata_expire = desc.get("metadata_expire", "20s")
|
|
|
|
return repo
|
|
|
|
def _repo_checksums(self):
|
|
checksums = {}
|
|
for repo in self.base.repos.iter_enabled():
|
|
# Uses the same algorithm as libdnf to find cache dir:
|
|
# https://github.com/rpm-software-management/libdnf/blob/master/libdnf/repo/Repo.cpp#L1288
|
|
if repo.metalink:
|
|
url = repo.metalink
|
|
elif repo.mirrorlist:
|
|
url = repo.mirrorlist
|
|
elif repo.baseurl:
|
|
url = repo.baseurl[0]
|
|
else:
|
|
assert False
|
|
|
|
digest = hashlib.sha256(url.encode()).hexdigest()[:16]
|
|
|
|
repomd_file = f"{repo.id}-{digest}/repodata/repomd.xml"
|
|
with open(f"{self.base.conf.cachedir}/{repomd_file}", "rb") as f:
|
|
repomd = f.read()
|
|
|
|
checksums[repo.id] = "sha256:" + hashlib.sha256(repomd).hexdigest()
|
|
|
|
return checksums
|
|
|
|
@staticmethod
|
|
def _timestamp_to_rfc3339(timestamp):
|
|
return datetime.utcfromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ')
|
|
|
|
def dump(self):
|
|
packages = []
|
|
for package in self.base.sack.query().available():
|
|
packages.append({
|
|
"name": package.name,
|
|
"summary": package.summary,
|
|
"description": package.description,
|
|
"url": package.url,
|
|
"repo_id": package.repoid,
|
|
"epoch": package.epoch,
|
|
"version": package.version,
|
|
"release": package.release,
|
|
"arch": package.arch,
|
|
"buildtime": self._timestamp_to_rfc3339(package.buildtime),
|
|
"license": package.license
|
|
})
|
|
return {
|
|
"checksums": self._repo_checksums(),
|
|
"packages": packages
|
|
}
|
|
|
|
def depsolve(self, transactions):
|
|
last_transaction = []
|
|
|
|
for idx, transaction in enumerate(transactions):
|
|
self.base.reset(goal=True)
|
|
self.base.sack.reset_excludes()
|
|
|
|
# don't install weak-deps for transactions after the 1st transaction
|
|
if idx > 0:
|
|
self.base.conf.install_weak_deps = False
|
|
|
|
# set the packages from the last transaction as installed
|
|
for installed_pkg in last_transaction:
|
|
self.base.package_install(installed_pkg, strict=True)
|
|
|
|
# depsolve the current transaction
|
|
self.base.install_specs(
|
|
transaction.get("package-specs"),
|
|
transaction.get("exclude-specs"),
|
|
reponame=transaction.get("repo-ids"),
|
|
)
|
|
self.base.resolve()
|
|
|
|
# store the current transaction result
|
|
last_transaction.clear()
|
|
for tsi in self.base.transaction:
|
|
# Avoid using the install_set() helper, as it does not guarantee
|
|
# a stable order
|
|
if tsi.action not in dnf.transaction.FORWARD_ACTIONS:
|
|
continue
|
|
last_transaction.append(tsi.pkg)
|
|
|
|
dependencies = []
|
|
for package in last_transaction:
|
|
dependencies.append({
|
|
"name": package.name,
|
|
"epoch": package.epoch,
|
|
"version": package.version,
|
|
"release": package.release,
|
|
"arch": package.arch,
|
|
"repo_id": package.repoid,
|
|
"path": package.relativepath,
|
|
"remote_location": package.remote_location(),
|
|
"checksum": (
|
|
f"{hawkey.chksum_name(package.chksum[0])}:"
|
|
f"{package.chksum[1].hex()}"
|
|
)
|
|
})
|
|
|
|
return {
|
|
"checksums": self._repo_checksums(),
|
|
"dependencies": dependencies
|
|
}
|
|
|
|
|
|
def setup_cachedir(request):
|
|
arch = request["arch"]
|
|
# If dnf-json is run as a service, we don't want users to be able to set the cache
|
|
cache_dir = os.environ.get("OVERWRITE_CACHE_DIR", "")
|
|
if cache_dir:
|
|
cache_dir = os.path.join(cache_dir, arch)
|
|
else:
|
|
cache_dir = request.get("cachedir", "")
|
|
|
|
if not cache_dir:
|
|
return "", {"kind": "Error", "reason": "No cache dir set"}
|
|
|
|
return cache_dir, None
|
|
|
|
|
|
def solve(request, cache_dir):
|
|
command = request["command"]
|
|
arch = request["arch"]
|
|
module_platform_id = request["module_platform_id"]
|
|
arguments = request["arguments"]
|
|
|
|
transactions = arguments.get("transactions")
|
|
with tempfile.TemporaryDirectory() as persistdir:
|
|
try:
|
|
solver = Solver(
|
|
arguments["repos"],
|
|
module_platform_id,
|
|
persistdir,
|
|
cache_dir,
|
|
arch
|
|
)
|
|
if command == "dump":
|
|
result = solver.dump()
|
|
elif command == "depsolve":
|
|
result = solver.depsolve(transactions)
|
|
|
|
except dnf.exceptions.MarkingErrors as e:
|
|
printe("error install_specs")
|
|
return None, {
|
|
"kind": "MarkingErrors",
|
|
"reason": f"Error occurred when marking packages for installation: {e}"
|
|
}
|
|
except dnf.exceptions.DepsolveError as e:
|
|
printe("error depsolve")
|
|
return None, {
|
|
"kind": "DepsolveError",
|
|
"reason": f"There was a problem depsolving {arguments['package-specs']}: {e}"
|
|
}
|
|
except dnf.exceptions.RepoError as e:
|
|
return None, {
|
|
"kind": "RepoError",
|
|
"reason": f"There was a problem reading a repository: {e}"
|
|
}
|
|
except dnf.exceptions.Error as e:
|
|
printe("error repository setup")
|
|
return None, {
|
|
"kind": type(e).__name__,
|
|
"reason": str(e)
|
|
}
|
|
return result, None
|
|
|
|
|
|
def printe(*msg):
|
|
print(*msg, file=sys.stderr)
|
|
|
|
|
|
def fail(err):
|
|
printe(f"{err['kind']}: {err['reason']}")
|
|
print(json.dumps(err))
|
|
sys.exit(1)
|
|
|
|
|
|
def respond(result):
|
|
print(json.dumps(result))
|
|
|
|
|
|
def validate_request(request):
|
|
command = request.get("command")
|
|
valid_cmds = ("depsolve", "dump")
|
|
if command not in valid_cmds:
|
|
return {
|
|
"kind": "InvalidRequest",
|
|
"reason": f"invalid command '{command}': must be one of {', '.join(valid_cmds)}"
|
|
}
|
|
|
|
if not request.get("arch"):
|
|
return {
|
|
"kind": "InvalidRequest",
|
|
"reason": "no 'arch' specified"
|
|
}
|
|
|
|
if not request.get("module_platform_id"):
|
|
return {
|
|
"kind": "InvalidRequest",
|
|
"reason": "no 'module_platform_id' specified"
|
|
}
|
|
arguments = request.get("arguments")
|
|
if not arguments:
|
|
return {
|
|
"kind": "InvalidRequest",
|
|
"reason": "empty 'arguments'"
|
|
}
|
|
|
|
if not arguments.get("repos"):
|
|
return {
|
|
"kind": "InvalidRequest",
|
|
"reason": "no 'repos' specified"
|
|
}
|
|
|
|
return None
|
|
|
|
|
|
def main():
|
|
request = json.load(sys.stdin)
|
|
err = validate_request(request)
|
|
if err:
|
|
fail(err)
|
|
|
|
cachedir, err = setup_cachedir(request)
|
|
if err:
|
|
fail(err)
|
|
result, err = solve(request, cachedir)
|
|
if err:
|
|
fail(err)
|
|
else:
|
|
respond(result)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|