debian-forge/tools/osbuild-mpp
Pierre-Yves Chibon 407bb73a12 Copy the local_vars dictionary to avoid eval modifying it
This avoids the local_vars dictionary getting filled with a large
number of built-in variables that are of no use for this dictionary.

This commit was created from work by Alexander Larsson.

Signed-off-by: Pierre-Yves Chibon <pingou@pingoured.fr>
2021-09-24 13:32:51 +02:00

#!/usr/bin/python3
"""Manifest-Pre-Processor
This manifest-pre-processor takes a path to a manifest, loads it,
runs various pre-processing steps and then produces a resulting manifest,
written to a specified filename (or stdout if the filename is "-").
Manifest format versions "1" and "2" are supported.
Pipeline Import:
This tool imports a pipeline from another file and inserts it into a manifest
at the position where the import instruction is located. Sources from the
imported manifest are merged with the existing sources.
The parameters for this pre-processor for format version "1" look like this:
```
...
"mpp-import-pipeline": {
"path": "./manifest.json"
}
...
```
The parameters for this pre-processor for format version "2" look like this
(the `id` key names the pipeline to import from the target manifest):
```
...
"mpp-import-pipeline": {
"path": "./manifest.json",
"id:" "build"
}
...
```
Depsolving:
This tool adjusts the `org.osbuild.rpm` stage. It consumes the `mpp-depsolve`
option and produces a package list and source entries.
It supports version "1" and version "2" of the manifest description format.
The parameters for this pre-processor, version "1", look like this:
```
...
{
"name": "org.osbuild.rpm",
...
"options": {
...
"mpp-depsolve": {
"architecture": "x86_64",
"module-platform-id": "f32",
"baseurl": "http://mirrors.kernel.org/fedora/releases/32/Everything/x86_64/os",
"repos": [
{
"id": "default",
"metalink": "https://mirrors.fedoraproject.org/metalink?repo=fedora-32&arch=$basearch"
}
],
"packages": [
"@core",
"dracut-config-generic",
"grub2-pc",
"kernel"
],
"excludes": [
(optional excludes)
]
}
}
}
...
```
The parameters for this pre-processor, version "2", look like this:
```
...
{
"name": "org.osbuild.rpm",
...
"inputs": {
"packages": {
"mpp-depsolve": {
see above
}
}
}
}
...
```
Variable expansion and substitution:
Variables can be set in the `mpp-vars` top-level dict (which is removed from
the final result) or overridden via the -D,--define command-line option.
They can then be used within the manifest via f-string formatting through
the `mpp-format-{int,string,json}` directives. Additionally, variables are
substituted via template-string substitution, a la `$variable`, inside
the mpp blocks.
Example:
```
{
"mpp-vars": {
"variable": "some string",
"rootfs_size": 20480,
"arch:": "x86_64",
"ref": "fedora/$arch/osbuild"
},
...
{
"foo": "a value",
"bar": { "mpp-format-string": "This expands {variable} but can also eval like {variable.upper()}" }
"disk_size": { "mpp-format-int": "{rootfs_size * 512}" }
}
...
```
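Variables set in `mpp-vars` can be overridden on the command line; the value
after `=` is parsed as JSON, and a bare key sets the variable to true. A
minimal sketch (file names are placeholders):
```
osbuild-mpp -D 'rootfs_size=40960' -D 'variable="other string"' input.mpp.json output.json
```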
Defining partition layouts for disk images:
It is possible to define a partition layout via `mpp-define-image`. The defined
layout is actually written to a temporary sparse file and read back via `sfdisk`,
so that all partition data like `size` and `start` include actual padding and
such. The `image` variable will be defined with `size` and `layout` keys, the
latter containing the partition layout data. It can be accessed via the variable
expansion explained above.
Example:
```
...
"mpp-define-image": {
"size": "10737418240",
"table": {
"uuid": "D209C89E-EA5E-4FBD-B161-B461CCE297E0",
"label": "gpt",
"partitions": [
{
"id": "bios-boot",
"start": 2048,
"size": 2048,
"type": "21686148-6449-6E6F-744E-656564454649",
"bootable": true,
"uuid": "FAC7F1FB-3E8D-4137-A512-961DE09A5549"
},
...
}
...
```
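Example invocation:
A typical run resolves imports and depsolves, expands variables and writes
the result; "-" as DESTPATH writes to stdout (the paths here are placeholders):
```
osbuild-mpp -I ./pipelines --dnf-cache ./dnf-cache manifest.mpp.json -
```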
"""
import argparse
import collections
import contextlib
import json
import os
import pathlib
import string
import subprocess
import sys
import tempfile
import urllib.parse
from typing import Dict, Optional
import dnf
import hawkey
from osbuild.util.rhsm import Subscriptions
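# Return element[key], initializing it to a copy of `default` if the key is absent.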
def element_enter(element, key, default):
if key not in element:
element[key] = default.copy()
return element[key]
class PkgInfo:
def __init__(self, checksum, name, evr, arch):
self.checksum = checksum
self.name = name
self.evr = evr
self.arch = arch
self.url = None
self.secrets = None
@classmethod
def from_dnf_package(cls, pkg: dnf.package.Package):
checksum_type = hawkey.chksum_name(pkg.chksum[0])
checksum_hex = pkg.chksum[1].hex()
checksum = f"{checksum_type}:{checksum_hex}"
return cls(checksum, pkg.name, pkg.evr, pkg.arch)
@property
def evra(self):
return f"{self.evr}.{self.arch}"
@property
def nevra(self):
return f"{self.name}-{self.evra}"
def __str__(self):
return self.nevra
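# Thin wrapper around dnf.Base: configures repositories from an `mpp-depsolve`
# description and resolves the requested package set.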
class DepSolver:
def __init__(self, cachedir, persistdir):
self.cachedir = cachedir
self.persistdir = persistdir
self.basedir = None
self.subscriptions = None
self.secrets = {}
self.base = dnf.Base()
def reset(self, arch, basedir, module_platform_id, ignore_weak_deps):
base = self.base
base.reset(goal=True, repos=True, sack=True)
self.secrets.clear()
if self.cachedir:
base.conf.cachedir = self.cachedir
base.conf.config_file_path = "/dev/null"
base.conf.persistdir = self.persistdir
base.conf.module_platform_id = module_platform_id
base.conf.install_weak_deps = not ignore_weak_deps
base.conf.arch = arch
self.base = base
self.basedir = basedir
def expand_baseurl(self, baseurl):
"""Expand non-uris as paths relative to basedir into a file:/// uri"""
basedir = self.basedir
try:
result = urllib.parse.urlparse(baseurl)
if not result.scheme:
path = basedir.joinpath(baseurl)
return path.resolve().as_uri()
except: # pylint: disable=bare-except
pass
return baseurl
def get_secrets(self, url, desc):
if not desc:
return None
name = desc.get("name")
if name != "org.osbuild.rhsm":
raise ValueError(f"Unknown secret type: {name}")
try:
# rhsm secrets only need to be retrieved once and can then be reused
if not self.subscriptions:
self.subscriptions = Subscriptions.from_host_system()
secrets = self.subscriptions.get_secrets(url)
except RuntimeError as e:
raise ValueError(f"Error getting secrets: {e.args[0]}") from None
secrets["type"] = "org.osbuild.rhsm"
return secrets
def add_repo(self, desc, baseurl):
repo = dnf.repo.Repo(desc["id"], self.base.conf)
url = None
url_keys = ["baseurl", "metalink", "mirrorlist"]
skip_keys = ["id", "secrets"]
supported = ["baseurl", "metalink", "mirrorlist",
"enabled", "metadata_expire", "gpgcheck", "username", "password", "priority",
"sslverify", "sslcacert", "sslclientkey", "sslclientcert",
"skip_if_unavailable"]
for key in desc.keys():
if key in skip_keys:
continue # We handled this already
if key in url_keys:
url = desc[key]
if key in supported:
value = desc[key]
if key == "baseurl":
value = self.expand_baseurl(value)
setattr(repo, key, value)
else:
raise ValueError(f"Unknown repo config option {key}")
if not url:
url = self.expand_baseurl(baseurl)
if not url:
raise ValueError("repo description does not contain baseurl, metalink, or mirrorlist keys")
secrets = self.get_secrets(url, desc.get("secrets"))
if secrets:
if "ssl_ca_cert" in secrets:
repo.sslcacert = secrets["ssl_ca_cert"]
if "ssl_client_key" in secrets:
repo.sslclientkey = secrets["ssl_client_key"]
if "ssl_client_cert" in secrets:
repo.sslclientcert = secrets["ssl_client_cert"]
self.secrets[repo.id] = secrets["type"]
self.base.repos.add(repo)
return repo
def resolve(self, packages, excludes):
base = self.base
base.reset(goal=True, sack=True)
base.fill_sack(load_system_repo=False)
base.install_specs(packages, exclude=excludes)
base.resolve()
deps = []
for tsi in base.transaction:
if tsi.action not in dnf.transaction.FORWARD_ACTIONS:
continue
path = tsi.pkg.relativepath
reponame = tsi.pkg.reponame
baseurl = self.base.repos[reponame].baseurl[0]
baseurl = self.expand_baseurl(baseurl)
# dep["path"] often starts with a "/", even though it's meant to be
# relative to `baseurl`. Strip any leading slashes, but ensure there's
# exactly one between `baseurl` and the path.
url = urllib.parse.urljoin(baseurl + "/", path.lstrip("/"))
pkg = PkgInfo.from_dnf_package(tsi.pkg)
pkg.url = url
pkg.secrets = self.secrets.get(reponame)
deps.append(pkg)
return deps
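# In-memory description of a single partition; `start` and `size` are in
# 512-byte sectors (see start_in_bytes/size_in_bytes below).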
class Partition:
def __init__(self,
uid: str = None,
pttype: str = None,
start: int = None,
size: int = None,
bootable: bool = False,
name: str = None,
uuid: str = None):
self.id = uid
self.type = pttype
self.start = start
self.size = size
self.bootable = bootable
self.name = name
self.uuid = uuid
self.index = None
@property
def start_in_bytes(self):
return (self.start or 0) * 512
@property
def size_in_bytes(self):
return (self.size or 0) * 512
@classmethod
def from_dict(cls, js):
p = cls(uid=js.get("id"),
pttype=js.get("type"),
start=js.get("start"),
size=js.get("size"),
bootable=js.get("bootable"),
name=js.get("name"),
uuid=js.get("uuid"))
return p
def to_dict(self):
data = {}
if self.start:
data["start"] = self.start
if self.size:
data["size"] = self.size
if self.type:
data["type"] = self.type
if self.bootable:
data["bootable"] = self.bootable
if self.name:
data["name"] = self.name
if self.uuid:
data["uuid"] = self.uuid
return data
class PartitionTable:
def __init__(self, label, uuid, partitions):
self.label = label
self.uuid = uuid
self.partitions = partitions or []
def __getitem__(self, key) -> Partition:
if isinstance(key, int):
return self.partitions[key]
        if isinstance(key, str):
            for part in self.partitions:
                if part.id == key:
                    return part
            raise KeyError(f"no partition with id '{key}'")
        raise ValueError("unsupported type")
def write_to(self, target, sync=True):
"""Write the partition table to disk"""
# generate the command for sfdisk to create the table
command = f"label: {self.label}\nlabel-id: {self.uuid}"
for partition in self.partitions:
fields = []
for field in ["start", "size", "type", "name", "uuid"]:
value = getattr(partition, field)
if value:
fields += [f'{field}="{value}"']
if partition.bootable:
fields += ["bootable"]
command += "\n" + ", ".join(fields)
subprocess.run(["sfdisk", "-q", "--no-tell-kernel", target],
input=command,
encoding='utf-8',
check=True)
if sync:
self.update_from(target)
def update_from(self, target):
"""Update and fill in missing information from disk"""
r = subprocess.run(["sfdisk", "--json", target],
stdout=subprocess.PIPE,
encoding='utf-8',
check=True)
disk_table = json.loads(r.stdout)["partitiontable"]
disk_parts = disk_table["partitions"]
assert len(disk_parts) == len(self.partitions)
for i, part in enumerate(self.partitions):
part.index = i
part.start = disk_parts[i]["start"]
part.size = disk_parts[i]["size"]
part.type = disk_parts[i].get("type")
part.name = disk_parts[i].get("name")
@classmethod
    def from_dict(cls, js) -> "PartitionTable":
ptuuid = js["uuid"]
pttype = js["label"]
partitions = js.get("partitions")
parts = [Partition.from_dict(p) for p in partitions]
table = cls(pttype, ptuuid, parts)
return table
def __str__(self) -> str:
data = {}
if self.uuid:
data["uuid"] = self.uuid
data["label"] = self.label
data["partitions"] = [
pt.to_dict() for pt in self.partitions
]
return json.dumps(data, indent=2)
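# Couples an image size with a realized partition layout: building one writes
# the table to a temporary sparse file so sfdisk can fill in computed values.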
class Image:
def __init__(self, size, layout):
self.size = size
self.layout = layout
@classmethod
def from_dict(cls, js):
size = js["size"]
data = js["table"]
with tempfile.TemporaryDirectory() as tmp:
image = os.path.join(tmp, "disk.img")
subprocess.run(["truncate", "--size", size, image], check=True)
table = PartitionTable.from_dict(data)
table.write_to(image)
return cls(size, table)
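# Base class for loaded manifests; `load` dispatches to the version-specific
# subclasses below based on the manifest's "version" field.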
class ManifestFile:
@staticmethod
def load(path):
with open(path) as f:
return ManifestFile.load_from_fd(f, path)
@staticmethod
def load_from_fd(f, path):
        # We use OrderedDict to preserve key order (for Python < 3.7)
data = json.load(f, object_pairs_hook=collections.OrderedDict)
version = int(data.get("version", "1"))
if version == 1:
return ManifestFileV1(path, data)
if version == 2:
return ManifestFileV2(path, data)
raise ValueError(f"Unknown manfest version {version}")
def __init__(self, path, root, version):
self.path = pathlib.Path(path)
self.basedir = self.path.parent
self.root = root
self.version = version
self.sources = element_enter(self.root, "sources", {})
self.source_urls = {}
self.vars = {}
self.init_vars()
def get_mpp_node(self, parent: Dict, name: str) -> Optional[Dict]:
name = "mpp-" + name
desc = parent.get(name)
if not desc:
return None
del parent[name]
return self.substitute_vars(desc)
def init_vars(self):
variables = self.get_mpp_node(self.root, "vars")
if not variables:
return
for k, v in variables.items():
fakeroot = [v]
self._process_format(fakeroot)
self.vars[k] = fakeroot[0]
self.substitute_vars(self.vars)
def set_vars(self, args):
for arg in args:
if "=" in arg:
key, value_s = arg.split("=", 1)
value = json.loads(value_s)
else:
key = arg
value = True
self.vars[key] = value
def substitute_vars(self, node):
"""Expand variables in `node` with the manifest variables"""
if isinstance(node, dict):
for k, v in node.items():
node[k] = self.substitute_vars(v)
elif isinstance(node, list):
for i, v in enumerate(node):
node[i] = self.substitute_vars(v)
elif isinstance(node, str):
tpl = string.Template(node)
node = tpl.safe_substitute(self.vars)
return node
def load_import(self, path, search_dirs):
m = self.find_and_load_manifest(path, search_dirs)
if m.version != self.version:
raise ValueError(f"Incompatible manifest version {m.version}")
return m
def find_and_load_manifest(self, path, dirs):
for p in [self.basedir] + dirs:
with contextlib.suppress(FileNotFoundError):
fullpath = os.path.join(p, path)
with open(fullpath, "r") as f:
return ManifestFile.load_from_fd(f, path)
raise FileNotFoundError(f"Could not find manifest '{path}'")
def depsolve(self, solver: DepSolver, desc: Dict):
repos = desc.get("repos", [])
packages = desc.get("packages", [])
excludes = desc.get("excludes", [])
baseurl = desc.get("baseurl")
arch = desc.get("architecture")
if not packages:
return []
module_platform_id = desc["module-platform-id"]
ignore_weak_deps = bool(desc.get("ignore-weak-deps"))
solver.reset(arch, self.basedir, module_platform_id, ignore_weak_deps)
for repo in repos:
solver.add_repo(repo, baseurl)
return solver.resolve(packages, excludes)
def add_packages(self, deps, pipeline_name):
checksums = []
pkginfos = {}
for dep in deps:
name, checksum, url = dep.name, dep.checksum, dep.url
pkginfos[name] = dep
if dep.secrets:
data = {
"url": url,
"secrets": {"name": dep.secrets}
}
else:
data = url
self.source_urls[checksum] = data
checksums.append(checksum)
if "rpms" not in self.vars:
self.vars["rpms"] = {}
self.vars["rpms"][pipeline_name] = pkginfos
return checksums
def sort_urls(self):
def get_sort_key(item):
key = item[1]
if isinstance(key, dict):
key = key["url"]
return key
urls = self.source_urls
if not urls:
return urls
urls_sorted = sorted(urls.items(), key=get_sort_key)
urls.clear()
urls.update(collections.OrderedDict(urls_sorted))
def write(self, file, sort_keys=False):
self.sort_urls()
json.dump(self.root, file, indent=2, sort_keys=sort_keys)
file.write("\n")
def _process_format(self, node):
def _is_format(node):
if not isinstance(node, dict):
return False
for m in ("int", "string", "json"):
if f"mpp-format-{m}" in node:
return True
return False
def _eval_format(node, local_vars):
if "mpp-format-string" in node:
res_type = "string"
format_string = node["mpp-format-string"]
elif "mpp-format-json" in node:
res_type = "json"
format_string = node["mpp-format-json"]
else:
res_type = "int"
format_string = node["mpp-format-int"]
# pylint: disable=eval-used # yolo this is fine!
            # Note: we pass a copy of local_vars, because eval() inserts a
            # `__builtins__` entry into the globals mapping it is given, and
            # we do not want that to leak back into our variables.
res = eval(f'f\'\'\'{format_string}\'\'\'', dict(local_vars))
if res_type == "int":
return int(res)
elif res_type == "json":
return json.loads(res)
return res
if isinstance(node, dict):
for key in list(node.keys()):
value = node[key]
if _is_format(value):
node[key] = _eval_format(value, self.vars)
else:
self._process_format(value)
if isinstance(node, list):
for i, value in enumerate(node):
if _is_format(value):
node[i] = _eval_format(value, self.vars)
else:
self._process_format(value)
def process_format(self):
self._process_format(self.root)
def process_partition(self):
desc = self.get_mpp_node(self.root, "define-image")
if not desc:
return
name = desc.get("id", "image")
self.vars[name] = Image.from_dict(desc)
class ManifestFileV1(ManifestFile):
def __init__(self, path, data):
super().__init__(path, data, 1)
self.pipeline = element_enter(self.root, "pipeline", {})
files = element_enter(self.sources, "org.osbuild.files", {})
self.source_urls = element_enter(files, "urls", {})
def _process_import(self, build, search_dirs):
mpp = self.get_mpp_node(build, "import-pipeline")
if not mpp:
return
path = mpp["path"]
imp = self.load_import(path, search_dirs)
self.vars.update(imp.vars)
# We only support importing manifests with URL sources. Other sources are
# not supported, yet. This can be extended in the future, but we should
# maybe rather try to make sources generic (and repeatable?), so we can
# deal with any future sources here as well.
assert list(imp.sources.keys()) == ["org.osbuild.files"]
# We import `sources` from the manifest, as well as a pipeline description
# from the `pipeline` entry. Make sure nothing else is in the manifest, so
# we do not accidentally miss new features.
assert sorted(imp.root) == sorted(["pipeline", "sources"])
# Now with everything imported and verified, we can merge the pipeline back
# into the original manifest. We take all URLs and merge them in the pinned
# url-array, and then we take the pipeline and simply override any original
# pipeline at the position where the import was declared.
self.source_urls.update(imp.source_urls)
build["pipeline"] = imp.pipeline
def process_imports(self, search_dirs):
current = self.root
while current:
self._process_import(current, search_dirs)
current = current.get("pipeline", {}).get("build")
def _process_depsolve(self, solver, stage, pipeline_name):
if stage.get("name", "") != "org.osbuild.rpm":
return
options = stage.get("options")
if not options:
return
mpp = self.get_mpp_node(options, "depsolve")
if not mpp:
return
packages = element_enter(options, "packages", [])
deps = self.depsolve(solver, mpp)
checksums = self.add_packages(deps, pipeline_name)
packages += checksums
def process_depsolves(self, solver, pipeline=None, depth=0):
if pipeline is None:
pipeline = self.pipeline
if depth == 0:
pipeline_name = "stages"
elif depth == 1:
pipeline_name = "build"
else:
pipeline_name = "build" + str(depth)
stages = element_enter(pipeline, "stages", [])
for stage in stages:
self._process_depsolve(solver, stage, pipeline_name)
build = pipeline.get("build")
if build:
if "pipeline" in build:
self.process_depsolves(solver, build["pipeline"], depth+1)
class ManifestFileV2(ManifestFile):
def __init__(self, path, data):
super().__init__(path, data, 2)
self.pipelines = element_enter(self.root, "pipelines", {})
files = element_enter(self.sources, "org.osbuild.curl", {})
self.source_urls = element_enter(files, "items", {})
def get_pipeline_by_name(self, name):
for pipeline in self.pipelines:
if pipeline["name"] == name:
return pipeline
raise ValueError(f"Pipeline '{name}' not found in {self.path}")
def _process_import(self, pipeline, search_dirs):
mpp = self.get_mpp_node(pipeline, "import-pipeline")
if not mpp:
return
path = mpp["path"]
imp = self.load_import(path, search_dirs)
self.vars.update(imp.vars)
for source, desc in imp.sources.items():
target = self.sources.get(source)
if not target:
# new source, just copy everything
self.sources[source] = desc
continue
if desc.get("options"):
options = element_enter(target, "options", {})
options.update(desc["options"])
items = element_enter(target, "items", {})
items.update(desc.get("items", {}))
target = imp.get_pipeline_by_name(mpp["id"])
pipeline.update(target)
def process_imports(self, search_dirs):
for pipeline in self.pipelines:
self._process_import(pipeline, search_dirs)
def _process_depsolve(self, solver, stage, pipeline_name):
if stage.get("type", "") != "org.osbuild.rpm":
return
inputs = element_enter(stage, "inputs", {})
packages = element_enter(inputs, "packages", {})
mpp = self.get_mpp_node(packages, "depsolve")
if not mpp:
return
refs = element_enter(packages, "references", {})
deps = self.depsolve(solver, mpp)
checksums = self.add_packages(deps, pipeline_name)
for checksum in checksums:
refs[checksum] = {}
def process_depsolves(self, solver):
for pipeline in self.pipelines:
name = pipeline.get("name", "")
stages = element_enter(pipeline, "stages", [])
for stage in stages:
self._process_depsolve(solver, stage, name)
def main():
    parser = argparse.ArgumentParser(description="Manifest pre-processor")
parser.add_argument(
"--dnf-cache",
metavar="PATH",
type=os.path.abspath,
default=None,
help="Path to DNF cache-directory to use",
)
parser.add_argument(
"-I,--import-dir",
dest="searchdirs",
default=[],
action="append",
help="Search for import in that directory",
)
parser.add_argument(
"--sort-keys",
dest="sort_keys",
action='store_true',
help="Sort keys in generated json",
)
parser.add_argument(
"-D,--define",
dest="vars",
action='append',
help="Set/Override variable, format is key=Json"
)
parser.add_argument(
"src",
metavar="SRCPATH",
help="Input manifest",
)
parser.add_argument(
"dst",
metavar="DESTPATH",
help="Output manifest",
)
args = parser.parse_args(sys.argv[1:])
m = ManifestFile.load(args.src)
# First resolve all imports
m.process_imports(args.searchdirs)
m.process_partition()
    # Override variables set in the main or imported manifests
if args.vars:
m.set_vars(args.vars)
with tempfile.TemporaryDirectory() as persistdir:
solver = DepSolver(args.dnf_cache, persistdir)
m.process_depsolves(solver)
m.process_format()
with sys.stdout if args.dst == "-" else open(args.dst, "w") as f:
m.write(f, args.sort_keys)
if __name__ == "__main__":
main()