diff --git a/bootc-base-imagectl b/bootc-base-imagectl
index 871d5ea..51884f2 100755
--- a/bootc-base-imagectl
+++ b/bootc-base-imagectl
@@ -54,6 +54,35 @@ def run_build_rootfs(args):
         print(f"Copying /{f} to {dst}")
         shutil.copy('/' + f, dst)
 
+def run_rechunk(args):
+    """
+    Rebuild an image from container storage as split, chunked layers.
+
+    Runs `rpm-ostree experimental compose build-chunked-oci`, passing
+    args.from_image as --from and containers-storage:args.to_image as
+    --output. args.max_layers, when not None, is forwarded as
+    --max-layers. Exits with status 1 if the command fails or cannot
+    be started.
+    """
+    argv = [
+        'rpm-ostree',
+        'experimental',
+        'compose',
+        'build-chunked-oci']
+    if args.max_layers is not None:
+        argv.append(f"--max-layers={args.max_layers}")
+    argv.extend(['--bootc',
+                 '--format-version=1',
+                 f'--from={args.from_image}',
+                 f'--output=containers-storage:{args.to_image}'])
+    try:
+        subprocess.run(argv, check=True)
+    except (subprocess.CalledProcessError, OSError) as e:
+        # OSError covers rpm-ostree being missing or not executable, which
+        # would otherwise surface as a raw traceback instead of a clean error.
+        print(f"Error executing command: {e}", file=sys.stderr)
+        sys.exit(1)
+
 def run_list(args):
     d = '/' + MANIFESTDIR
     for ent in sorted(os.listdir(d)):
@@ -80,6 +109,12 @@ if __name__ == "__main__":
     build_rootfs.add_argument("target", help="Path to the target root directory that will be generated.")
     build_rootfs.set_defaults(func=run_build_rootfs)
 
+    cmd_rechunk = subparsers.add_parser('rechunk', help="Generate a new container image with split, reproducible, chunked layers")
+    cmd_rechunk.add_argument("--max-layers", type=int, help="Configure the number of output layers")
+    cmd_rechunk.add_argument("from_image", help="Operate on this image in the container storage")
+    cmd_rechunk.add_argument("to_image", help="Output a new image to the container storage")
+    cmd_rechunk.set_defaults(func=run_rechunk)
+
     cmd_list = subparsers.add_parser('list', help='List available manifests')
     cmd_list.set_defaults(func=run_list)
 
diff --git a/bootc-base-imagectl.md b/bootc-base-imagectl.md
index 9a90b34..b3cba04 100644
--- a/bootc-base-imagectl.md
+++ b/bootc-base-imagectl.md
@@ -47,6 +47,54 @@ This command takes just two arguments:
 - A path to the target root filesystem which will be generated as a directory. The target should not already exist (but its parent must exist).
 
+## Using bootc-base-imagectl rechunk
+
+This operation is strongly related to `build-rootfs` but is also orthogonal;
+it can be used on a "regular" container build as well.
+
+This command assumes it will be run as a container image, and defaults
+to wanting write access to the container storage.
+
+```
+podman run --rm --privileged -v /var/lib/containers:/var/lib/containers quay.io/fedora/fedora-bootc:rawhide \
+    bootc-base-imagectl rechunk quay.io/exampleos/exampleos:build quay.io/exampleos/exampleos:latest
+```
+
+### Rationale
+
+When performing a complex container derivation, there are several issues:
+
+#### Replaced duplicate content
+
+When e.g. upgrading or replacing the kernel or other large packages
+as part of a container build (without squashing all layers) then
+the old replaced content will still be present.
+
+#### Removed content still present
+
+Similarly, `RUN dnf -y remove` etc. will still retain that removed
+content in prior layers.
+
+#### Timestamp drift
+
+By default, many tools will use the current timestamp when writing
+files. `rpm` will do this (unless `SOURCE_DATE_EPOCH` is set), and
+other tools like `cp` and `curl` will as well.
+
+This means that every build of the image will produce a new
+tar stream (with new timestamps) - that will get pushed to a registry
+and downloaded by clients, even if the content didn't actually change.
+
+### What rechunk does: split reproducible chunked images
+
+The `bootc-base-imagectl rechunk` command fixes all of these issues
+by taking an input container, operating on its final merged filesystem
+tree (hence removed/overridden files are handled), and then splitting it up
+(currently based on the RPM database) into separate layers (tarballs).
+
+Further, because bootc uses OSTree today, and OSTree canonicalizes all timestamps
+to zero on the client side, this tool does that at build time.
+
 ### Other options
 
 `bootc-base-imagectl list` will enumerate available configurations that