debian-forge/sources/org.osbuild.inline
Tomáš Hozza e172e6e6f6 Sources/inline: support lzma+base64 encoded data
Extend the inline source to support lzma compressed and base64 encoded
'data'. This will allow us to reduce the potential manifest size when
embedding big files. The aim is specifically at eventually embedding
SBOMs of the image.

An example single SBOM can be a JSON file with size of about 1.9 MiB.
The lzma+base64 combination reduces the 'data' to embed to "only" around
250 KiB.

Signed-off-by: Tomáš Hozza <thozza@redhat.com>
2025-05-13 10:27:36 +02:00

102 lines
2.8 KiB
Python
Executable file

#!/usr/bin/python3
"""Source for binary data encoded inline in the manifest
This source can be used to transport data in the source
section of the manifest. Each resource is ascii-encoded
in the `data` property, where the encoding is specified
in the `encoding` property. Each resource is content
addressed via the hash value of the raw data before the
ascii encoding. This hash value is verified after the
resource is decoded and written to the store.
"""
import base64
import contextlib
import lzma
import os
import sys
from typing import Dict
from osbuild import sources
from osbuild.util.checksum import verify_file
# JSON-schema fragment describing the options this source accepts.
# Note that the text is a list of object members without enclosing
# braces, so it is not a complete JSON document on its own (presumably
# the module loader wraps it before parsing -- not visible in this file).
#
# `items` is required and maps "<algorithm>:<hex digest>" keys (md5,
# sha1, sha256, sha384 or sha512) to objects that must carry exactly
# two properties: `encoding` ("base64" or "lzma+base64") and `data`
# (the encoded payload as a string).
SCHEMA = """
"definitions": {
"item": {
"description": "Inline data indexed by their checksum",
"type": "object",
"additionalProperties": false,
"patternProperties": {
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
"type": "object",
"additionalProperties": false,
"required": ["encoding", "data"],
"properties": {
"encoding": {
"description": "The specific encoding of `data`",
"enum": ["base64", "lzma+base64"]
},
"data": {
"description": "The ascii encoded raw data",
"type": "string"
}
}
}
}
}
},
"additionalProperties": false,
"required": ["items"],
"properties": {
"items": {"$ref": "#/definitions/item"}
}
"""
class InlineSource(sources.SourceService):
    """Source service that unpacks resources embedded in the manifest.

    Every resource is keyed by its checksum and described by a mapping
    holding the `encoding` name and the ascii-encoded `data`.
    """

    content_type = "org.osbuild.files"

    def fetch_all(self, items: Dict) -> None:
        """Fetch every entry of *items* that is not yet available.

        *items* maps a checksum to its resource description. Entries for
        which `self.exists()` reports true are skipped; all others are
        handed to `fetch_one()` in iteration order.
        """
        for checksum, desc in items.items():
            if self.exists(checksum, desc):
                # Nothing to do for resources that are already present.
                continue
            self.fetch_one(checksum, desc)

    def fetch_one(self, checksum, desc):
        """Decode one inline resource and place it in the cache.

        The payload in ``desc["data"]`` is decoded according to
        ``desc["encoding"]`` ("base64" or "lzma+base64"), written to a
        file named after *checksum* in the temporary directory, verified
        against *checksum* and finally renamed into the cache directory.

        Raises:
            RuntimeError: if the encoding is not supported or the written
                file does not match *checksum*.
        """
        final_path = os.path.join(self.cache, checksum)
        staging_path = os.path.join(self.tmpdir, checksum)

        if os.path.isfile(final_path):
            return

        encoding = desc["encoding"]
        if encoding not in ("base64", "lzma+base64"):
            raise RuntimeError(f"Unknown encoding {encoding}")

        # Both supported encodings carry base64 on the outside; the lzma
        # variant additionally needs to be decompressed afterwards.
        payload = base64.b64decode(desc["data"])
        if encoding == "lzma+base64":
            payload = lzma.decompress(payload)

        # Verify the file as it ended up on disk rather than the bytes
        # in memory: this catches corrupt data and bad writes alike.
        with open(staging_path, "wb") as out:
            out.write(payload)

        checksum_ok = verify_file(staging_path, checksum)
        if not checksum_ok:
            raise RuntimeError(f"Checksum mismatch for {format(checksum)}")

        # An already existing target is not treated as an error.
        with contextlib.suppress(FileExistsError):
            os.rename(staging_path, final_path)
def main():
    """Build the inline source service from the command line and run it."""
    cli_args = sys.argv[1:]
    InlineSource.from_args(cli_args).main()
# Start the source service only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()