Extend the inline source to support lzma-compressed and base64-encoded 'data'.

This will allow us to reduce the potential manifest size when embedding big files. It is aimed specifically at eventually embedding SBOMs of the image: a single example SBOM can be a JSON file of about 1.9 MiB, and the lzma+base64 combination reduces the 'data' to embed to "only" around 250 KiB.

Signed-off-by: Tomáš Hozza <thozza@redhat.com>
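For illustration, a minimal sketch (not part of this change) of how a manifest-generating tool could produce such an item. The helper name encode_inline_item is hypothetical; what is grounded in the source below is the order of operations (compress with lzma, then base64-encode, mirroring the decoder's base64-decode followed by lzma-decompress), the "sha256:<hex>" key format from the schema, and the fact that the checksum is taken over the raw data before any encoding.

#!/usr/bin/python3
# Illustrative sketch only -- `encode_inline_item` is not part of osbuild.
import base64
import hashlib
import lzma


def encode_inline_item(path: str, compress: bool = True):
    """Return (checksum-key, item) for one file to embed inline."""
    with open(path, "rb") as f:
        raw = f.read()

    # The source content-addresses the *raw* data, before any ascii encoding.
    checksum = "sha256:" + hashlib.sha256(raw).hexdigest()

    if compress:
        # lzma first, then base64 -- the reverse of the decode path.
        data = base64.b64encode(lzma.compress(raw)).decode("ascii")
        encoding = "lzma+base64"
    else:
        data = base64.b64encode(raw).decode("ascii")
        encoding = "base64"

    return checksum, {"encoding": encoding, "data": data}

Per the numbers above, compressing before base64-encoding is what brings a ~1.9 MiB SBOM JSON down to roughly 250 KiB of embedded 'data'; plain base64 alone would instead grow it by about a third.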
#!/usr/bin/python3
"""Source for binary data encoded inline in the manifest

This source can be used to transport data in the source
section of the manifest. Each resource is ascii-encoded
in the `data` property, where the encoding is specified
in the `encoding` property. Each resource is content-
addressed via the hash value of the raw data before the
ascii encoding. This hash value is verified after the
resource is decoded and written to the store.
"""


import base64
import contextlib
import lzma
import os
import sys

from typing import Dict

from osbuild import sources
from osbuild.util.checksum import verify_file


SCHEMA = """
"definitions": {
  "item": {
    "description": "Inline data indexed by their checksum",
    "type": "object",
    "additionalProperties": false,
    "patternProperties": {
      "(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
        "type": "object",
        "additionalProperties": false,
        "required": ["encoding", "data"],
        "properties": {
          "encoding": {
            "description": "The specific encoding of `data`",
            "enum": ["base64", "lzma+base64"]
          },
          "data": {
            "description": "The ascii encoded raw data",
            "type": "string"
          }
        }
      }
    }
  }
},
"additionalProperties": false,
"required": ["items"],
"properties": {
  "items": {"$ref": "#/definitions/item"}
}
"""


class InlineSource(sources.SourceService):

    content_type = "org.osbuild.files"

    def fetch_all(self, items: Dict) -> None:
        # Discard items that are already in the cache
        filtered = filter(lambda i: not self.exists(i[0], i[1]), items.items())

        for args in filtered:
            self.fetch_one(*args)

    def fetch_one(self, checksum, desc):
        target = os.path.join(self.cache, checksum)
        floating = os.path.join(self.tmpdir, checksum)

        if os.path.isfile(target):
            return

        # Decode the ascii-encoded payload back into the raw bytes
        encoding = desc["encoding"]
        if encoding == "base64":
            data = base64.b64decode(desc["data"])
        elif encoding == "lzma+base64":
            data = base64.b64decode(desc["data"])
            data = lzma.decompress(data)
        else:
            raise RuntimeError(f"Unknown encoding {encoding}")

        # Write the bits to disk and then verify the checksum.
        # This ensures that 1) the data is ok and 2) that we
        # wrote it correctly as well.
        with open(floating, "wb") as f:
            f.write(data)

        if not verify_file(floating, checksum):
            raise RuntimeError(f"Checksum mismatch for {checksum}")

        with contextlib.suppress(FileExistsError):
            os.rename(floating, target)


def main():
    service = InlineSource.from_args(sys.argv[1:])
    service.main()


if __name__ == '__main__':
    main()
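As a usage sketch (illustrative, not taken from this change): what a manifest entry consuming this source could look like, together with the decode round trip that fetch_one() performs. The "items" shape follows the SCHEMA above; the wrapping "sources" map keyed by the source name is an assumption based on the general osbuild manifest layout.

# Illustrative only: an inline item plus the decode round trip used by
# fetch_one() above (base64-decode, then lzma-decompress, then checksum).
import base64
import hashlib
import lzma

payload = b'{"spdxVersion": "SPDX-2.3"}\n'   # stand-in for a real SBOM
data = base64.b64encode(lzma.compress(payload)).decode("ascii")
checksum = "sha256:" + hashlib.sha256(payload).hexdigest()

# Assumed manifest layout: a "sources" map keyed by the source name.
sources_section = {
    "org.osbuild.inline": {
        "items": {
            checksum: {"encoding": "lzma+base64", "data": data},
        },
    },
}

item = sources_section["org.osbuild.inline"]["items"][checksum]
raw = lzma.decompress(base64.b64decode(item["data"]))
assert hashlib.sha256(raw).hexdigest() == checksum.split(":", 1)[1]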