debian-forge/sources/org.osbuild.inline
Simon de Vlieger 2c42c46c48 sources: move parallelisation into source
This moves the parallelisation decisions into the sources themselves,
making the `download` method abstract inside `osbuild` itself.
2024-01-26 09:58:48 +01:00

97 lines
2.7 KiB
Python
Executable file

#!/usr/bin/python3
"""Source for binary data encoded inline in the manifest
This source can be used to transport data in the source
section of the manifest. Each resource is ascii-encoded
in the `data` property, where the encoding is specified
in the `encoding` property. Each resource is content
addressed via the hash value of the raw data before the
ascii encoding. This hash value is verified after the
resource is decoded and written to the store.
"""
import base64
import concurrent.futures
import contextlib
import os
import sys
from typing import Dict
from osbuild import sources
from osbuild.util.checksum import verify_file
# JSON-schema fragment describing the options this source accepts.
# `items` is required and maps "<algorithm>:<hex digest>" keys (md5, sha1,
# sha256, sha384 or sha512) to objects that must carry exactly two
# properties: `encoding` (only "base64" is permitted) and `data` (the
# encoded payload as a string).
# NOTE(review): the text is a list of members without enclosing braces, so
# it is presumably wrapped into a full schema object by whatever loads this
# module -- the consumer is not visible in this file; confirm.
SCHEMA = """
"definitions": {
"item": {
"description": "Inline data indexed by their checksum",
"type": "object",
"additionalProperties": false,
"patternProperties": {
"(md5|sha1|sha256|sha384|sha512):[0-9a-f]{32,128}": {
"type": "object",
"additionalProperties": false,
"required": ["encoding", "data"],
"properties": {
"encoding": {
"description": "The specific encoding of `data`",
"enum": ["base64"]
},
"data": {
"description": "The ascii encoded raw data",
"type": "string"
}
}
}
}
}
},
"additionalProperties": false,
"required": ["items"],
"properties": {
"items": {"$ref": "#/definitions/item"}
}
"""
class InlineSource(sources.SourceService):
    """Source service that writes manifest-embedded data into the cache.

    Every item is keyed by its checksum and carries base64 text in its
    `data` property; the decoded bytes end up in a file named after the
    checksum inside `self.cache`.
    """

    content_type = "org.osbuild.files"

    def download(self, items: Dict) -> None:
        """Materialise every item that is not yet present.

        Items for which `self.exists` reports true are skipped; the rest
        are passed through `self.transform` and handed to `fetch_one` on
        a thread pool limited to `self.max_workers` threads.
        """
        # Lazy on purpose: nothing is checked or transformed until the
        # generator is drained inside the executor context below.
        pending = (
            self.transform(checksum, desc)
            for checksum, desc in items.items()
            if not self.exists(checksum, desc)
        )

        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            # Transpose the (checksum, desc) pairs into one iterable per
            # positional argument of `fetch_one`, as `map` expects.
            columns = zip(*pending)
            # The results themselves are irrelevant; walking the iterator
            # is what re-raises an exception from a worker thread.
            for _ in pool.map(self.fetch_one, *columns):
                pass

    def fetch_one(self, checksum, desc):
        """Decode a single item and move it into the cache.

        Does nothing when the cache already holds a file for `checksum`.
        The `encoding` property of `desc` is not consulted; the payload
        is always decoded as base64.

        Raises:
            RuntimeError: the decoded bytes written to disk do not match
                `checksum`.
        """
        destination = os.path.join(self.cache, checksum)
        staging = os.path.join(self.tmpdir, checksum)

        if not os.path.isfile(destination):
            payload = base64.b64decode(desc["data"])

            # Verify what actually landed on disk rather than the bytes
            # in memory: that covers both a bad payload and a bad write.
            with open(staging, "wb") as handle:
                handle.write(payload)

            if verify_file(staging, checksum):
                # Another writer may have put the file in place already;
                # that outcome is just as good as ours.
                with contextlib.suppress(FileExistsError):
                    os.rename(staging, destination)
            else:
                raise RuntimeError(f"Checksum mismatch for {format(checksum)}")
def main():
    """Build the inline source service from the command line and run it."""
    cli_args = sys.argv[1:]
    InlineSource.from_args(cli_args).main()
# Start the service only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()