debian-forge/osbuild/sources.py
Thomas Lavocat 1de74ce2c9 sources: generalizing download method
Before, the download method was defined in the inherited class of each
program. With the same kind of workflow redefined every time. This
contribution aims at making the workflow more clear and to generalize
what can be in the SourceService class.

The download worklow is as follow:
Setup -> Filter -> Prepare -> Download

The setup mainly step sets up caches. Where the download data will be
stored in the end.

The filter step is used to discard some of the items to download based
on some criterion. By default, it is used to verify if an item is
already in the cache using the item's checksum.

The Prepare step goes from each element and let the overloading step the
ability to alter each item before downloading it. This is used mainly
for the curl command which for rhel must generate the subscriptions.

Then the download step will call fetch_one for each item. Here the
download can be performed sequentially or in parallel depending on the
number of workers selected.
2022-05-11 04:32:42 -05:00

107 lines
3.3 KiB
Python

import abc
import contextlib
import os
import json
import tempfile
import concurrent.futures
from abc import abstractmethod
from typing import Dict, Tuple
from . import host
from .objectstore import ObjectStore
from .util.types import PathLike
class Source:
"""
A single source with is corresponding options.
"""
def __init__(self, info, items, options) -> None:
self.info = info
self.items = items or {}
self.options = options
def download(self, mgr: host.ServiceManager, store: ObjectStore, libdir: PathLike):
source = self.info.name
cache = os.path.join(store.store, "sources")
args = {
"options": self.options,
"cache": cache,
"output": None,
"checksums": [],
"libdir": os.fspath(libdir)
}
client = mgr.start(f"source/{source}", self.info.path)
with self.make_items_file(store.tmp) as fd:
fds = [fd]
reply = client.call_with_fds("download", args, fds)
return reply
@contextlib.contextmanager
def make_items_file(self, tmp):
with tempfile.TemporaryFile("w+", dir=tmp, encoding="utf-8") as f:
json.dump(self.items, f)
f.seek(0)
yield f.fileno()
class SourceService(host.Service):
"""Source host service"""
max_workers = 1
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.cache = None
self.options = None
self.tmpdir = None
@abc.abstractmethod
def fetch_one(self, checksum, desc) -> None:
"""Performs the actual fetch of an element described by its checksum and its descriptor"""
def exists(self, checksum, _desc) -> bool:
"""Returns True if the item to download is in cache. """
return os.path.isfile(f"{self.cache}/{checksum}")
# pylint: disable=[no-self-use]
def transform(self, checksum, desc) -> Tuple:
"""Modify the input data before downloading. By default only transforms an item object to a Tupple."""
return checksum, desc
def download(self, items: Dict) -> None:
items = filter(lambda i: not self.exists(i[0], i[1]), items.items()) # discards items already in cache
items = map(lambda i: self.transform(i[0], i[1]), items) # prepare each item to be downloaded
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
for _ in executor.map(self.fetch_one, *zip(*items)):
pass
@property
@classmethod
@abstractmethod
def content_type(cls):
"""The content type of the source."""
@staticmethod
def load_items(fds):
with os.fdopen(fds.steal(0)) as f:
items = json.load(f)
return items
def setup(self, args):
self.cache = os.path.join(args["cache"], self.content_type)
os.makedirs(self.cache, exist_ok=True)
self.options = args["options"]
def dispatch(self, method: str, args, fds):
if method == "download":
self.setup(args)
with tempfile.TemporaryDirectory(prefix=".unverified-", dir=self.cache) as self.tmpdir:
return self.download(SourceService.load_items(fds)), None
raise host.ProtocolError("Unknown method")