Before, the download method was defined in the inherited class of each program. With the same kind of workflow redefined every time. This contribution aims at making the workflow more clear and to generalize what can be in the SourceService class. The download worklow is as follow: Setup -> Filter -> Prepare -> Download The setup mainly step sets up caches. Where the download data will be stored in the end. The filter step is used to discard some of the items to download based on some criterion. By default, it is used to verify if an item is already in the cache using the item's checksum. The Prepare step goes from each element and let the overloading step the ability to alter each item before downloading it. This is used mainly for the curl command which for rhel must generate the subscriptions. Then the download step will call fetch_one for each item. Here the download can be performed sequentially or in parallel depending on the number of workers selected.
107 lines
3.3 KiB
Python
107 lines
3.3 KiB
Python
import abc
|
|
import contextlib
|
|
import os
|
|
import json
|
|
import tempfile
|
|
import concurrent.futures
|
|
from abc import abstractmethod
|
|
from typing import Dict, Tuple
|
|
|
|
from . import host
|
|
from .objectstore import ObjectStore
|
|
from .util.types import PathLike
|
|
|
|
|
|
class Source:
|
|
"""
|
|
A single source with is corresponding options.
|
|
"""
|
|
|
|
def __init__(self, info, items, options) -> None:
|
|
self.info = info
|
|
self.items = items or {}
|
|
self.options = options
|
|
|
|
def download(self, mgr: host.ServiceManager, store: ObjectStore, libdir: PathLike):
|
|
source = self.info.name
|
|
cache = os.path.join(store.store, "sources")
|
|
|
|
args = {
|
|
"options": self.options,
|
|
"cache": cache,
|
|
"output": None,
|
|
"checksums": [],
|
|
"libdir": os.fspath(libdir)
|
|
}
|
|
|
|
client = mgr.start(f"source/{source}", self.info.path)
|
|
|
|
with self.make_items_file(store.tmp) as fd:
|
|
fds = [fd]
|
|
reply = client.call_with_fds("download", args, fds)
|
|
|
|
return reply
|
|
|
|
@contextlib.contextmanager
|
|
def make_items_file(self, tmp):
|
|
with tempfile.TemporaryFile("w+", dir=tmp, encoding="utf-8") as f:
|
|
json.dump(self.items, f)
|
|
f.seek(0)
|
|
yield f.fileno()
|
|
|
|
|
|
class SourceService(host.Service):
|
|
"""Source host service"""
|
|
|
|
max_workers = 1
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
super().__init__(*args, **kwargs)
|
|
self.cache = None
|
|
self.options = None
|
|
self.tmpdir = None
|
|
|
|
@abc.abstractmethod
|
|
def fetch_one(self, checksum, desc) -> None:
|
|
"""Performs the actual fetch of an element described by its checksum and its descriptor"""
|
|
|
|
def exists(self, checksum, _desc) -> bool:
|
|
"""Returns True if the item to download is in cache. """
|
|
return os.path.isfile(f"{self.cache}/{checksum}")
|
|
|
|
# pylint: disable=[no-self-use]
|
|
def transform(self, checksum, desc) -> Tuple:
|
|
"""Modify the input data before downloading. By default only transforms an item object to a Tupple."""
|
|
return checksum, desc
|
|
|
|
def download(self, items: Dict) -> None:
|
|
items = filter(lambda i: not self.exists(i[0], i[1]), items.items()) # discards items already in cache
|
|
items = map(lambda i: self.transform(i[0], i[1]), items) # prepare each item to be downloaded
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
|
for _ in executor.map(self.fetch_one, *zip(*items)):
|
|
pass
|
|
|
|
@property
|
|
@classmethod
|
|
@abstractmethod
|
|
def content_type(cls):
|
|
"""The content type of the source."""
|
|
|
|
@staticmethod
|
|
def load_items(fds):
|
|
with os.fdopen(fds.steal(0)) as f:
|
|
items = json.load(f)
|
|
return items
|
|
|
|
def setup(self, args):
|
|
self.cache = os.path.join(args["cache"], self.content_type)
|
|
os.makedirs(self.cache, exist_ok=True)
|
|
self.options = args["options"]
|
|
|
|
def dispatch(self, method: str, args, fds):
|
|
if method == "download":
|
|
self.setup(args)
|
|
with tempfile.TemporaryDirectory(prefix=".unverified-", dir=self.cache) as self.tmpdir:
|
|
return self.download(SourceService.load_items(fds)), None
|
|
|
|
raise host.ProtocolError("Unknown method")
|