debian-forge/osbuild/objectstore.py
Christian Kellner 42a365d12f osbuild: no auto commit of the last stage
Do not automatically commit the last stage of the pipeline to the
store. The last stage is most likely not what should be cached,
because it will contain all the individual customization and thus
be very likely different for different users. Instead, the dnf or
rpm stages have a higher chance of being the same and thus are
better candidates for caching.
Technically this change is done via two big changes that build
upon new features introduced in the previous commits, most notably
the copy on write semantics of Object and that input/output is
being done via `objectstore.Object` instead of plain paths. The
first of the two big changes is to create one new `Object` at
the beginning of `pipeline.run` and use that, in write mode via
`Object.write` across invocations of `stage.run` calls, with
checkpoints being created after each stage on demand.
The very same `Object` is then used in read mode via `Object.read`
as the input tree for the Assembler. After the assembler is done
the resulting image/tree is manually committed to the store.
The other big change is to remove the `ObjectStore.commit` call
from the `ObjectStore.new` method and thus the automatic commit
after the last stage is gone.
NB: since the build tree is being retrieved in `get_buildtree`
from the store, a checkpoint for the last stage of the build
pipeline is forced for now. Future commits will refactor this and
do away with that forced commit as well.
Change osbuildtest.TestCase to always create a checkpoint at
the final tree (the last stage of the pipeline), since tests
need it to check the tree contents.
2020-02-28 16:11:49 +01:00

279 lines
8.6 KiB
Python

import contextlib
import errno
import hashlib
import os
import subprocess
import tempfile
from typing import Optional
from . import treesum
# Names exported by `from <this module> import *`; only the store class
# is listed here.
__all__ = [
"ObjectStore",
]
@contextlib.contextmanager
def suppress_oserror(*errnos):
    """Suppress any `OSError` whose `errno` is listed in `errnos`.

    Like `contextlib.suppress`, but can differentiate between OSErrors:
    only errors carrying one of the given error numbers are swallowed.
    Every other exception, including an OSError with a different errno,
    propagates to the caller.

    Args:
        *errnos: error numbers (e.g. `errno.EEXIST`) to suppress.

    Raises:
        OSError: if the body raises an OSError whose errno is not in
            `errnos`.
    """
    try:
        yield
    except OSError as e:
        if e.errno not in errnos:
            # bare `raise` re-raises the active exception as-is
            raise
def mount(source, target, bind=True, ro=True, private=True, mode="0755"):
    """Mount `source` at `target` by running the `mount` command.

    Args:
        source: what to mount.
        target: where to mount it.
        bind: if truthy, add `bind` to the `-o` option list.
        ro: if truthy, add `ro` to the `-o` option list.
        private: if truthy, pass `--make-private` to `mount`.
        mode: if truthy, appended verbatim to the `-o` option list.
            NOTE(review): the value is passed as a bare token (e.g.
            "0755"), not as "mode=0755" -- confirm this is intended.

    Raises:
        subprocess.CalledProcessError: if `mount` exits non-zero.
    """
    # (token, enabled) pairs, in the order they appear after `-o`
    candidates = (
        ("bind", bind),
        ("ro", ro),
        (mode, mode),
    )
    opts = [token for token, enabled in candidates if enabled]

    cmd = ["mount"]
    if private:
        cmd.append("--make-private")
    if opts:
        cmd.extend(["-o", ",".join(opts)])
    cmd.extend([source, target])

    subprocess.run(cmd, check=True)
def umount(target, lazy=True):
    """Unmount `target` by running the `umount` command.

    Args:
        target: the mount point to unmount.
        lazy: if truthy, pass `--lazy` to `umount`.

    Raises:
        subprocess.CalledProcessError: if `umount` exits non-zero.
    """
    flags = ["--lazy"] if lazy else []
    subprocess.run(["umount", *flags, target], check=True)
class Object:
    """A temporary tree that lives in a work directory of an `ObjectStore`.

    The object owns a temporary work directory (created via
    `store.tempdir`) with a `tree` sub-directory that holds its content.
    It can optionally have a `base`, i.e. the id of an object in the
    store; the content of the base is only copied into `tree` once it is
    actually needed (see `init`). Until then, reads are served directly
    from the base.

    Can be used as a context manager; on exit the work directory is
    removed.
    """

    def __init__(self, store: "ObjectStore"):
        self.store = store
        self._base = None
        self._readers = 0  # number of currently active `read()` contexts
        self._init = True  # False while base content is not yet copied
        self._workdir = None
        self._tree = None
        # creates the work directory and the empty `tree` within it
        self.reset()

    def init(self) -> None:
        """Populate the tree with the content of the base, if needed.

        Does nothing if the object is already initialized. Otherwise
        the base is copied into the tree with `cp -a --reflink=auto`.

        Raises:
            ValueError: if the object is not writable or is being read.
            subprocess.CalledProcessError: if `cp` exits non-zero.
        """
        self._check_writable()
        self._check_readers()
        if not self._init:
            base_path = self.store.resolve_ref(self._base)
            cmd = ["cp", "--reflink=auto", "-a", f"{base_path}/.", self._tree]
            subprocess.run(cmd, check=True)
            self._init = True

    @property
    def base(self) -> Optional[str]:
        """The id of the base object, or None if there is none"""
        return self._base

    @base.setter
    def base(self, base_id: Optional[str]):
        # with a base set, its content still has to be copied on demand;
        # without one there is nothing to initialize from
        self._base = base_id
        self._init = not base_id

    @property
    def treesum(self) -> str:
        """The treesum of the object: hex SHA-256 fed by `treesum.treesum`"""
        hasher = hashlib.sha256()
        with self._open() as fd:
            treesum.treesum(hasher, fd)
        return hasher.hexdigest()

    @property
    def _path(self) -> str:
        """Internal: path that currently holds the object's content"""
        if self._init or not self._base:
            return self._tree
        # base content was not copied yet, so read the base directly
        return self.store.resolve_ref(self._base)

    def write(self) -> str:
        """Initialize the object and return the path of its writable tree

        Raises:
            ValueError: if the object is not writable or is being read.
        """
        self._check_writable()
        self._check_readers()
        self.init()
        return self._tree

    @contextlib.contextmanager
    def read(self) -> str:
        """Context manager that yields a path to read the content from

        The current content is mounted (with the defaults of `mount`,
        i.e. a read-only bind mount) on a temporary directory inside
        the work directory; it is unmounted again when the context is
        left. While at least one such context is active, operations
        that check for readers raise ValueError.
        """
        self._check_writable()
        with self.tempdir("mount") as mountpoint:
            mount(self._path, mountpoint)
            try:
                self._readers += 1
                yield mountpoint
            finally:
                umount(mountpoint)
                self._readers -= 1

    def store_tree(self, destination: str):
        """Move the tree to `destination`, then reset the object

        The (initialized) tree is moved via `os.rename`. If that fails
        with ENOTEMPTY or EEXIST, i.e. the destination already exists,
        the error is ignored and nothing is moved. In both cases the
        object is reset afterwards and gets a fresh work directory.

        Raises:
            ValueError: if the object is not writable or is being read.
        """
        self._check_writable()
        self._check_readers()
        self.init()
        with suppress_oserror(errno.ENOTEMPTY, errno.EEXIST):
            os.rename(self._tree, destination)
        self.reset()

    def reset(self):
        """Drop the current work directory and set up a new, empty one"""
        self.cleanup()
        self._workdir = self.store.tempdir(suffix="object")
        self._tree = os.path.join(self._workdir.name, "tree")
        os.makedirs(self._tree, mode=0o755, exist_ok=True)
        # a base, if any, must be copied again into the new tree
        self._init = not self._base

    def cleanup(self):
        """Remove the work directory; the object is not writable after

        Raises:
            ValueError: if the object is being read.
        """
        self._check_readers()
        if not self._workdir:
            return
        self._workdir.cleanup()
        self._workdir = None

    def _check_readers(self):
        """Internal: Raise a ValueError if there are readers"""
        if self._readers:
            raise ValueError("Read operation is ongoing")

    def _check_writable(self):
        """Internal: Raise a ValueError if not writable"""
        if not self._workdir:
            raise ValueError("Object is not writable")

    @contextlib.contextmanager
    def _open(self):
        """Internal: yield a directory file descriptor for the content

        The descriptor refers to the path obtained via `read()` and is
        closed when the context is left.
        """
        with self.read() as path:
            fd = os.open(path, os.O_DIRECTORY)
            try:
                yield fd
            finally:
                os.close(fd)

    def tempdir(self, suffix=None):
        """Return a `tempfile.TemporaryDirectory` inside the work directory

        A non-empty `suffix` is prefixed with "-" before being used as
        the suffix of the directory name.
        """
        parent = self._workdir.name
        tagged = "-" + suffix if suffix else suffix
        return tempfile.TemporaryDirectory(dir=parent, suffix=tagged)

    def __enter__(self):
        self._check_writable()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
        # False when an exception is in flight, so it is not swallowed
        return exc_type is None
class ObjectStore:
    """A directory based store of trees, addressed via references.

    Inside the `store` directory, `objects/` holds the trees (named by
    their treesum) and `refs/` holds symlinks, named by object id, that
    point to entries in `objects/`.
    """

    def __init__(self, store):
        self.store = store
        self.objects = f"{store}/objects"
        self.refs = f"{store}/refs"
        # make sure the directory layout exists
        for directory in (self.store, self.objects, self.refs):
            os.makedirs(directory, exist_ok=True)

    def contains(self, object_id):
        """Check whether a reference for `object_id` exists in the store"""
        return bool(object_id) and os.access(self.resolve_ref(object_id),
                                             os.F_OK)

    def resolve_ref(self, object_id: Optional[str]) -> Optional[str]:
        """Returns the path to the given object_id

        For an empty `object_id` None is returned. The path is
        constructed only; it is not checked for existence.
        """
        return f"{self.refs}/{object_id}" if object_id else None

    def tempdir(self, prefix=None, suffix=None):
        """Return a tempfile.TemporaryDirectory within the store"""
        return tempfile.TemporaryDirectory(dir=self.store,
                                           prefix=prefix,
                                           suffix=suffix)

    @contextlib.contextmanager
    def get(self, object_id):
        """Context manager yielding a path to read `object_id` from

        A temporary `Object` with `object_id` as its base is created
        and the path provided by its `read()` is yielded. The temporary
        object is cleaned up when the context is left.
        """
        with Object(self) as obj:
            obj.base = object_id
            with obj.read() as path:
                yield path

    @contextlib.contextmanager
    def new(self, base_id=None):
        """Creates a new temporary `Object`.

        This method must be used as a context manager. It yields a
        temporary instance of `Object`, which can then be used for
        interaction with the store; the object is cleaned up when the
        context is left.

        Nothing is committed automatically: if the content of the
        object was changed (via `Object.write`), it has to be manually
        committed to the store with `commit()`.
        """
        with Object(self) as obj:
            if base_id:
                # the given id provides the initial content of the new
                # object. NB: `Object` has copy-on-write semantics, so
                # no data is copied at this point
                obj.base = base_id
            yield obj

    def commit(self, obj: Object, object_id: str) -> str:
        """Commits a Object to the object store

        Moves the content of `obj` to the objects directory of the
        store, using its content hash (`obj.treesum`) as the name, and
        creates a symlink to it ('../objects/{hash}') in the references
        directory with `object_id` as the name ('refs/{object_id}').
        If the link already exists, it will be atomically replaced.

        Returns: The treesum of the object
        """
        digest = obj.treesum

        # Objects are stored under their content hash. Ideally a given
        # object_id (i.e. a given config) always results in the same
        # content hash, but that is not guaranteed. If an object with
        # the same treesum already exists, the existing one is used.
        obj.store_tree(f"{self.objects}/{digest}")

        # Link the object_id (config hash) in the refs directory to the
        # treesum (content hash) in the objects directory. The link is
        # created in a scratch directory first and then moved into
        # place, so that an existing link of that name is atomically
        # replaced; the object it pointed to is left in place (it may
        # be in use).
        with self.tempdir() as scratch:
            staged = f"{scratch}/link"
            os.symlink(f"../objects/{digest}", staged)
            os.replace(staged, self.resolve_ref(object_id))

        # The new reference now is the base of `obj`. The object is not
        # initialized at this point, but any subsequent call to
        # `obj.write()` will initialize it again.
        # NB: if an object with the same treesum already existed in the
        # store, `obj.store_tree()` did not actually write anything to
        # the store; `obj` will then be initialized with the content of
        # that already existing object.
        obj.base = object_id

        return digest