debian-forge/osbuild/objectstore.py
Christian Kellner 5deb1be514 objectstore: change Object.move to .store_tree
Now that Object manages its work directory itself, re-create the
latter when the its tree is moved, i.e. when the object is being
committed to the store. This means that after the object has been
written to the store it is in the same state is if it was new and
can be used in the very same way.
If the move itself fails (the rename(2) fails), the tree and its
contents is cleaned up with the reset of the work directory.
Rename the `move` method to `store_tree` to better reflect how the
method should be used, i.e. to store the tree corresponding to the
Object instance.
2020-02-28 16:11:49 +01:00

209 lines
6.9 KiB
Python

import contextlib
import errno
import hashlib
import os
import subprocess
import tempfile
from typing import Optional
from . import treesum
__all__ = [
"ObjectStore",
]
@contextlib.contextmanager
def suppress_oserror(*errnos):
"""A context manager that suppresses any OSError with an errno in `errnos`.
Like contextlib.suppress, but can differentiate between OSErrors.
"""
try:
yield
except OSError as e:
if e.errno not in errnos:
raise e
class Object:
def __init__(self, store: "ObjectStore"):
self._workdir = None
self._tree = None
self.store = store
self.reset()
def init(self, source: str) -> None:
"""Initialize the object with source content"""
subprocess.run(["cp", "--reflink=auto", "-a",
f"{source}/.", self.path],
check=True)
@property
def path(self) -> str:
return self._tree
@property
def treesum(self) -> str:
"""Calculate the treesum of the object"""
with self.open() as fd:
m = hashlib.sha256()
treesum.treesum(m, fd)
treesum_hash = m.hexdigest()
return treesum_hash
@contextlib.contextmanager
def open(self):
"""Open the directory and return the file descriptor"""
try:
fd = os.open(self.path, os.O_DIRECTORY)
yield fd
finally:
os.close(fd)
def store_tree(self, destination: str):
"""Store the tree at destination and reset itself
Moves the tree atomically by using rename(2). If the
target already exist, does nothing. Afterwards it
resets itself and can be used as if it was new.
"""
with suppress_oserror(errno.ENOTEMPTY, errno.EEXIST):
os.rename(self.path, destination)
self.reset()
def reset(self):
self.cleanup()
self._workdir = self.store.tempdir(suffix="object")
self._tree = os.path.join(self._workdir.name, "tree")
os.makedirs(self._tree, mode=0o755, exist_ok=True)
def cleanup(self):
if self._workdir:
self._workdir.cleanup()
self._workdir = None
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.cleanup()
return exc_type is None
class ObjectStore:
def __init__(self, store):
self.store = store
self.objects = f"{store}/objects"
self.refs = f"{store}/refs"
os.makedirs(self.store, exist_ok=True)
os.makedirs(self.objects, exist_ok=True)
os.makedirs(self.refs, exist_ok=True)
def contains(self, object_id):
if not object_id:
return False
return os.access(self.resolve_ref(object_id), os.F_OK)
def resolve_ref(self, object_id: Optional[str]) -> Optional[str]:
"""Returns the path to the given object_id"""
if not object_id:
return None
return f"{self.refs}/{object_id}"
def tempdir(self, prefix=None, suffix=None):
"""Return a tempfile.TemporaryDirectory within the store"""
return tempfile.TemporaryDirectory(dir=self.store,
prefix=prefix,
suffix=suffix)
@contextlib.contextmanager
def get(self, object_id):
with self.tempdir() as tmp:
if object_id:
path = self.resolve_ref(object_id)
subprocess.run(["mount", "-o", "bind,ro,mode=0755", path, tmp], check=True)
try:
yield tmp
finally:
subprocess.run(["umount", "--lazy", tmp], check=True)
else:
# None was given as object_id, just return an empty directory
yield tmp
@contextlib.contextmanager
def new(self, object_id, base_id=None):
"""Creates a new `Object` for `object_id`.
This method must be used as a context manager. It returns a new
temporary instance of `Object`. It will only be committed to the
store if the context completes without raising an exception.
"""
with Object(self) as obj:
# the object that is yielded will be added to the content store
# on success as object_id
if base_id:
# the base, the working tree and the output dir are all
# on the same fs, so attempt a lightweight copy if the
# fs supports it
obj.init(self.resolve_ref(base_id))
yield obj
# if the yield above raises an exception, the working tree
# is cleaned up by tempfile, otherwise, the it the content
# of it was created or modified by the caller. All that is
# left to do is to commit it to the object store
self.commit(obj, object_id)
def snapshot(self, object_path: str, object_id: str) -> str:
"""Commit `object_path` to store and ref it as `object_id`
Create a snapshot of `object_path` and store it via its
content hash in the object directory; additionally
create a new reference to it via `object_id` in the
reference directory.
Returns: The treesum of the snapshot
"""
# Make a new temporary directory and Object; initialize
# the latter with the contents of `object_path` and commit
# it to the store
with Object(self) as obj:
obj.init(object_path)
return self.commit(obj, object_id)
def commit(self, obj: Object, object_id: str) -> str:
"""Commits a Object to the object store
Move the contents of the obj (Object) to object directory
of the store with the content hash (obj.treesum) as its name.
Creates a symlink to that ('objects/{hash}') in the references
directory with the object_id as the name ('refs/{object_id}).
If the link already exists, it will be atomically replaced.
Returns: The treesum of the object
"""
treesum_hash = obj.treesum
# the object is stored in the objects directory using its content
# hash as its name, ideally a given object_id (i.e., given config)
# will always produce the same content hash, but that is not
# guaranteed. If an object with the same treesum already exist, us
# the existing one instead
obj.store_tree(f"{self.objects}/{treesum_hash}")
# symlink the object_id (config hash) in the refs directory to the
# treesum (content hash) in the objects directory. If a symlink by
# that name alreday exists, atomically replace it, but leave the
# backing object in place (it may be in use).
with self.tempdir() as tmp:
link = f"{tmp}/link"
os.symlink(f"../objects/{treesum_hash}", link)
os.replace(link, self.resolve_ref(object_id))
return treesum_hash