osbuild: integrate FsCache into ObjectStore
Integrate the recently added file system cache `FsCache` into our object store `ObjectStore`.

NB: This changes the commit semantics: previously, a call to `ObjectStore.commit` resulted in the object being in the cache (i/o errors aside). But `FsCache.store`, which is now the backing store for objects, will only commit an object if there is enough space left. We therefore cannot rely on an object being present for reading after a call to `FsCache.store`.

To cope with this we now always copy the object into the cache, even in the case where we previously moved it, i.e. when `commit` is called with an `object_id` matching `Object.id`, which happens when `commit` is called for the last stage in the pipeline. We could keep that optimization, but we would then have to special-case it and call `commit` for these objects only after all objects have been exported, in other words, once we are sure we will never read from any committed object again. The extra complexity does not seem worth the small gain.

Convert all tests to the new semantics and remove the ones that make no sense under this new paradigm.

Add a new command line option `--cache-max-size` which, if specified, sets the maximum size of the cache.
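To illustrate the new caller-side contract, here is a minimal sketch (not part of the commit; the store path, object id and size value are made up, and the exact forms `FsCache` accepts for the size are an assumption):

```python
# Illustrative only: ids, paths and the size value below are assumptions.
from osbuild.objectstore import ObjectStore

# Build run: create an object and attempt to commit it.
with ObjectStore("/var/cache/osbuild/store") as store:
    # roughly what --cache-max-size is meant to configure
    store.maximum_size = 20 * 1024 * 1024 * 1024  # bytes; accepted forms depend on FsCache

    obj = store.new("sha256:aabbcc")  # hypothetical stage id
    # ... populate obj.tree and obj.meta here ...
    store.commit(obj, "sha256:aabbcc")
    # commit() only *attempts* to store the object; if the cache is full,
    # nothing is written and that is not an error.

# Later run: a previous commit is a hint, never a guarantee.
with ObjectStore("/var/cache/osbuild/store") as store:
    cached = store.get("sha256:aabbcc")
    if cached is None:
        ...  # cache miss: rebuild the stage instead of reading the object
```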
parent 1e0e1fa2c2
commit ae0680da11
6 changed files with 158 additions and 247 deletions
@@ -4,10 +4,10 @@ import json
import os
import subprocess
import tempfile
import uuid
from typing import Any, Optional, Set
from typing import Any, Optional, Set, Union

from osbuild.util import jsoncomm, rmrf
from osbuild.util import jsoncomm
from osbuild.util.fscache import FsCache, FsCacheInfo
from osbuild.util.mnt import mount, umount
from osbuild.util.types import PathLike
@@ -105,22 +105,34 @@ class Object:
    def __fspath__(self):
        return self.path

    def __init__(self, store: "ObjectStore", uid: str, mode: Mode):
    def __init__(self, cache: FsCache, uid: str, mode: Mode):
        self._cache = cache
        self._mode = mode
        self._workdir = None
        self._id = uid
        self.store = store
        self._path = None
        self._meta: Optional[Object.Metadata] = None
        self._stack: Optional[contextlib.ExitStack] = None

    def _open_for_reading(self):
        name = self._stack.enter_context(
            self._cache.load(self.id)
        )
        self._path = os.path.join(self._cache, name)

    def _open_for_writing(self):
        name = self._stack.enter_context(
            self._cache.stage()
        )
        self._path = os.path.join(self._cache, name)
        os.makedirs(os.path.join(self._path, "tree"))

    def __enter__(self):
        assert not self.active
        self._stack = contextlib.ExitStack()
        if self.mode == Object.Mode.READ:
            path = self.store.resolve_ref(uid)
            assert path is not None
            self._path = os.path.join(path, "data")
            self._open_for_reading()
        else:
            workdir = self.tempdir("workdir")
            self._workdir = workdir
            self._path = os.path.join(workdir.name, "data")
            tree = os.path.join(self._path, "tree")
            os.makedirs(tree)
            self._open_for_writing()

        # Expose our base path as `os.PathLike` via `PathAdapter`
        # so any changes to it, e.g. via `store_tree`, will be
@@ -128,6 +140,16 @@ class Object:
        wrapped = PathAdapter(self, "_path")
        self._meta = self.Metadata(wrapped, folder="meta")

        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        assert self.active
        self.cleanup()

    @property
    def active(self) -> bool:
        return self._stack is not None

    @property
    def id(self) -> Optional[str]:
        return self._id
@@ -139,39 +161,22 @@ class Object:
    def init(self, base: "Object"):
        """Initialize the object with the base object"""
        self._check_mode(Object.Mode.WRITE)
        assert self.active
        assert self._path
        base.clone(self._path)

    @property
    def tree(self) -> str:
        assert self.active
        assert self._path
        return os.path.join(self._path, "tree")

    @property
    def meta(self) -> Metadata:
        assert self.active
        assert self._meta
        return self._meta

    def store_tree(self):
        """Store the tree with a fresh name and close it

        Moves the tree atomically by using rename(2), to a
        randomly generated unique name.

        This puts the object into the READ state.
        """
        self._check_mode(Object.Mode.WRITE)

        name = str(uuid.uuid4())

        base = os.path.join(self.store.objects, name)
        os.makedirs(base)
        destination = os.path.join(base, "data")
        os.rename(self._path, destination)
        self._path = destination

        self.finalize()
        self.cleanup()

        return name

    def finalize(self):
        if self.mode != Object.Mode.WRITE:
            return
@@ -180,27 +185,15 @@ class Object:
        self._mode = Object.Mode.READ

    def cleanup(self):
        workdir = self._workdir
        if workdir:
            # manually remove the tree, it might contain
            # files with immutable flag set, which will
            # throw off standard Python 3 tempdir cleanup
            rmrf.rmtree(os.path.join(workdir.name, "data"))

            workdir.cleanup()
            self._workdir = None
        if self._stack:
            self._stack.close()
            self._stack = None

    def _check_mode(self, want: Mode):
        """Internal: Raise a ValueError if we are not in the desired mode"""
        if self.mode != want:
            raise ValueError(f"Wrong object mode: {self.mode}, want {want}")

    def tempdir(self, suffix=None):
        if suffix:
            suffix = "-" + suffix
        name = f"object-{self._id[:7]}-"
        return self.store.tempdir(prefix=name, suffix=suffix)

    def export(self, to_directory: PathLike):
        """Copy object into an external directory"""
        subprocess.run(
@@ -283,16 +276,14 @@ class HostTree:

class ObjectStore(contextlib.AbstractContextManager):
    def __init__(self, store: PathLike):
        self.store = store
        self.objects = os.path.join(store, "objects")
        self.refs = os.path.join(store, "refs")
        self.cache = FsCache("osbuild", store)
        self.tmp = os.path.join(store, "tmp")
        os.makedirs(self.store, exist_ok=True)
        os.makedirs(self.objects, exist_ok=True)
        os.makedirs(self.refs, exist_ok=True)
        os.makedirs(self.tmp, exist_ok=True)
        self._objs: Set[Object] = set()
        self._host_tree: Optional[HostTree] = None
        self._stack = contextlib.ExitStack()

    def _get_floating(self, object_id: str) -> Optional[Object]:
        """Internal: get a non-committed object"""
@@ -301,8 +292,33 @@ class ObjectStore(contextlib.AbstractContextManager):
                return obj
        return None

    @property
    def maximum_size(self) -> Optional[Union[int, str]]:
        info = self.cache.info
        return info.maximum_size

    @maximum_size.setter
    def maximum_size(self, size: Union[int, str]):
        info = FsCacheInfo(maximum_size=size)
        self.cache.info = info

    @property
    def active(self) -> bool:
        #pylint: disable=protected-access
        return self.cache._is_active()

    @property
    def store(self):
        return os.fspath(self.cache)

    @property
    def objects(self):
        return os.path.join(self.cache, "objects")

    @property
    def host_tree(self) -> HostTree:
        assert self.active

        if not self._host_tree:
            self._host_tree = HostTree(self)
        return self._host_tree
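The command line wiring for `--cache-max-size` lives in one of the changed files not shown on this page; presumably it funnels the option into the new `maximum_size` setter above, roughly along these lines (the option name comes from the commit message, everything else, including `--store`, its default, and how SIZE is parsed, is an assumption):

```python
# Hypothetical CLI glue, not part of the diff shown here.
import argparse

from osbuild.objectstore import ObjectStore

parser = argparse.ArgumentParser()
parser.add_argument("--store", metavar="DIRECTORY", default="osbuild-store")
parser.add_argument("--cache-max-size", metavar="SIZE", default=None,
                    help="maximum size of the osbuild object cache")
args = parser.parse_args()

with ObjectStore(args.store) as object_store:
    if args.cache_max_size is not None:
        # the real CLI may first parse SIZE into bytes; either way it ends up
        # in FsCacheInfo(maximum_size=...) via the setter above
        object_store.maximum_size = args.cache_max_size
```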
@@ -314,13 +330,11 @@ class ObjectStore(contextlib.AbstractContextManager):
        if self._get_floating(object_id):
            return True

        return os.access(self.resolve_ref(object_id), os.F_OK)

    def resolve_ref(self, object_id: Optional[str]) -> Optional[str]:
        """Returns the path to the given object_id"""
        if not object_id:
            return None
        return os.path.join(self.refs, object_id)
        try:
            with self.cache.load(object_id):
                return True
        except FsCache.MissError:
            return False

    def tempdir(self, prefix=None, suffix=None):
        """Return a tempfile.TemporaryDirectory within the store"""
@@ -329,75 +343,51 @@ class ObjectStore(contextlib.AbstractContextManager):
                                           suffix=suffix)

    def get(self, object_id):
        assert self.active

        obj = self._get_floating(object_id)
        if obj:
            return obj

        if not self.contains(object_id):
        try:
            obj = Object(self.cache, object_id, Object.Mode.READ)
            self._stack.enter_context(obj)
            return obj
        except FsCache.MissError:
            return None

        return Object(self, object_id, Object.Mode.READ)

    def new(self, object_id: str):
        """Creates a new `Object` and open it for writing.

        It returns a temporary instance of `Object`, the base
        optionally set to `base_id`. It can be used to interact
        with the store.
        If changes to the object's content were made (by calling
        `Object.write`), these must manually be committed to the
        store via `commit()`.
        It returns an instance of `Object` that can be used to
        write tree and metadata. Use `commit` to attempt to
        store the object in the cache.
        """
        assert self.active

        obj = Object(self, object_id, Object.Mode.WRITE)
        obj = Object(self.cache, object_id, Object.Mode.WRITE)
        self._stack.enter_context(obj)

        self._objs.add(obj)

        return obj

    def commit(self, obj: Object, object_id: str) -> str:
        """Commits a Object to the object store
    def commit(self, obj: Object, object_id: str):
        """Commits the Object to the object cache as `object_id`.

        Move the contents of the obj (Object) to object directory
        of the store with a universally unique name. Creates a
        symlink to that ('objects/{hash}') in the references
        directory with the object_id as the name ('refs/{object_id}).
        If the link already exists, it will be atomically replaced.

        If object_id is different from the id of the object, a copy
        of the object will be stored.

        Returns: The name of the object
        Attempts to store the contents of `obj` and its metadata
        in the object cache. Whether anything is actually stored
        depends on the configuration of the cache, i.e. its size
        and how much free space is left or can be made available.
        Therefore the caller should not assume that the stored
        object can be retrieved at all.
        """

        # The supplied object_id is not the object's final id, so
        # we have to make a copy first
        if obj.id != object_id:
            tmp = self.new(object_id)
            tmp.init(obj)
            obj = tmp
        assert self.active

        # The object is stored in the objects directory using its unique
        # name. This means that each commit will always result in a new
        # object in the store, even if an identical one exists.
        object_name = obj.store_tree()

        # symlink the object_id (config hash) in the refs directory to the
        # object name in the objects directory. If a symlink by that name
        # already exists, atomically replace it, but leave the backing object
        # in place (it may be in use).
        with self.tempdir() as tmp:
            link = f"{tmp}/link"
            os.symlink(f"../objects/{object_name}", link)

            ref = self.resolve_ref(object_id)

            if not ref:
                raise RuntimeError("commit with unresolvable ref")

            os.replace(link, ref)

        return object_name
        with self.cache.store(object_id) as name:
            path = os.path.join(self.cache, name)
            obj.clone(path)

    def cleanup(self):
        """Cleanup all created Objects that are still alive"""
@@ -405,10 +395,19 @@ class ObjectStore(contextlib.AbstractContextManager):
            self._host_tree.cleanup()
            self._host_tree = None

        for obj in self._objs:
            obj.cleanup()
        self._stack.close()
        self._objs = set()

    def __fspath__(self):
        return os.fspath(self.store)

    def __enter__(self):
        assert not self.active
        self._stack.enter_context(self.cache)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        assert self.active
        self.cleanup()