From ae0680da117bc1589ec2724b60909eb123451b1b Mon Sep 17 00:00:00 2001 From: Christian Kellner Date: Tue, 6 Dec 2022 18:41:52 +0100 Subject: [PATCH] osbuild: integrate FsCache into ObjectStore Integrate the recently added file system cache `FsCache` into our object store `ObjectStore`. NB: This changes the semantics of it: previously a call to `ObjectStore.commit` resulted in the object being in the cache (i/o errors aside). But `FsCache.store`, which is now the backing store for objects, will only commit objects if there is enough space left. Thus we cannot rely on objects being present for reading after a call to `FsCache.store`. To cope with this we now always copy the object into the cache, even for cases where we previously moved it: for the case where commit is called with `object_id` matching `Object.id`, which is the case when `commit` is called for the last stage in the pipeline. We could keep this optimization but then we would have to special case it and not call `commit` for these cases but only after we exported all objects; or in other words, after we are sure we will never read from any committed object again. The extra complexity does not seem worth the small gain of the optimization. Convert all the tests to the new semantics and also remove a lot of them that make no sense under this new paradigm. Add a new command line option `--cache-max-size` which will set the maximum size of the cache, if specified. --- docs/osbuild.1.rst | 3 + osbuild/main_cli.py | 6 + osbuild/objectstore.py | 227 +++++++++++++++++------------------ schutzbot/manifest_tests.sh | 3 + test/mod/test_objectstore.py | 160 +++++------------------- test/test.py | 6 + 6 files changed, 158 insertions(+), 247 deletions(-) diff --git a/docs/osbuild.1.rst b/docs/osbuild.1.rst index 019e3ac4..1cab5f85 100644 --- a/docs/osbuild.1.rst +++ b/docs/osbuild.1.rst @@ -43,6 +43,9 @@ is not listed here, **osbuild** will deny startup and exit with an error. 
are stored -l DIR, --libdir=DIR directory containing stages, assemblers, and the osbuild library +--cache-max-size=SIZE maximum size of the cache (bytes) or 'unlimited' + for no restriction (size may include an optional + unit suffix, like kB, kiB, MB, MiB and so on) --checkpoint=CHECKPOINT stage to commit to the object store during build (can be passed multiple times) --export=OBJECT object to export (can be passed multiple times) diff --git a/osbuild/main_cli.py b/osbuild/main_cli.py index fbf23916..d1ebfdb0 100644 --- a/osbuild/main_cli.py +++ b/osbuild/main_cli.py @@ -15,6 +15,7 @@ import osbuild import osbuild.meta import osbuild.monitor from osbuild.objectstore import ObjectStore +from osbuild.util.parsing import parse_size from osbuild.util.term import fmt as vt @@ -66,6 +67,8 @@ def parse_arguments(sys_argv): help="directory where intermediary os trees are stored") parser.add_argument("-l", "--libdir", metavar="DIRECTORY", type=os.path.abspath, default="/usr/lib/osbuild", help="directory containing stages, assemblers, and the osbuild library") + parser.add_argument("--cache-max-size", metavar="SIZE", type=parse_size, default=None, + help="maximum size of the cache (bytes) or 'unlimited' for no restriction") parser.add_argument("--checkpoint", metavar="ID", action="append", type=str, default=None, help="stage to commit to the object store during build (can be passed multiple times)") parser.add_argument("--export", metavar="ID", action="append", type=str, default=[], @@ -150,6 +153,9 @@ def osbuild_cli(): try: with ObjectStore(args.store) as object_store: + if args.cache_max_size is not None: + object_store.maximum_size = args.cache_max_size + stage_timeout = args.stage_timeout pipelines = manifest.depsolve(object_store, exports) diff --git a/osbuild/objectstore.py b/osbuild/objectstore.py index 1002d6ad..606e79e3 100644 --- a/osbuild/objectstore.py +++ b/osbuild/objectstore.py @@ -4,10 +4,10 @@ import json import os import subprocess import tempfile -import 
uuid -from typing import Any, Optional, Set +from typing import Any, Optional, Set, Union -from osbuild.util import jsoncomm, rmrf +from osbuild.util import jsoncomm +from osbuild.util.fscache import FsCache, FsCacheInfo from osbuild.util.mnt import mount, umount from osbuild.util.types import PathLike @@ -105,22 +105,34 @@ class Object: def __fspath__(self): return self.path - def __init__(self, store: "ObjectStore", uid: str, mode: Mode): + def __init__(self, cache: FsCache, uid: str, mode: Mode): + self._cache = cache self._mode = mode - self._workdir = None self._id = uid - self.store = store + self._path = None + self._meta: Optional[Object.Metadata] = None + self._stack: Optional[contextlib.ExitStack] = None + def _open_for_reading(self): + name = self._stack.enter_context( + self._cache.load(self.id) + ) + self._path = os.path.join(self._cache, name) + + def _open_for_writing(self): + name = self._stack.enter_context( + self._cache.stage() + ) + self._path = os.path.join(self._cache, name) + os.makedirs(os.path.join(self._path, "tree")) + + def __enter__(self): + assert not self.active + self._stack = contextlib.ExitStack() if self.mode == Object.Mode.READ: - path = self.store.resolve_ref(uid) - assert path is not None - self._path = os.path.join(path, "data") + self._open_for_reading() else: - workdir = self.tempdir("workdir") - self._workdir = workdir - self._path = os.path.join(workdir.name, "data") - tree = os.path.join(self._path, "tree") - os.makedirs(tree) + self._open_for_writing() # Expose our base path as `os.PathLike` via `PathAdater` # so any changes to it, e.g. 
via `store_tree`, will be @@ -128,6 +140,16 @@ class Object: wrapped = PathAdapter(self, "_path") self._meta = self.Metadata(wrapped, folder="meta") + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + assert self.active + self.cleanup() + + @property + def active(self) -> bool: + return self._stack is not None + @property def id(self) -> Optional[str]: return self._id @@ -139,39 +161,22 @@ class Object: def init(self, base: "Object"): """Initialize the object with the base object""" self._check_mode(Object.Mode.WRITE) + assert self.active + assert self._path base.clone(self._path) @property def tree(self) -> str: + assert self.active + assert self._path return os.path.join(self._path, "tree") @property def meta(self) -> Metadata: + assert self.active + assert self._meta return self._meta - def store_tree(self): - """Store the tree with a fresh name and close it - - Moves the tree atomically by using rename(2), to a - randomly generated unique name. - - This puts the object into the READ state. 
- """ - self._check_mode(Object.Mode.WRITE) - - name = str(uuid.uuid4()) - - base = os.path.join(self.store.objects, name) - os.makedirs(base) - destination = os.path.join(base, "data") - os.rename(self._path, destination) - self._path = destination - - self.finalize() - self.cleanup() - - return name - def finalize(self): if self.mode != Object.Mode.WRITE: return @@ -180,27 +185,15 @@ class Object: self._mode = Object.Mode.READ def cleanup(self): - workdir = self._workdir - if workdir: - # manually remove the tree, it might contain - # files with immutable flag set, which will - # throw off standard Python 3 tempdir cleanup - rmrf.rmtree(os.path.join(workdir.name, "data")) - - workdir.cleanup() - self._workdir = None + if self._stack: + self._stack.close() + self._stack = None def _check_mode(self, want: Mode): """Internal: Raise a ValueError if we are not in the desired mode""" if self.mode != want: raise ValueError(f"Wrong object mode: {self.mode}, want {want}") - def tempdir(self, suffix=None): - if suffix: - suffix = "-" + suffix - name = f"object-{self._id[:7]}-" - return self.store.tempdir(prefix=name, suffix=suffix) - def export(self, to_directory: PathLike): """Copy object into an external directory""" subprocess.run( @@ -283,16 +276,14 @@ class HostTree: class ObjectStore(contextlib.AbstractContextManager): def __init__(self, store: PathLike): - self.store = store - self.objects = os.path.join(store, "objects") - self.refs = os.path.join(store, "refs") + self.cache = FsCache("osbuild", store) self.tmp = os.path.join(store, "tmp") os.makedirs(self.store, exist_ok=True) os.makedirs(self.objects, exist_ok=True) - os.makedirs(self.refs, exist_ok=True) os.makedirs(self.tmp, exist_ok=True) self._objs: Set[Object] = set() self._host_tree: Optional[HostTree] = None + self._stack = contextlib.ExitStack() def _get_floating(self, object_id: str) -> Optional[Object]: """Internal: get a non-committed object""" @@ -301,8 +292,33 @@ class 
ObjectStore(contextlib.AbstractContextManager): return obj return None + @property + def maximum_size(self) -> Optional[Union[int, str]]: + info = self.cache.info + return info.maximum_size + + @maximum_size.setter + def maximum_size(self, size: Union[int, str]): + info = FsCacheInfo(maximum_size=size) + self.cache.info = info + + @property + def active(self) -> bool: + #pylint: disable=protected-access + return self.cache._is_active() + + @property + def store(self): + return os.fspath(self.cache) + + @property + def objects(self): + return os.path.join(self.cache, "objects") + @property def host_tree(self) -> HostTree: + assert self.active + if not self._host_tree: self._host_tree = HostTree(self) return self._host_tree @@ -314,13 +330,11 @@ class ObjectStore(contextlib.AbstractContextManager): if self._get_floating(object_id): return True - return os.access(self.resolve_ref(object_id), os.F_OK) - - def resolve_ref(self, object_id: Optional[str]) -> Optional[str]: - """Returns the path to the given object_id""" - if not object_id: - return None - return os.path.join(self.refs, object_id) + try: + with self.cache.load(object_id): + return True + except FsCache.MissError: + return False def tempdir(self, prefix=None, suffix=None): """Return a tempfile.TemporaryDirectory within the store""" @@ -329,75 +343,51 @@ class ObjectStore(contextlib.AbstractContextManager): suffix=suffix) def get(self, object_id): + assert self.active + obj = self._get_floating(object_id) if obj: return obj - if not self.contains(object_id): + try: + obj = Object(self.cache, object_id, Object.Mode.READ) + self._stack.enter_context(obj) + return obj + except FsCache.MissError: return None - return Object(self, object_id, Object.Mode.READ) - def new(self, object_id: str): """Creates a new `Object` and open it for writing. - It returns a temporary instance of `Object`, the base - optionally set to `base_id`. It can be used to interact - with the store. 
- If changes to the object's content were made (by calling - `Object.write`), these must manually be committed to the - store via `commit()`. + It returns a instance of `Object` that can be used to + write tree and metadata. Use `commit` to attempt to + store the object in the cache. """ + assert self.active - obj = Object(self, object_id, Object.Mode.WRITE) + obj = Object(self.cache, object_id, Object.Mode.WRITE) + self._stack.enter_context(obj) self._objs.add(obj) return obj - def commit(self, obj: Object, object_id: str) -> str: - """Commits a Object to the object store + def commit(self, obj: Object, object_id: str): + """Commits the Object to the object cache as `object_id`. - Move the contents of the obj (Object) to object directory - of the store with a universally unique name. Creates a - symlink to that ('objects/{hash}') in the references - directory with the object_id as the name ('refs/{object_id}). - If the link already exists, it will be atomically replaced. - - If object_id is different from the id of the object, a copy - of the object will be stored. - - Returns: The name of the object + Attempts to store the contents of `obj` and its metadata + in the object cache. Whether anything is actually stored + depends on the configuration of the cache, i.e. its size + and how much free space is left or can be made available. + Therefore the caller should not assume that the stored + object can be retrived at all. """ - # The supplied object_id is not the object's final id, so - # we have to make a copy first - if obj.id != object_id: - tmp = self.new(object_id) - tmp.init(obj) - obj = tmp + assert self.active - # The object is stored in the objects directory using its unique - # name. This means that each commit will always result in a new - # object in the store, even if an identical one exists. - object_name = obj.store_tree() - - # symlink the object_id (config hash) in the refs directory to the - # object name in the objects directory. 
If a symlink by that name - # already exists, atomically replace it, but leave the backing object - # in place (it may be in use). - with self.tempdir() as tmp: - link = f"{tmp}/link" - os.symlink(f"../objects/{object_name}", link) - - ref = self.resolve_ref(object_id) - - if not ref: - raise RuntimeError("commit with unresolvable ref") - - os.replace(link, ref) - - return object_name + with self.cache.store(object_id) as name: + path = os.path.join(self.cache, name) + obj.clone(path) def cleanup(self): """Cleanup all created Objects that are still alive""" @@ -405,10 +395,19 @@ class ObjectStore(contextlib.AbstractContextManager): self._host_tree.cleanup() self._host_tree = None - for obj in self._objs: - obj.cleanup() + self._stack.close() + self._objs = set() + + def __fspath__(self): + return os.fspath(self.store) + + def __enter__(self): + assert not self.active + self._stack.enter_context(self.cache) + return self def __exit__(self, exc_type, exc_val, exc_tb): + assert self.active self.cleanup() diff --git a/schutzbot/manifest_tests.sh b/schutzbot/manifest_tests.sh index e7905f8c..f277cbf2 100755 --- a/schutzbot/manifest_tests.sh +++ b/schutzbot/manifest_tests.sh @@ -16,6 +16,9 @@ git checkout "$MANIFEST_DB_COMMIT" OSBUILD_LABEL=$(matchpathcon -n /usr/bin/osbuild) chcon $OSBUILD_LABEL tools/image-info +# set the maximum cache size to unlimited +echo "{}" | sudo osbuild --cache-max-size unlimited - + # run the tests from the manifest-db for this arch+distro echo "Running the osbuild-image-test for arch $ARCH and ditribution $DISTRO_CODE" sudo tools/osbuild-image-test --arch=$ARCH --distro=$DISTRO_CODE --image-info-path=tools/image-info diff --git a/test/mod/test_objectstore.py b/test/mod/test_objectstore.py index 1893a112..c68e7ce0 100644 --- a/test/mod/test_objectstore.py +++ b/test/mod/test_objectstore.py @@ -15,12 +15,10 @@ from .. 
import test def store_path(store: objectstore.ObjectStore, ref: str, path: str) -> bool: - if not store.contains(ref): + obj = store.get(ref) + if not obj: return False - obj = store.resolve_ref(ref) - if not obj or not os.path.exists(obj): - return False - return os.path.exists(os.path.join(obj, "data", "tree", path)) + return os.path.exists(os.path.join(obj, path)) @unittest.skipUnless(test.TestBase.can_bind_mount(), "root-only") @@ -35,8 +33,9 @@ class TestObjectStore(unittest.TestCase): def test_basic(self): # always use a temporary store so item counting works with objectstore.ObjectStore(self.store) as object_store: + object_store.maximum_size = 1024*1024*1024 + # No objects or references should be in the store - assert len(os.listdir(object_store.refs)) == 0 assert len(os.listdir(object_store.objects)) == 0 tree = object_store.new("a") @@ -47,157 +46,51 @@ class TestObjectStore(unittest.TestCase): p = Path(tree, "A") p.touch() - # consumes the object, puts it into read mode - object_store.commit(tree, "a") - + tree.finalize() # put the object into READ mode assert tree.mode == objectstore.Object.Mode.READ - assert object_store.contains("a") + # commit makes a copy, if space + object_store.commit(tree, "a") assert store_path(object_store, "a", "A") - assert len(os.listdir(object_store.refs)) == 1 - assert len(os.listdir(object_store.objects)) == 1 + # second object, based on the first one + obj2 = object_store.new("b") + obj2.init(tree) - tree = object_store.new("b") - p = Path(tree, "A") - p.touch() - p = Path(tree, "B") + p = Path(obj2, "B") p.touch() - # consumes the object, puts it into read mode + obj2.finalize() # put the object into READ mode + assert obj2.mode == objectstore.Object.Mode.READ + + # commit always makes a copy, if space object_store.commit(tree, "b") assert object_store.contains("b") + assert store_path(object_store, "b", "A") assert store_path(object_store, "b", "B") - assert len(os.listdir(object_store.refs)) == 2 assert 
len(os.listdir(object_store.objects)) == 2 - # assert len(os.listdir(f"{object_store.refs}/b/")) == 2 - - self.assertEqual(object_store.resolve_ref(None), None) - self.assertEqual(object_store.resolve_ref("a"), - f"{object_store.refs}/a") + # object should exist and should be in read mode tree = object_store.get("b") assert tree is not None assert tree.mode == objectstore.Object.Mode.READ def test_cleanup(self): # always use a temporary store so item counting works - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - with objectstore.ObjectStore(tmp) as object_store: - tree = object_store.new("a") - self.assertEqual(len(os.listdir(object_store.tmp)), 1) - p = Path(tree, "A") - p.touch() + with objectstore.ObjectStore(self.store) as object_store: + object_store.maximum_size = 1024*1024*1024 - # there should be no temporary Objects dirs anymore - self.assertEqual(len(os.listdir(object_store.tmp)), 0) - - def test_commit_clone(self): - # operate with a clean object store - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - # sample data to be used for read, write checks - data = "23" - - with objectstore.ObjectStore(tmp) as store: - assert len(os.listdir(store.refs)) == 0 - - tree = store.new("a") - with open(os.path.join(tree, "data"), "w", - encoding="utf-8") as f: - f.write(data) - st = os.fstat(f.fileno()) - data_inode = st.st_ino - - # commit the object as "x", making a copy - store.commit(tree, "x") - - # check that "data" got indeed copied - tree = store.get("x") - assert tree is not None - - with open(os.path.join(tree, "data"), "r", - encoding="utf-8") as f: - st = os.fstat(f.fileno()) - self.assertNotEqual(st.st_ino, data_inode) - data_read = f.read() - self.assertEqual(data, data_read) - - def test_commit_consume(self): - # operate with a clean object store - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - # sample data to be used for read, write checks - data = "23" - - with objectstore.ObjectStore(tmp) as store: - assert 
len(os.listdir(store.refs)) == 0 - - tree = store.new("a") - with open(os.path.join(tree, "data"), "w", encoding="utf8") as f: - f.write(data) - st = os.fstat(f.fileno()) - data_inode = st.st_ino - - # commit the object as "a" - store.commit(tree, "a") - assert len(os.listdir(store.refs)) == 1 - - # check that "data" is still the very - # same file after committing - with open(os.path.join(tree, "data"), "r", encoding="utf8") as f: - st = os.fstat(f.fileno()) - self.assertEqual(st.st_ino, data_inode) - data_read = f.read() - self.assertEqual(data, data_read) - - def test_object_base(self): - with objectstore.ObjectStore(self.store) as store: - assert len(os.listdir(store.refs)) == 0 - assert len(os.listdir(store.objects)) == 0 - - base = store.new("a") - p = Path(base, "A") - p.touch() - store.commit(base, "a") - - assert store.contains("a") - assert store_path(store, "a", "A") - - tree = store.new("b") - tree.init(base) - - p = Path(tree, "B") - p.touch() - - tree.finalize() - - assert os.path.exists(os.path.join(tree, "A")) - assert os.path.exists(os.path.join(tree, "B")) - - def test_snapshot(self): - with objectstore.ObjectStore(self.store) as store: - tree = store.new("b") + stage = os.path.join(object_store, "stage") + tree = object_store.new("a") + self.assertEqual(len(os.listdir(stage)), 1) p = Path(tree, "A") p.touch() - assert not store.contains("a") - store.commit(tree, "a") # store via "a", creates a clone - assert store.contains("a") - - p = Path(tree, "B") - p.touch() - store.commit(tree, "b") - - # check the references exist - assert os.path.exists(f"{store.refs}/a") - assert os.path.exists(f"{store.refs}/b") - - # check the contents of the trees - assert store_path(store, "a", "A") - assert not store_path(store, "a", "B") - assert store_path(store, "b", "A") - assert store_path(store, "b", "B") + # there should be no temporary Objects dirs anymore + with objectstore.ObjectStore(self.store) as object_store: + assert object_store.get("A") is None def 
test_metadata(self): @@ -256,6 +149,7 @@ class TestObjectStore(unittest.TestCase): assert md.get("a") == data with objectstore.ObjectStore(self.store) as store: + store.maximum_size = 1024*1024*1024 obj = store.new("a") p = Path(obj, "A") p.touch() diff --git a/test/test.py b/test/test.py index 92664573..98be29b1 100644 --- a/test/test.py +++ b/test/test.py @@ -12,6 +12,7 @@ import tempfile import unittest import osbuild.meta +from osbuild.objectstore import ObjectStore from osbuild.util import linux @@ -277,6 +278,8 @@ class OSBuild(contextlib.AbstractContextManager): _exitstack = None _cachedir = None + maximum_cache_size = 20 * 1024 * 1024 * 1024 # 20 GB + def __init__(self, *, cache_from=None): self._cache_from = cache_from @@ -297,6 +300,9 @@ class OSBuild(contextlib.AbstractContextManager): self._cachedir ], check=True) + with ObjectStore(self._cachedir) as store: + store.maximum_size = self.maximum_cache_size + # Keep our ExitStack for `__exit__()`. self._exitstack = self._exitstack.pop_all()