diff --git a/docs/osbuild.1.rst b/docs/osbuild.1.rst index 019e3ac4..1cab5f85 100644 --- a/docs/osbuild.1.rst +++ b/docs/osbuild.1.rst @@ -43,6 +43,9 @@ is not listed here, **osbuild** will deny startup and exit with an error. are stored -l DIR, --libdir=DIR directory containing stages, assemblers, and the osbuild library +--cache-max-size=SIZE maximum size of the cache (bytes) or 'unlimited' + for no restriction (size may include an optional + unit suffix, like kB, kiB, MB, MiB and so on) --checkpoint=CHECKPOINT stage to commit to the object store during build (can be passed multiple times) --export=OBJECT object to export (can be passed multiple times) diff --git a/osbuild/main_cli.py b/osbuild/main_cli.py index fbf23916..d1ebfdb0 100644 --- a/osbuild/main_cli.py +++ b/osbuild/main_cli.py @@ -15,6 +15,7 @@ import osbuild import osbuild.meta import osbuild.monitor from osbuild.objectstore import ObjectStore +from osbuild.util.parsing import parse_size from osbuild.util.term import fmt as vt @@ -66,6 +67,8 @@ def parse_arguments(sys_argv): help="directory where intermediary os trees are stored") parser.add_argument("-l", "--libdir", metavar="DIRECTORY", type=os.path.abspath, default="/usr/lib/osbuild", help="directory containing stages, assemblers, and the osbuild library") + parser.add_argument("--cache-max-size", metavar="SIZE", type=parse_size, default=None, + help="maximum size of the cache (bytes) or 'unlimited' for no restriction") parser.add_argument("--checkpoint", metavar="ID", action="append", type=str, default=None, help="stage to commit to the object store during build (can be passed multiple times)") parser.add_argument("--export", metavar="ID", action="append", type=str, default=[], @@ -150,6 +153,9 @@ def osbuild_cli(): try: with ObjectStore(args.store) as object_store: + if args.cache_max_size is not None: + object_store.maximum_size = args.cache_max_size + stage_timeout = args.stage_timeout pipelines = manifest.depsolve(object_store, exports) diff 
--git a/osbuild/objectstore.py b/osbuild/objectstore.py index 1002d6ad..606e79e3 100644 --- a/osbuild/objectstore.py +++ b/osbuild/objectstore.py @@ -4,10 +4,10 @@ import json import os import subprocess import tempfile -import uuid -from typing import Any, Optional, Set +from typing import Any, Optional, Set, Union -from osbuild.util import jsoncomm, rmrf +from osbuild.util import jsoncomm +from osbuild.util.fscache import FsCache, FsCacheInfo from osbuild.util.mnt import mount, umount from osbuild.util.types import PathLike @@ -105,22 +105,34 @@ class Object: def __fspath__(self): return self.path - def __init__(self, store: "ObjectStore", uid: str, mode: Mode): + def __init__(self, cache: FsCache, uid: str, mode: Mode): + self._cache = cache self._mode = mode - self._workdir = None self._id = uid - self.store = store + self._path = None + self._meta: Optional[Object.Metadata] = None + self._stack: Optional[contextlib.ExitStack] = None + def _open_for_reading(self): + name = self._stack.enter_context( + self._cache.load(self.id) + ) + self._path = os.path.join(self._cache, name) + + def _open_for_writing(self): + name = self._stack.enter_context( + self._cache.stage() + ) + self._path = os.path.join(self._cache, name) + os.makedirs(os.path.join(self._path, "tree")) + + def __enter__(self): + assert not self.active + self._stack = contextlib.ExitStack() if self.mode == Object.Mode.READ: - path = self.store.resolve_ref(uid) - assert path is not None - self._path = os.path.join(path, "data") + self._open_for_reading() else: - workdir = self.tempdir("workdir") - self._workdir = workdir - self._path = os.path.join(workdir.name, "data") - tree = os.path.join(self._path, "tree") - os.makedirs(tree) + self._open_for_writing() # Expose our base path as `os.PathLike` via `PathAdater` # so any changes to it, e.g. 
via `store_tree`, will be @@ -128,6 +140,16 @@ class Object: wrapped = PathAdapter(self, "_path") self._meta = self.Metadata(wrapped, folder="meta") + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + assert self.active + self.cleanup() + + @property + def active(self) -> bool: + return self._stack is not None + @property def id(self) -> Optional[str]: return self._id @@ -139,39 +161,22 @@ class Object: def init(self, base: "Object"): """Initialize the object with the base object""" self._check_mode(Object.Mode.WRITE) + assert self.active + assert self._path base.clone(self._path) @property def tree(self) -> str: + assert self.active + assert self._path return os.path.join(self._path, "tree") @property def meta(self) -> Metadata: + assert self.active + assert self._meta return self._meta - def store_tree(self): - """Store the tree with a fresh name and close it - - Moves the tree atomically by using rename(2), to a - randomly generated unique name. - - This puts the object into the READ state. 
- """ - self._check_mode(Object.Mode.WRITE) - - name = str(uuid.uuid4()) - - base = os.path.join(self.store.objects, name) - os.makedirs(base) - destination = os.path.join(base, "data") - os.rename(self._path, destination) - self._path = destination - - self.finalize() - self.cleanup() - - return name - def finalize(self): if self.mode != Object.Mode.WRITE: return @@ -180,27 +185,15 @@ class Object: self._mode = Object.Mode.READ def cleanup(self): - workdir = self._workdir - if workdir: - # manually remove the tree, it might contain - # files with immutable flag set, which will - # throw off standard Python 3 tempdir cleanup - rmrf.rmtree(os.path.join(workdir.name, "data")) - - workdir.cleanup() - self._workdir = None + if self._stack: + self._stack.close() + self._stack = None def _check_mode(self, want: Mode): """Internal: Raise a ValueError if we are not in the desired mode""" if self.mode != want: raise ValueError(f"Wrong object mode: {self.mode}, want {want}") - def tempdir(self, suffix=None): - if suffix: - suffix = "-" + suffix - name = f"object-{self._id[:7]}-" - return self.store.tempdir(prefix=name, suffix=suffix) - def export(self, to_directory: PathLike): """Copy object into an external directory""" subprocess.run( @@ -283,16 +276,14 @@ class HostTree: class ObjectStore(contextlib.AbstractContextManager): def __init__(self, store: PathLike): - self.store = store - self.objects = os.path.join(store, "objects") - self.refs = os.path.join(store, "refs") + self.cache = FsCache("osbuild", store) self.tmp = os.path.join(store, "tmp") os.makedirs(self.store, exist_ok=True) os.makedirs(self.objects, exist_ok=True) - os.makedirs(self.refs, exist_ok=True) os.makedirs(self.tmp, exist_ok=True) self._objs: Set[Object] = set() self._host_tree: Optional[HostTree] = None + self._stack = contextlib.ExitStack() def _get_floating(self, object_id: str) -> Optional[Object]: """Internal: get a non-committed object""" @@ -301,8 +292,33 @@ class 
ObjectStore(contextlib.AbstractContextManager): return obj return None + @property + def maximum_size(self) -> Optional[Union[int, str]]: + info = self.cache.info + return info.maximum_size + + @maximum_size.setter + def maximum_size(self, size: Union[int, str]): + info = FsCacheInfo(maximum_size=size) + self.cache.info = info + + @property + def active(self) -> bool: + #pylint: disable=protected-access + return self.cache._is_active() + + @property + def store(self): + return os.fspath(self.cache) + + @property + def objects(self): + return os.path.join(self.cache, "objects") + @property def host_tree(self) -> HostTree: + assert self.active + if not self._host_tree: self._host_tree = HostTree(self) return self._host_tree @@ -314,13 +330,11 @@ class ObjectStore(contextlib.AbstractContextManager): if self._get_floating(object_id): return True - return os.access(self.resolve_ref(object_id), os.F_OK) - - def resolve_ref(self, object_id: Optional[str]) -> Optional[str]: - """Returns the path to the given object_id""" - if not object_id: - return None - return os.path.join(self.refs, object_id) + try: + with self.cache.load(object_id): + return True + except FsCache.MissError: + return False def tempdir(self, prefix=None, suffix=None): """Return a tempfile.TemporaryDirectory within the store""" @@ -329,75 +343,51 @@ class ObjectStore(contextlib.AbstractContextManager): suffix=suffix) def get(self, object_id): + assert self.active + obj = self._get_floating(object_id) if obj: return obj - if not self.contains(object_id): + try: + obj = Object(self.cache, object_id, Object.Mode.READ) + self._stack.enter_context(obj) + return obj + except FsCache.MissError: return None - return Object(self, object_id, Object.Mode.READ) - def new(self, object_id: str): """Creates a new `Object` and open it for writing. - It returns a temporary instance of `Object`, the base - optionally set to `base_id`. It can be used to interact - with the store. 
- If changes to the object's content were made (by calling - `Object.write`), these must manually be committed to the - store via `commit()`. + It returns an instance of `Object` that can be used to + write tree and metadata. Use `commit` to attempt to + store the object in the cache. """ + assert self.active - obj = Object(self, object_id, Object.Mode.WRITE) + obj = Object(self.cache, object_id, Object.Mode.WRITE) + self._stack.enter_context(obj) self._objs.add(obj) return obj - def commit(self, obj: Object, object_id: str) -> str: - """Commits a Object to the object store + def commit(self, obj: Object, object_id: str): + """Commits the Object to the object cache as `object_id`. - Move the contents of the obj (Object) to object directory - of the store with a universally unique name. Creates a - symlink to that ('objects/{hash}') in the references - directory with the object_id as the name ('refs/{object_id}). - If the link already exists, it will be atomically replaced. - - If object_id is different from the id of the object, a copy - of the object will be stored. - - Returns: The name of the object + Attempts to store the contents of `obj` and its metadata + in the object cache. Whether anything is actually stored + depends on the configuration of the cache, i.e. its size + and how much free space is left or can be made available. + Therefore the caller should not assume that the stored + object can be retrieved at all. """ - # The supplied object_id is not the object's final id, so - # we have to make a copy first - if obj.id != object_id: - tmp = self.new(object_id) - tmp.init(obj) - obj = tmp + assert self.active - # The object is stored in the objects directory using its unique - # name. This means that each commit will always result in a new - # object in the store, even if an identical one exists. - object_name = obj.store_tree() - - # symlink the object_id (config hash) in the refs directory to the - # object name in the objects directory. 
If a symlink by that name - # already exists, atomically replace it, but leave the backing object - # in place (it may be in use). - with self.tempdir() as tmp: - link = f"{tmp}/link" - os.symlink(f"../objects/{object_name}", link) - - ref = self.resolve_ref(object_id) - - if not ref: - raise RuntimeError("commit with unresolvable ref") - - os.replace(link, ref) - - return object_name + with self.cache.store(object_id) as name: + path = os.path.join(self.cache, name) + obj.clone(path) def cleanup(self): """Cleanup all created Objects that are still alive""" @@ -405,10 +395,19 @@ class ObjectStore(contextlib.AbstractContextManager): self._host_tree.cleanup() self._host_tree = None - for obj in self._objs: - obj.cleanup() + self._stack.close() + self._objs = set() + + def __fspath__(self): + return os.fspath(self.store) + + def __enter__(self): + assert not self.active + self._stack.enter_context(self.cache) + return self def __exit__(self, exc_type, exc_val, exc_tb): + assert self.active self.cleanup() diff --git a/schutzbot/manifest_tests.sh b/schutzbot/manifest_tests.sh index e7905f8c..f277cbf2 100755 --- a/schutzbot/manifest_tests.sh +++ b/schutzbot/manifest_tests.sh @@ -16,6 +16,9 @@ git checkout "$MANIFEST_DB_COMMIT" OSBUILD_LABEL=$(matchpathcon -n /usr/bin/osbuild) chcon $OSBUILD_LABEL tools/image-info +# set the maximum cache size to unlimited +echo "{}" | sudo osbuild --cache-max-size unlimited - + # run the tests from the manifest-db for this arch+distro echo "Running the osbuild-image-test for arch $ARCH and ditribution $DISTRO_CODE" sudo tools/osbuild-image-test --arch=$ARCH --distro=$DISTRO_CODE --image-info-path=tools/image-info diff --git a/test/mod/test_objectstore.py b/test/mod/test_objectstore.py index 1893a112..c68e7ce0 100644 --- a/test/mod/test_objectstore.py +++ b/test/mod/test_objectstore.py @@ -15,12 +15,10 @@ from .. 
import test def store_path(store: objectstore.ObjectStore, ref: str, path: str) -> bool: - if not store.contains(ref): + obj = store.get(ref) + if not obj: return False - obj = store.resolve_ref(ref) - if not obj or not os.path.exists(obj): - return False - return os.path.exists(os.path.join(obj, "data", "tree", path)) + return os.path.exists(os.path.join(obj, path)) @unittest.skipUnless(test.TestBase.can_bind_mount(), "root-only") @@ -35,8 +33,9 @@ class TestObjectStore(unittest.TestCase): def test_basic(self): # always use a temporary store so item counting works with objectstore.ObjectStore(self.store) as object_store: + object_store.maximum_size = 1024*1024*1024 + # No objects or references should be in the store - assert len(os.listdir(object_store.refs)) == 0 assert len(os.listdir(object_store.objects)) == 0 tree = object_store.new("a") @@ -47,157 +46,51 @@ class TestObjectStore(unittest.TestCase): p = Path(tree, "A") p.touch() - # consumes the object, puts it into read mode - object_store.commit(tree, "a") - + tree.finalize() # put the object into READ mode assert tree.mode == objectstore.Object.Mode.READ - assert object_store.contains("a") + # commit makes a copy, if space + object_store.commit(tree, "a") assert store_path(object_store, "a", "A") - assert len(os.listdir(object_store.refs)) == 1 - assert len(os.listdir(object_store.objects)) == 1 + # second object, based on the first one + obj2 = object_store.new("b") + obj2.init(tree) - tree = object_store.new("b") - p = Path(tree, "A") - p.touch() - p = Path(tree, "B") + p = Path(obj2, "B") p.touch() - # consumes the object, puts it into read mode + obj2.finalize() # put the object into READ mode + assert obj2.mode == objectstore.Object.Mode.READ + + # commit always makes a copy, if space object_store.commit(tree, "b") assert object_store.contains("b") + assert store_path(object_store, "b", "A") assert store_path(object_store, "b", "B") - assert len(os.listdir(object_store.refs)) == 2 assert 
len(os.listdir(object_store.objects)) == 2 - # assert len(os.listdir(f"{object_store.refs}/b/")) == 2 - - self.assertEqual(object_store.resolve_ref(None), None) - self.assertEqual(object_store.resolve_ref("a"), - f"{object_store.refs}/a") + # object should exist and should be in read mode tree = object_store.get("b") assert tree is not None assert tree.mode == objectstore.Object.Mode.READ def test_cleanup(self): # always use a temporary store so item counting works - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - with objectstore.ObjectStore(tmp) as object_store: - tree = object_store.new("a") - self.assertEqual(len(os.listdir(object_store.tmp)), 1) - p = Path(tree, "A") - p.touch() + with objectstore.ObjectStore(self.store) as object_store: + object_store.maximum_size = 1024*1024*1024 - # there should be no temporary Objects dirs anymore - self.assertEqual(len(os.listdir(object_store.tmp)), 0) - - def test_commit_clone(self): - # operate with a clean object store - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - # sample data to be used for read, write checks - data = "23" - - with objectstore.ObjectStore(tmp) as store: - assert len(os.listdir(store.refs)) == 0 - - tree = store.new("a") - with open(os.path.join(tree, "data"), "w", - encoding="utf-8") as f: - f.write(data) - st = os.fstat(f.fileno()) - data_inode = st.st_ino - - # commit the object as "x", making a copy - store.commit(tree, "x") - - # check that "data" got indeed copied - tree = store.get("x") - assert tree is not None - - with open(os.path.join(tree, "data"), "r", - encoding="utf-8") as f: - st = os.fstat(f.fileno()) - self.assertNotEqual(st.st_ino, data_inode) - data_read = f.read() - self.assertEqual(data, data_read) - - def test_commit_consume(self): - # operate with a clean object store - with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: - # sample data to be used for read, write checks - data = "23" - - with objectstore.ObjectStore(tmp) as store: - assert 
len(os.listdir(store.refs)) == 0 - - tree = store.new("a") - with open(os.path.join(tree, "data"), "w", encoding="utf8") as f: - f.write(data) - st = os.fstat(f.fileno()) - data_inode = st.st_ino - - # commit the object as "a" - store.commit(tree, "a") - assert len(os.listdir(store.refs)) == 1 - - # check that "data" is still the very - # same file after committing - with open(os.path.join(tree, "data"), "r", encoding="utf8") as f: - st = os.fstat(f.fileno()) - self.assertEqual(st.st_ino, data_inode) - data_read = f.read() - self.assertEqual(data, data_read) - - def test_object_base(self): - with objectstore.ObjectStore(self.store) as store: - assert len(os.listdir(store.refs)) == 0 - assert len(os.listdir(store.objects)) == 0 - - base = store.new("a") - p = Path(base, "A") - p.touch() - store.commit(base, "a") - - assert store.contains("a") - assert store_path(store, "a", "A") - - tree = store.new("b") - tree.init(base) - - p = Path(tree, "B") - p.touch() - - tree.finalize() - - assert os.path.exists(os.path.join(tree, "A")) - assert os.path.exists(os.path.join(tree, "B")) - - def test_snapshot(self): - with objectstore.ObjectStore(self.store) as store: - tree = store.new("b") + stage = os.path.join(object_store, "stage") + tree = object_store.new("a") + self.assertEqual(len(os.listdir(stage)), 1) p = Path(tree, "A") p.touch() - assert not store.contains("a") - store.commit(tree, "a") # store via "a", creates a clone - assert store.contains("a") - - p = Path(tree, "B") - p.touch() - store.commit(tree, "b") - - # check the references exist - assert os.path.exists(f"{store.refs}/a") - assert os.path.exists(f"{store.refs}/b") - - # check the contents of the trees - assert store_path(store, "a", "A") - assert not store_path(store, "a", "B") - assert store_path(store, "b", "A") - assert store_path(store, "b", "B") + # there should be no temporary Objects dirs anymore + with objectstore.ObjectStore(self.store) as object_store: + assert object_store.get("A") is None def 
test_metadata(self): @@ -256,6 +149,7 @@ class TestObjectStore(unittest.TestCase): assert md.get("a") == data with objectstore.ObjectStore(self.store) as store: + store.maximum_size = 1024*1024*1024 obj = store.new("a") p = Path(obj, "A") p.touch() diff --git a/test/test.py b/test/test.py index 92664573..98be29b1 100644 --- a/test/test.py +++ b/test/test.py @@ -12,6 +12,7 @@ import tempfile import unittest import osbuild.meta +from osbuild.objectstore import ObjectStore from osbuild.util import linux @@ -277,6 +278,8 @@ class OSBuild(contextlib.AbstractContextManager): _exitstack = None _cachedir = None + maximum_cache_size = 20 * 1024 * 1024 * 1024 # 20 GB + def __init__(self, *, cache_from=None): self._cache_from = cache_from @@ -297,6 +300,9 @@ class OSBuild(contextlib.AbstractContextManager): self._cachedir ], check=True) + with ObjectStore(self._cachedir) as store: + store.maximum_size = self.maximum_cache_size + # Keep our ExitStack for `__exit__()`. self._exitstack = self._exitstack.pop_all()