From 6a2a7d99f7cdf22eda7b2832e64557f7715d20e3 Mon Sep 17 00:00:00 2001 From: Christian Kellner Date: Tue, 18 Feb 2020 09:57:00 +0100 Subject: [PATCH] objectstore: unify commit and snapshot code paths As a result of the previous commits that implement copy-on-write semantics, `commit` can now be used to create snapshots. Whenever an Object is committed, its tree is moved to the store and the Object is reset, i.e. a new clean workdir is created and the old one discarded. The moved tree is then set as the base of the reset Object. On the next call to `write` the moved tree will be copied over and form the basis of the Object again. Should nobody want to write to the Object after the snapshot, i.e. the `commit`, no copy will be made. NB: snapshots/commits will now act as synchronization points: if an object with the same treesum, i.e. the very same content, already exists, the move (i.e. `store_tree`) will gracefully fail and the existing content will be set as the base for the Object. --- osbuild/objectstore.py | 27 +++++++++------------------ osbuild/pipeline.py | 2 +- test/test_objectstore.py | 2 +- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/osbuild/objectstore.py b/osbuild/objectstore.py index d760ce00..b70e4ce5 100644 --- a/osbuild/objectstore.py +++ b/osbuild/objectstore.py @@ -188,24 +188,6 @@ class ObjectStore: # left to do is to commit it to the object store self.commit(obj, object_id) - def snapshot(self, obj: str, object_id: str) -> str: - """Commit `obj` to store and ref it as `object_id` - - Create a snapshot of the object `obj` and store it via - its content hash in the object directory; additionally - create a new reference to it via `object_id` in the - reference directory. 
- - Returns: The treesum of the snapshot - """ - # Make a new temporary directory and Object; initialize - # the latter with the contents of `obj.path` and commit - # it to the store - with Object(self) as tmp: - tmp.base_path = obj.path - tmp.init() - return self.commit(tmp, object_id) - def commit(self, obj: Object, object_id: str) -> str: """Commits a Object to the object store @@ -235,4 +217,13 @@ class ObjectStore: os.symlink(f"../objects/{treesum_hash}", link) os.replace(link, self.resolve_ref(object_id)) + # the reference that is pointing to `treesum_hash` is now the base + # of `obj`. It is not actively initialized but any subsequent calls + # to `obj.write()` or `obj.path`will initialize it again + # NB: in the case that an object with the same treesum as `obj` + # already existed in the store obj.store_tree() will not actually + # have written anything to the store. In this case `obj` will then + # be initialized with the content of the already existing object. + obj.base_path = self.resolve_ref(object_id) + return treesum_hash diff --git a/osbuild/pipeline.py b/osbuild/pipeline.py index 8071fac6..067d1abf 100644 --- a/osbuild/pipeline.py +++ b/osbuild/pipeline.py @@ -281,7 +281,7 @@ class Pipeline: var=store, secrets=secrets) if stage.checkpoint: - object_store.snapshot(tree, stage.id) + object_store.commit(tree, stage.id) results["stages"].append(r.as_dict()) except BuildError as err: results["stages"].append(err.as_dict()) diff --git a/test/test_objectstore.py b/test/test_objectstore.py index c33e4fbc..0ab585b1 100644 --- a/test/test_objectstore.py +++ b/test/test_objectstore.py @@ -77,7 +77,7 @@ class TestObjectStore(unittest.TestCase): path = tree.write() p = Path(f"{path}/A") p.touch() - object_store.snapshot(tree, "a") + object_store.commit(tree, "a") path = tree.write() p = Path(f"{path}/B") p.touch()