objectstore: unify commit and snapshot code paths

As a result of the previous commits that implement copy on write
semantics, `commit` can now be used to create snapshots. Whenever
an Object is committed, its tree is moved to the store and it is
being reset, i.e. a new clean workdir is created and the old one
discarded. The moved tree is then set as the base of the reset
Object. On the next call to `write` the moved tree will be copied
over and forms the basis of the Object again. Should nobody want
to write to Object after the snapshot, i.e. the `commit`, no copy
will be made.
NB: snapshots/commits will now act as synchronization points:
if an object with the same treesum, i.e. the very same content,
already exists, the move (i.e. `store_tree`) will gracefully fail
and the existing content will be set as the base for Object.
This commit is contained in:
Christian Kellner 2020-02-18 09:57:00 +01:00 committed by Tom Gundersen
parent 39213b7f44
commit 6a2a7d99f7
3 changed files with 11 additions and 20 deletions

View file

@ -188,24 +188,6 @@ class ObjectStore:
# left to do is to commit it to the object store
self.commit(obj, object_id)
def snapshot(self, obj: str, object_id: str) -> str:
"""Commit `obj` to store and ref it as `object_id`
Create a snapshot of the object `obj` and store it via
its content hash in the object directory; additionally
create a new reference to it via `object_id` in the
reference directory.
Returns: The treesum of the snapshot
"""
# Make a new temporary directory and Object; initialize
# the latter with the contents of `obj.path` and commit
# it to the store
with Object(self) as tmp:
tmp.base_path = obj.path
tmp.init()
return self.commit(tmp, object_id)
def commit(self, obj: Object, object_id: str) -> str:
"""Commits a Object to the object store
@ -235,4 +217,13 @@ class ObjectStore:
os.symlink(f"../objects/{treesum_hash}", link)
os.replace(link, self.resolve_ref(object_id))
# the reference that is pointing to `treesum_hash` is now the base
# of `obj`. It is not actively initialized but any subsequent calls
# to `obj.write()` or `obj.path` will initialize it again
# NB: in the case that an object with the same treesum as `obj`
# already existed in the store obj.store_tree() will not actually
# have written anything to the store. In this case `obj` will then
# be initialized with the content of the already existing object.
obj.base_path = self.resolve_ref(object_id)
return treesum_hash

View file

@ -281,7 +281,7 @@ class Pipeline:
var=store,
secrets=secrets)
if stage.checkpoint:
object_store.snapshot(tree, stage.id)
object_store.commit(tree, stage.id)
results["stages"].append(r.as_dict())
except BuildError as err:
results["stages"].append(err.as_dict())

View file

@ -77,7 +77,7 @@ class TestObjectStore(unittest.TestCase):
path = tree.write()
p = Path(f"{path}/A")
p.touch()
object_store.snapshot(tree, "a")
object_store.commit(tree, "a")
path = tree.write()
p = Path(f"{path}/B")
p.touch()