From 42a365d12ff6b0dc764941a5b0eaf9935cce4b2b Mon Sep 17 00:00:00 2001 From: Christian Kellner Date: Tue, 18 Feb 2020 18:19:04 +0100 Subject: [PATCH] osbuild: no auto commit of the last stage Do not automatically commit the last stage of the pipeline to the store. The last stage is most likely not what should be cached, because it will contain all the individual customization and thus be very likely different for different users. Instead, the dnf or rpm stages have a higher chance of being the same and thus are better candidates for caching. Technically this change is done via two big changes that build upon new features introduces in the previous commits, most notably the copy on write semantics of Object and that input/output is being done via `objectstore.Object` instead of plain paths. The first of the two big changes is to create one new `Object` at the beginning of `pipeline.run` and use that, in write mode via `Object.write` across invocations of `stage.run` calls, with checkpoints being created after each stage on demand. The very same `Object` is then used in read mode via `Object.read` as the input tree for the Assembler. After the assembler is done the resulting image/tree is manually committed to the store. The other big change is to remove the `ObjectStore.commit` call from the `ObjectStore.new` method and thus the automatic commit after the last stage is gone. NB: since the build tree is being retrieved in `get_buildtree` from the store, a checkpoint for the last stage of the build pipeline is forced for now. Future commits will refactor will do away with that forced commit as well. Change osbuildtest.TestCase to always create a checkpoint at the final tree (the last stage of the pipeline), since tests need it to check the tree contents. --- osbuild/objectstore.py | 28 ++++++++++------------ osbuild/pipeline.py | 52 +++++++++++++++++++++------------------- test/osbuildtest.py | 16 +++++++++++++ test/test_objectstore.py | 15 ++++++++---- 4 files changed, 66 insertions(+), 45 deletions(-) diff --git a/osbuild/objectstore.py b/osbuild/objectstore.py index 924405d9..4335f417 100644 --- a/osbuild/objectstore.py +++ b/osbuild/objectstore.py @@ -216,32 +216,28 @@ class ObjectStore: yield path @contextlib.contextmanager - def new(self, object_id, base_id=None): - """Creates a new `Object` for `object_id`. + def new(self, base_id=None): + """Creates a new temporary `Object`. - This method must be used as a context manager. It returns a new - temporary instance of `Object`. It will only be committed to the - store if the context completes without raising an exception. + This method must be used as a context manager. It returns + a temporary instance of `Object`, which can then be used + for interaction with the store. + If changes to the object's content were made (by calling + `Object.write`), these must manually be committed to the + store via `commit()`. """ with Object(self) as obj: - # the object that is yielded will be added to the content store - # on success as object_id if base_id: - # if we were given a base id then this is the base for the - # new object - # NB: its initialization is deferred to the first write + # if we were given a base id then this is the base + # content for the new object + # NB: `Object` has copy-on-write semantics, so no + # copying of the data takes places at this point obj.base = base_id yield obj - # if the yield above raises an exception, the working tree - # is cleaned up by tempfile, otherwise, the it the content - # of it was created or modified by the caller. All that is - # left to do is to commit it to the object store - self.commit(obj, object_id) - def commit(self, obj: Object, object_id: str) -> str: """Commits a Object to the object store diff --git a/osbuild/pipeline.py b/osbuild/pipeline.py index 067d1abf..f10d7c11 100644 --- a/osbuild/pipeline.py +++ b/osbuild/pipeline.py @@ -243,46 +243,49 @@ class Pipeline: object_store = objectstore.ObjectStore(store) results = {} - if self.build: + if self.build and self.build.stages: + # For now, the last build stage is always committed to the object store + self.build.stages[-1].checkpoint = True + r = self.build.run(store, interactive, libdir, secrets) results["build"] = r if not r["success"]: results["success"] = False return results - with self.get_buildtree(object_store) as build_tree: + with self.get_buildtree(object_store) as build_tree, \ + object_store.new(base_id=self.tree_id) as tree: + if self.stages: if not object_store.contains(self.tree_id): # Find the last stage that already exists in the object store, and use # that as the base. - base = None base_idx = -1 + tree.base = None for i in reversed(range(len(self.stages))): if object_store.contains(self.stages[i].id): - base = self.stages[i].id + tree.base = self.stages[i].id base_idx = i break - # The tree does not exist. Create it and save it to the object store. If - # two run() calls race each-other, two trees may be generated, and it + # If two run() calls race each-other, two trees may be generated and it # is nondeterministic which of them will end up referenced by the tree_id - # in the content store. However, we guarantee that all tree_id's and all - # generated trees remain valid. + # in the content store if they are both committed. However, after the call + # to commit all the trees will be based on the winner. results["stages"] = [] try: - with object_store.new(self.tree_id, base_id=base) as tree: - for stage in self.stages[base_idx + 1:]: - r = stage.run(tree.write(), - self.runner, - build_tree, - store, - interactive=interactive, - libdir=libdir, - var=store, - secrets=secrets) - if stage.checkpoint: - object_store.commit(tree, stage.id) - results["stages"].append(r.as_dict()) + for stage in self.stages[base_idx + 1:]: + r = stage.run(tree.write(), + self.runner, + build_tree, + store, + interactive=interactive, + libdir=libdir, + var=store, + secrets=secrets) + if stage.checkpoint: + object_store.commit(tree, stage.id) + results["stages"].append(r.as_dict()) except BuildError as err: results["stages"].append(err.as_dict()) results["success"] = False @@ -293,9 +296,9 @@ class Pipeline: if self.assembler: if not object_store.contains(self.output_id): try: - with object_store.get(self.tree_id) as tree, \ - object_store.new(self.output_id) as output_dir: - r = self.assembler.run(tree, + with tree.read() as input_tree, \ + object_store.new() as output_dir: + r = self.assembler.run(input_tree, self.runner, build_tree, output_dir=output_dir.write(), @@ -303,6 +306,7 @@ class Pipeline: libdir=libdir, var=store) results["assembler"] = r.as_dict() + object_store.commit(output_dir, self.output_id) except BuildError as err: results["assembler"] = err.as_dict() results["success"] = False diff --git a/test/osbuildtest.py b/test/osbuildtest.py index e465a5ea..9b22588a 100644 --- a/test/osbuildtest.py +++ b/test/osbuildtest.py @@ -8,6 +8,8 @@ import subprocess import tempfile import unittest +import osbuild + class TestCase(unittest.TestCase): """A TestCase to test running the osbuild program. @@ -44,6 +46,20 @@ class TestCase(unittest.TestCase): osbuild_cmd.append("--sources") osbuild_cmd.append(sources) + # Create a checkpoint at the last stage, i.e. + # commit the final tree to the store, so that + # tests can use it to compare against + if input: + manifest = json.loads(input) + else: + with open(pipeline, "r") as f: + manifest = json.load(f) + + parsed = osbuild.load(manifest, {}) + if parsed.tree_id: + osbuild_cmd.append("--checkpoint") + osbuild_cmd.append(parsed.tree_id) + stdin = subprocess.PIPE if input else None p = subprocess.Popen(osbuild_cmd, encoding="utf-8", stdin=stdin, stdout=subprocess.PIPE) diff --git a/test/test_objectstore.py b/test/test_objectstore.py index 0ab585b1..17ea6d90 100644 --- a/test/test_objectstore.py +++ b/test/test_objectstore.py @@ -24,10 +24,11 @@ class TestObjectStore(unittest.TestCase): # always use a temporary store so item counting works with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: object_store = objectstore.ObjectStore(tmp) - with object_store.new("a") as tree: + with object_store.new() as tree: path = tree.write() p = Path(f"{path}/A") p.touch() + object_store.commit(tree, "a") assert os.path.exists(f"{object_store.refs}/a") assert os.path.exists(f"{object_store.refs}/a/A") @@ -35,12 +36,13 @@ class TestObjectStore(unittest.TestCase): assert len(os.listdir(object_store.objects)) == 1 assert len(os.listdir(f"{object_store.refs}/a/")) == 1 - with object_store.new("b") as tree: + with object_store.new() as tree: path = tree.write() p = Path(f"{path}/A") p.touch() p = Path(f"{path}/B") p.touch() + object_store.commit(tree, "b") assert os.path.exists(f"{object_store.refs}/b") assert os.path.exists(f"{object_store.refs}/b/B") @@ -52,15 +54,17 @@ class TestObjectStore(unittest.TestCase): def test_duplicate(self): with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp: object_store = objectstore.ObjectStore(tmp) - with object_store.new("a") as tree: + with object_store.new() as tree: path = tree.write() p = Path(f"{path}/A") p.touch() + object_store.commit(tree, "a") - with object_store.new("b") as tree: + with object_store.new() as tree: path = tree.write() shutil.copy2(f"{object_store.refs}/a/A", f"{path}/A") + object_store.commit(tree, "b") assert os.path.exists(f"{object_store.refs}/a") assert os.path.exists(f"{object_store.refs}/a/A") @@ -73,7 +77,7 @@ class TestObjectStore(unittest.TestCase): def test_snapshot(self): object_store = objectstore.ObjectStore(self.store) - with object_store.new("b") as tree: + with object_store.new() as tree: path = tree.write() p = Path(f"{path}/A") p.touch() @@ -81,6 +85,7 @@ class TestObjectStore(unittest.TestCase): path = tree.write() p = Path(f"{path}/B") p.touch() + object_store.commit(tree, "b") # check the references exist assert os.path.exists(f"{object_store.refs}/a")