objectstore: direct path i/o for Object

The `Object.{read,write}` methods were introduced to implement
copy on write support. Calling `write` would trigger the copy,
if the object had a `base`. Additionally, a level of indirection
was introduced via bind mounts, which allowed to hide the actual
path of the object in the store and make sure that `read` really
returned a read-only path.
Support for copy-on-write was recently removed[1], and thus the
need for the `read` and `write` methods. We lose the benefits
of the indirection, but they are not really needed: the path to
the object is not really hidden since one can always use the
`resolve_ref` method to obtain the actual store object path.
The read only property of build trees is ensured via read only
bind mounts in the build root.
Instead of using `read` and `write`, `Object` now gained a new
`tree` property that is the path to the objects tree and also
is implementing `__fspath__` and so behaves like an `os.PathLike`
object and can thus transparently be used in many places, like
e.g. `os.path.join` or `pathlib.Path`.

[1] 5346025031
This commit is contained in:
Christian Kellner 2022-11-18 17:12:58 +00:00
parent a25ae2b1d5
commit f8ca0cf4bc
4 changed files with 123 additions and 149 deletions

View file

@ -4,7 +4,7 @@ import os
import subprocess
import tempfile
import uuid
from typing import Iterator, Optional, Set
from typing import Optional, Set
from osbuild.util import jsoncomm, rmrf
from osbuild.util.mnt import mount, umount
@ -52,43 +52,9 @@ class Object:
self._check_mode(Object.Mode.WRITE)
base.clone(self._path)
@contextlib.contextmanager
def write(self) -> Iterator[str]:
"""Return a path that can be written to"""
self._check_mode(Object.Mode.WRITE)
with self.tempdir("writer") as target:
mount(self._path, target, ro=False)
try:
yield target
finally:
umount(target)
@contextlib.contextmanager
def read(self) -> Iterator[PathLike]:
with self.tempdir("reader") as target:
with self.read_at(target) as path:
yield path
@contextlib.contextmanager
def read_at(self, target: PathLike, path: str = "/") -> Iterator[PathLike]:
"""Read the object or a part of it at given location
Map the tree or a part of it specified via `path` at the
specified path `target`.
"""
self._check_mode(Object.Mode.READ)
if self._path is None:
raise RuntimeError("read_at with no path.")
path = os.path.join(self._path, path.lstrip("/"))
mount(path, target)
try:
yield target
finally:
umount(target)
@property
def tree(self) -> str:
return self._path
def store_tree(self):
"""Store the tree with a fresh name and close it
@ -142,17 +108,16 @@ class Object:
def export(self, to_directory: PathLike):
"""Copy object into an external directory"""
with self.read() as from_directory:
subprocess.run(
[
"cp",
"--reflink=auto",
"-a",
os.fspath(from_directory) + "/.",
os.fspath(to_directory),
],
check=True,
)
subprocess.run(
[
"cp",
"--reflink=auto",
"-a",
os.fspath(self.tree) + "/.",
os.fspath(to_directory),
],
check=True,
)
def clone(self, to_directory: PathLike):
"""Clone the object to the specified directory"""
@ -170,6 +135,9 @@ class Object:
check=True,
)
def __fspath__(self):
return self.tree
class HostTree:
"""Read-only access to the host file system
@ -179,30 +147,43 @@ class HostTree:
the host file-system.
"""
_root: Optional[tempfile.TemporaryDirectory]
def __init__(self, store):
self.store = store
self._root = None
self.init()
@staticmethod
def write():
raise ValueError("Cannot write to host")
def init(self):
if self._root:
return
@contextlib.contextmanager
def read(self):
with self.store.tempdir() as tmp:
# Create a bare bones root file system
# with just /usr mounted from the host
usr = os.path.join(tmp, "usr")
os.makedirs(usr)
self._root = self.store.tempdir(prefix="host")
mount(tmp, tmp) # ensure / is read-only
mount("/usr", usr)
try:
yield tmp
finally:
umount(tmp)
root = self._root.name
# Create a bare bones root file system
# with just /usr mounted from the host
usr = os.path.join(root, "usr")
os.makedirs(usr)
# ensure / is read-only
mount(root, root)
mount("/usr", usr)
@property
def tree(self) -> os.PathLike:
if not self._root:
raise AssertionError("HostTree not initialized")
return self._root.name
def cleanup(self):
pass # noop for the host
if self._root:
umount(self._root.name)
self._root.cleanup()
self._root = None
def __fspath__(self) -> os.PathLike:
return self.tree
class ObjectStore(contextlib.AbstractContextManager):
@ -216,6 +197,7 @@ class ObjectStore(contextlib.AbstractContextManager):
os.makedirs(self.refs, exist_ok=True)
os.makedirs(self.tmp, exist_ok=True)
self._objs: Set[Object] = set()
self._host_tree: Optional[HostTree] = None
def _get_floating(self, object_id: str) -> Optional[Object]:
"""Internal: get a non-committed object"""
@ -224,6 +206,12 @@ class ObjectStore(contextlib.AbstractContextManager):
return obj
return None
@property
def host_tree(self) -> HostTree:
if not self._host_tree:
self._host_tree = HostTree(self)
return self._host_tree
def contains(self, object_id):
if not object_id:
return False
@ -318,6 +306,10 @@ class ObjectStore(contextlib.AbstractContextManager):
def cleanup(self):
"""Cleanup all created Objects that are still alive"""
if self._host_tree:
self._host_tree.cleanup()
self._host_tree = None
for obj in self._objs:
obj.cleanup()
@ -348,9 +340,7 @@ class StoreServer(api.BaseAPI):
sock.send({"path": None})
return
reader = obj.read()
path = self._stack.enter_context(reader)
sock.send({"path": path})
sock.send({"path": obj.tree})
def _read_tree_at(self, msg, sock):
object_id = msg["object-id"]
@ -363,14 +353,16 @@ class StoreServer(api.BaseAPI):
return
try:
reader = obj.read_at(target, subtree)
path = self._stack.enter_context(reader)
source = os.path.join(obj, subtree.lstrip("/"))
mount(source, target)
self._stack.callback(umount, target)
# pylint: disable=broad-except
except Exception as e:
sock.send({"error": str(e)})
return
sock.send({"path": path})
sock.send({"path": target})
def _mkdtemp(self, msg, sock):
args = {

View file

@ -304,7 +304,7 @@ class Pipeline:
# build on the host
if not self.build:
build_tree = objectstore.HostTree(object_store)
build_tree = object_store.host_tree
else:
build_tree = object_store.get(self.build)
@ -333,19 +333,18 @@ class Pipeline:
while todo:
stage = todo.pop()
with build_tree.read() as build_path, tree.write() as path:
monitor.stage(stage)
monitor.stage(stage)
r = stage.run(path,
self.runner,
build_path,
object_store,
monitor,
libdir,
stage_timeout)
r = stage.run(tree,
self.runner,
build_tree,
object_store,
monitor,
libdir,
stage_timeout)
monitor.result(r)
monitor.result(r)
results["stages"].append(r)
if not r.success:

View file

@ -80,9 +80,10 @@ class TestFormatV1(unittest.TestCase):
storedir = pathlib.Path(tmpdir, "store")
monitor = NullMonitor(sys.stderr.fileno())
libdir = os.path.abspath(os.curdir)
store = ObjectStore(storedir)
res = manifest.build(store, manifest.pipelines, monitor, libdir)
with ObjectStore(storedir) as store:
res = manifest.build(store, manifest.pipelines, monitor, libdir)
return res
def test_canonical(self):

View file

@ -44,9 +44,9 @@ class TestObjectStore(unittest.TestCase):
# new object should be in write mode
assert tree.mode == objectstore.Object.Mode.WRITE
with tree.write() as path:
p = Path(path, "A")
p.touch()
p = Path(tree, "A")
p.touch()
# consumes the object, puts it into read mode
object_store.commit(tree, "a")
@ -59,11 +59,10 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(object_store.objects)) == 1
tree = object_store.new("b")
with tree.write() as path:
p = Path(path, "A")
p.touch()
p = Path(path, "B")
p.touch()
p = Path(tree, "A")
p.touch()
p = Path(tree, "B")
p.touch()
# consumes the object, puts it into read mode
object_store.commit(tree, "b")
@ -89,9 +88,9 @@ class TestObjectStore(unittest.TestCase):
with objectstore.ObjectStore(tmp) as object_store:
tree = object_store.new("a")
self.assertEqual(len(os.listdir(object_store.tmp)), 1)
with tree.write() as path:
p = Path(path, "A")
p.touch()
p = Path(tree, "A")
p.touch()
# there should be no temporary Objects dirs anymore
self.assertEqual(len(os.listdir(object_store.tmp)), 0)
@ -105,9 +104,8 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.refs)) == 0
tree = store.new("a")
with tree.write() as path, \
open(os.path.join(path, "data"), "w",
encoding="utf-8") as f:
with open(os.path.join(tree, "data"), "w",
encoding="utf-8") as f:
f.write(data)
st = os.fstat(f.fileno())
data_inode = st.st_ino
@ -119,13 +117,12 @@ class TestObjectStore(unittest.TestCase):
tree = store.get("x")
assert tree is not None
with tree.read() as path:
with open(os.path.join(path, "data"), "r",
encoding="utf-8") as f:
st = os.fstat(f.fileno())
self.assertNotEqual(st.st_ino, data_inode)
data_read = f.read()
self.assertEqual(data, data_read)
with open(os.path.join(tree, "data"), "r",
encoding="utf-8") as f:
st = os.fstat(f.fileno())
self.assertNotEqual(st.st_ino, data_inode)
data_read = f.read()
self.assertEqual(data, data_read)
def test_commit_consume(self):
# operate with a clean object store
@ -137,8 +134,7 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.refs)) == 0
tree = store.new("a")
with tree.write() as path, \
open(os.path.join(path, "data"), "w", encoding="utf8") as f:
with open(os.path.join(tree, "data"), "w", encoding="utf8") as f:
f.write(data)
st = os.fstat(f.fileno())
data_inode = st.st_ino
@ -149,8 +145,7 @@ class TestObjectStore(unittest.TestCase):
# check that "data" is still the very
# same file after committing
with tree.read() as path, \
open(os.path.join(path, "data"), "r", encoding="utf8") as f:
with open(os.path.join(tree, "data"), "r", encoding="utf8") as f:
st = os.fstat(f.fileno())
self.assertEqual(st.st_ino, data_inode)
data_read = f.read()
@ -162,10 +157,9 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.objects)) == 0
base = store.new("a")
with base.write() as path:
p = Path(path, "A")
p.touch()
store.commit(base, "a")
p = Path(base, "A")
p.touch()
store.commit(base, "a")
assert store.contains("a")
assert store_path(store, "a", "A")
@ -173,30 +167,26 @@ class TestObjectStore(unittest.TestCase):
tree = store.new("b")
tree.init(base)
with tree.write() as path:
p = Path(path, "B")
p.touch()
p = Path(tree, "B")
p.touch()
tree.finalize()
with tree.read() as path:
assert os.path.exists(f"{path}/A")
assert os.path.exists(f"{path}/B")
assert os.path.exists(os.path.join(tree, "A"))
assert os.path.exists(os.path.join(tree, "B"))
def test_snapshot(self):
with objectstore.ObjectStore(self.store) as store:
tree = store.new("b")
with tree.write() as path:
p = Path(path, "A")
p.touch()
p = Path(tree, "A")
p.touch()
assert not store.contains("a")
store.commit(tree, "a") # store via "a", creates a clone
assert store.contains("a")
with tree.write() as path:
p = Path(path, "B")
p.touch()
p = Path(tree, "B")
p.touch()
store.commit(tree, "b")
# check the references exist
@ -210,19 +200,21 @@ class TestObjectStore(unittest.TestCase):
assert store_path(store, "b", "B")
def test_host_tree(self):
object_store = objectstore.ObjectStore(self.store)
host = objectstore.HostTree(object_store)
with objectstore.ObjectStore(self.store) as store:
host = store.host_tree
# check we cannot call `write`
with self.assertRaises(ValueError):
with host.write() as _:
pass
assert host.tree
assert os.fspath(host)
# check we actually cannot write to the path
with host.read() as path:
p = Path(path, "osbuild-test-file")
# check we actually cannot write to the path
p = Path(host.tree, "osbuild-test-file")
with self.assertRaises(OSError):
p.touch()
print("FOO")
# We cannot access the tree property after cleanup
with self.assertRaises(AssertionError):
_ = host.tree
# pylint: disable=too-many-statements
def test_store_server(self):
@ -250,16 +242,12 @@ class TestObjectStore(unittest.TestCase):
assert name.startswith("prefix")
assert name.endswith("suffix")
path = client.read_tree("42")
assert path is None
obj = store.new("42")
with obj.write() as path:
p = Path(path, "file.txt")
p.write_text("osbuild")
p = Path(obj, "file.txt")
p.write_text("osbuild")
p = Path(path, "directory")
p.mkdir()
p = Path(obj, "directory")
p.mkdir()
obj.finalize()
mountpoint = Path(tmpdir, "mountpoint")
@ -300,9 +288,3 @@ class TestObjectStore(unittest.TestCase):
with self.assertRaises(RuntimeError):
_ = client.read_tree_at("42", tmpdir, "/nonexistent")
# The tree is being read via the client, should
# not be able to write to it
with self.assertRaises(ValueError):
with obj.write() as _:
pass