objectstore: direct path i/o for Object

The `Object.{read,write}` methods were introduced to implement
copy on write support. Calling `write` would trigger the copy,
if the object had a `base`. Additionally, a level of indirection
was introduced via bind mounts, which allowed to hide the actual
path of the object in the store and make sure that `read` really
returned a read-only path.
Support for copy-on-write was recently removed[1], and thus the
need for the `read` and `write` methods. We lose the benefits
of the indirection, but they are not really needed: the path to
the object is not really hidden since one can always use the
`resolve_ref` method to obtain the actual store object path.
The read only property of build trees is ensured via read only
bind mounts in the build root.
Instead of using `read` and `write`, `Object` now gained a new
`tree` property that is the path to the objects tree and also
is implementing `__fspath__` and so behaves like an `os.PathLike`
object and can thus transparently be used in many places, like
e.g. `os.path.join` or `pathlib.Path`.

[1] 5346025031
This commit is contained in:
Christian Kellner 2022-11-18 17:12:58 +00:00
parent a25ae2b1d5
commit f8ca0cf4bc
4 changed files with 123 additions and 149 deletions

View file

@ -4,7 +4,7 @@ import os
import subprocess import subprocess
import tempfile import tempfile
import uuid import uuid
from typing import Iterator, Optional, Set from typing import Optional, Set
from osbuild.util import jsoncomm, rmrf from osbuild.util import jsoncomm, rmrf
from osbuild.util.mnt import mount, umount from osbuild.util.mnt import mount, umount
@ -52,43 +52,9 @@ class Object:
self._check_mode(Object.Mode.WRITE) self._check_mode(Object.Mode.WRITE)
base.clone(self._path) base.clone(self._path)
@contextlib.contextmanager @property
def write(self) -> Iterator[str]: def tree(self) -> str:
"""Return a path that can be written to""" return self._path
self._check_mode(Object.Mode.WRITE)
with self.tempdir("writer") as target:
mount(self._path, target, ro=False)
try:
yield target
finally:
umount(target)
@contextlib.contextmanager
def read(self) -> Iterator[PathLike]:
with self.tempdir("reader") as target:
with self.read_at(target) as path:
yield path
@contextlib.contextmanager
def read_at(self, target: PathLike, path: str = "/") -> Iterator[PathLike]:
"""Read the object or a part of it at given location
Map the tree or a part of it specified via `path` at the
specified path `target`.
"""
self._check_mode(Object.Mode.READ)
if self._path is None:
raise RuntimeError("read_at with no path.")
path = os.path.join(self._path, path.lstrip("/"))
mount(path, target)
try:
yield target
finally:
umount(target)
def store_tree(self): def store_tree(self):
"""Store the tree with a fresh name and close it """Store the tree with a fresh name and close it
@ -142,17 +108,16 @@ class Object:
def export(self, to_directory: PathLike): def export(self, to_directory: PathLike):
"""Copy object into an external directory""" """Copy object into an external directory"""
with self.read() as from_directory: subprocess.run(
subprocess.run( [
[ "cp",
"cp", "--reflink=auto",
"--reflink=auto", "-a",
"-a", os.fspath(self.tree) + "/.",
os.fspath(from_directory) + "/.", os.fspath(to_directory),
os.fspath(to_directory), ],
], check=True,
check=True, )
)
def clone(self, to_directory: PathLike): def clone(self, to_directory: PathLike):
"""Clone the object to the specified directory""" """Clone the object to the specified directory"""
@ -170,6 +135,9 @@ class Object:
check=True, check=True,
) )
def __fspath__(self):
return self.tree
class HostTree: class HostTree:
"""Read-only access to the host file system """Read-only access to the host file system
@ -179,30 +147,43 @@ class HostTree:
the host file-system. the host file-system.
""" """
_root: Optional[tempfile.TemporaryDirectory]
def __init__(self, store): def __init__(self, store):
self.store = store self.store = store
self._root = None
self.init()
@staticmethod def init(self):
def write(): if self._root:
raise ValueError("Cannot write to host") return
@contextlib.contextmanager self._root = self.store.tempdir(prefix="host")
def read(self):
with self.store.tempdir() as tmp:
# Create a bare bones root file system
# with just /usr mounted from the host
usr = os.path.join(tmp, "usr")
os.makedirs(usr)
mount(tmp, tmp) # ensure / is read-only root = self._root.name
mount("/usr", usr) # Create a bare bones root file system
try: # with just /usr mounted from the host
yield tmp usr = os.path.join(root, "usr")
finally: os.makedirs(usr)
umount(tmp)
# ensure / is read-only
mount(root, root)
mount("/usr", usr)
@property
def tree(self) -> os.PathLike:
if not self._root:
raise AssertionError("HostTree not initialized")
return self._root.name
def cleanup(self): def cleanup(self):
pass # noop for the host if self._root:
umount(self._root.name)
self._root.cleanup()
self._root = None
def __fspath__(self) -> os.PathLike:
return self.tree
class ObjectStore(contextlib.AbstractContextManager): class ObjectStore(contextlib.AbstractContextManager):
@ -216,6 +197,7 @@ class ObjectStore(contextlib.AbstractContextManager):
os.makedirs(self.refs, exist_ok=True) os.makedirs(self.refs, exist_ok=True)
os.makedirs(self.tmp, exist_ok=True) os.makedirs(self.tmp, exist_ok=True)
self._objs: Set[Object] = set() self._objs: Set[Object] = set()
self._host_tree: Optional[HostTree] = None
def _get_floating(self, object_id: str) -> Optional[Object]: def _get_floating(self, object_id: str) -> Optional[Object]:
"""Internal: get a non-committed object""" """Internal: get a non-committed object"""
@ -224,6 +206,12 @@ class ObjectStore(contextlib.AbstractContextManager):
return obj return obj
return None return None
@property
def host_tree(self) -> HostTree:
if not self._host_tree:
self._host_tree = HostTree(self)
return self._host_tree
def contains(self, object_id): def contains(self, object_id):
if not object_id: if not object_id:
return False return False
@ -318,6 +306,10 @@ class ObjectStore(contextlib.AbstractContextManager):
def cleanup(self): def cleanup(self):
"""Cleanup all created Objects that are still alive""" """Cleanup all created Objects that are still alive"""
if self._host_tree:
self._host_tree.cleanup()
self._host_tree = None
for obj in self._objs: for obj in self._objs:
obj.cleanup() obj.cleanup()
@ -348,9 +340,7 @@ class StoreServer(api.BaseAPI):
sock.send({"path": None}) sock.send({"path": None})
return return
reader = obj.read() sock.send({"path": obj.tree})
path = self._stack.enter_context(reader)
sock.send({"path": path})
def _read_tree_at(self, msg, sock): def _read_tree_at(self, msg, sock):
object_id = msg["object-id"] object_id = msg["object-id"]
@ -363,14 +353,16 @@ class StoreServer(api.BaseAPI):
return return
try: try:
reader = obj.read_at(target, subtree) source = os.path.join(obj, subtree.lstrip("/"))
path = self._stack.enter_context(reader) mount(source, target)
self._stack.callback(umount, target)
# pylint: disable=broad-except # pylint: disable=broad-except
except Exception as e: except Exception as e:
sock.send({"error": str(e)}) sock.send({"error": str(e)})
return return
sock.send({"path": path}) sock.send({"path": target})
def _mkdtemp(self, msg, sock): def _mkdtemp(self, msg, sock):
args = { args = {

View file

@ -304,7 +304,7 @@ class Pipeline:
# build on the host # build on the host
if not self.build: if not self.build:
build_tree = objectstore.HostTree(object_store) build_tree = object_store.host_tree
else: else:
build_tree = object_store.get(self.build) build_tree = object_store.get(self.build)
@ -333,19 +333,18 @@ class Pipeline:
while todo: while todo:
stage = todo.pop() stage = todo.pop()
with build_tree.read() as build_path, tree.write() as path:
monitor.stage(stage) monitor.stage(stage)
r = stage.run(path, r = stage.run(tree,
self.runner, self.runner,
build_path, build_tree,
object_store, object_store,
monitor, monitor,
libdir, libdir,
stage_timeout) stage_timeout)
monitor.result(r) monitor.result(r)
results["stages"].append(r) results["stages"].append(r)
if not r.success: if not r.success:

View file

@ -80,9 +80,10 @@ class TestFormatV1(unittest.TestCase):
storedir = pathlib.Path(tmpdir, "store") storedir = pathlib.Path(tmpdir, "store")
monitor = NullMonitor(sys.stderr.fileno()) monitor = NullMonitor(sys.stderr.fileno())
libdir = os.path.abspath(os.curdir) libdir = os.path.abspath(os.curdir)
store = ObjectStore(storedir)
res = manifest.build(store, manifest.pipelines, monitor, libdir) with ObjectStore(storedir) as store:
res = manifest.build(store, manifest.pipelines, monitor, libdir)
return res return res
def test_canonical(self): def test_canonical(self):

View file

@ -44,9 +44,9 @@ class TestObjectStore(unittest.TestCase):
# new object should be in write mode # new object should be in write mode
assert tree.mode == objectstore.Object.Mode.WRITE assert tree.mode == objectstore.Object.Mode.WRITE
with tree.write() as path: p = Path(tree, "A")
p = Path(path, "A") p.touch()
p.touch()
# consumes the object, puts it into read mode # consumes the object, puts it into read mode
object_store.commit(tree, "a") object_store.commit(tree, "a")
@ -59,11 +59,10 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(object_store.objects)) == 1 assert len(os.listdir(object_store.objects)) == 1
tree = object_store.new("b") tree = object_store.new("b")
with tree.write() as path: p = Path(tree, "A")
p = Path(path, "A") p.touch()
p.touch() p = Path(tree, "B")
p = Path(path, "B") p.touch()
p.touch()
# consumes the object, puts it into read mode # consumes the object, puts it into read mode
object_store.commit(tree, "b") object_store.commit(tree, "b")
@ -89,9 +88,9 @@ class TestObjectStore(unittest.TestCase):
with objectstore.ObjectStore(tmp) as object_store: with objectstore.ObjectStore(tmp) as object_store:
tree = object_store.new("a") tree = object_store.new("a")
self.assertEqual(len(os.listdir(object_store.tmp)), 1) self.assertEqual(len(os.listdir(object_store.tmp)), 1)
with tree.write() as path: p = Path(tree, "A")
p = Path(path, "A") p.touch()
p.touch()
# there should be no temporary Objects dirs anymore # there should be no temporary Objects dirs anymore
self.assertEqual(len(os.listdir(object_store.tmp)), 0) self.assertEqual(len(os.listdir(object_store.tmp)), 0)
@ -105,9 +104,8 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.refs)) == 0 assert len(os.listdir(store.refs)) == 0
tree = store.new("a") tree = store.new("a")
with tree.write() as path, \ with open(os.path.join(tree, "data"), "w",
open(os.path.join(path, "data"), "w", encoding="utf-8") as f:
encoding="utf-8") as f:
f.write(data) f.write(data)
st = os.fstat(f.fileno()) st = os.fstat(f.fileno())
data_inode = st.st_ino data_inode = st.st_ino
@ -119,13 +117,12 @@ class TestObjectStore(unittest.TestCase):
tree = store.get("x") tree = store.get("x")
assert tree is not None assert tree is not None
with tree.read() as path: with open(os.path.join(tree, "data"), "r",
with open(os.path.join(path, "data"), "r", encoding="utf-8") as f:
encoding="utf-8") as f: st = os.fstat(f.fileno())
st = os.fstat(f.fileno()) self.assertNotEqual(st.st_ino, data_inode)
self.assertNotEqual(st.st_ino, data_inode) data_read = f.read()
data_read = f.read() self.assertEqual(data, data_read)
self.assertEqual(data, data_read)
def test_commit_consume(self): def test_commit_consume(self):
# operate with a clean object store # operate with a clean object store
@ -137,8 +134,7 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.refs)) == 0 assert len(os.listdir(store.refs)) == 0
tree = store.new("a") tree = store.new("a")
with tree.write() as path, \ with open(os.path.join(tree, "data"), "w", encoding="utf8") as f:
open(os.path.join(path, "data"), "w", encoding="utf8") as f:
f.write(data) f.write(data)
st = os.fstat(f.fileno()) st = os.fstat(f.fileno())
data_inode = st.st_ino data_inode = st.st_ino
@ -149,8 +145,7 @@ class TestObjectStore(unittest.TestCase):
# check that "data" is still the very # check that "data" is still the very
# same file after committing # same file after committing
with tree.read() as path, \ with open(os.path.join(tree, "data"), "r", encoding="utf8") as f:
open(os.path.join(path, "data"), "r", encoding="utf8") as f:
st = os.fstat(f.fileno()) st = os.fstat(f.fileno())
self.assertEqual(st.st_ino, data_inode) self.assertEqual(st.st_ino, data_inode)
data_read = f.read() data_read = f.read()
@ -162,10 +157,9 @@ class TestObjectStore(unittest.TestCase):
assert len(os.listdir(store.objects)) == 0 assert len(os.listdir(store.objects)) == 0
base = store.new("a") base = store.new("a")
with base.write() as path: p = Path(base, "A")
p = Path(path, "A") p.touch()
p.touch() store.commit(base, "a")
store.commit(base, "a")
assert store.contains("a") assert store.contains("a")
assert store_path(store, "a", "A") assert store_path(store, "a", "A")
@ -173,30 +167,26 @@ class TestObjectStore(unittest.TestCase):
tree = store.new("b") tree = store.new("b")
tree.init(base) tree.init(base)
with tree.write() as path: p = Path(tree, "B")
p = Path(path, "B") p.touch()
p.touch()
tree.finalize() tree.finalize()
with tree.read() as path: assert os.path.exists(os.path.join(tree, "A"))
assert os.path.exists(f"{path}/A") assert os.path.exists(os.path.join(tree, "B"))
assert os.path.exists(f"{path}/B")
def test_snapshot(self): def test_snapshot(self):
with objectstore.ObjectStore(self.store) as store: with objectstore.ObjectStore(self.store) as store:
tree = store.new("b") tree = store.new("b")
with tree.write() as path: p = Path(tree, "A")
p = Path(path, "A") p.touch()
p.touch()
assert not store.contains("a") assert not store.contains("a")
store.commit(tree, "a") # store via "a", creates a clone store.commit(tree, "a") # store via "a", creates a clone
assert store.contains("a") assert store.contains("a")
with tree.write() as path: p = Path(tree, "B")
p = Path(path, "B") p.touch()
p.touch()
store.commit(tree, "b") store.commit(tree, "b")
# check the references exist # check the references exist
@ -210,19 +200,21 @@ class TestObjectStore(unittest.TestCase):
assert store_path(store, "b", "B") assert store_path(store, "b", "B")
def test_host_tree(self): def test_host_tree(self):
object_store = objectstore.ObjectStore(self.store) with objectstore.ObjectStore(self.store) as store:
host = objectstore.HostTree(object_store) host = store.host_tree
# check we cannot call `write` assert host.tree
with self.assertRaises(ValueError): assert os.fspath(host)
with host.write() as _:
pass
# check we actually cannot write to the path # check we actually cannot write to the path
with host.read() as path: p = Path(host.tree, "osbuild-test-file")
p = Path(path, "osbuild-test-file")
with self.assertRaises(OSError): with self.assertRaises(OSError):
p.touch() p.touch()
print("FOO")
# We cannot access the tree property after cleanup
with self.assertRaises(AssertionError):
_ = host.tree
# pylint: disable=too-many-statements # pylint: disable=too-many-statements
def test_store_server(self): def test_store_server(self):
@ -250,16 +242,12 @@ class TestObjectStore(unittest.TestCase):
assert name.startswith("prefix") assert name.startswith("prefix")
assert name.endswith("suffix") assert name.endswith("suffix")
path = client.read_tree("42")
assert path is None
obj = store.new("42") obj = store.new("42")
with obj.write() as path: p = Path(obj, "file.txt")
p = Path(path, "file.txt") p.write_text("osbuild")
p.write_text("osbuild")
p = Path(path, "directory") p = Path(obj, "directory")
p.mkdir() p.mkdir()
obj.finalize() obj.finalize()
mountpoint = Path(tmpdir, "mountpoint") mountpoint = Path(tmpdir, "mountpoint")
@ -300,9 +288,3 @@ class TestObjectStore(unittest.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
_ = client.read_tree_at("42", tmpdir, "/nonexistent") _ = client.read_tree_at("42", tmpdir, "/nonexistent")
# The tree is being read via the client, should
# not be able to write to it
with self.assertRaises(ValueError):
with obj.write() as _:
pass