util/fscache: provide store_tree() helper

Add a helper that copies an entire directory tree including all metadata
into the cache. Use it in the ObjectStore to commit entries.

Unlike FsCache.store() this does not require entering the context from
the call-site. Instead, all data is directly passed to the cache and the
operation is under full control of the cache.

The ObjectStore is adjusted to make use of this. This requires making
the root-path (rather than only the tree-path) of individual objects
accessible, hence a `path`-@property is added alongside the
`tree`-@property. Note that `__fspath__` still refers to the tree-path,
since this is the only path really required for outside access other
than from the object-manager itself.

Signed-off-by: David Rheinsberg <david.rheinsberg@gmail.com>
This commit is contained in:
David Rheinsberg 2022-12-15 12:38:16 +01:00
parent 50f8f6ac47
commit 8a9efa89fc
3 changed files with 109 additions and 27 deletions

View file

@ -171,13 +171,27 @@ class Object:
self._check_mode(Object.Mode.WRITE)
assert self.active
assert self._path
base.clone(self._path)
subprocess.run(
[
"cp",
"--reflink=auto",
"-a",
os.fspath(base.path) + "/.",
os.fspath(self.path),
],
check=True,
)
@property
def path(self) -> str:
    """Return the root path of this object.

    The object must be active and must have a path assigned; both
    conditions are enforced with assertions.
    """
    assert self.active
    root = self._path
    assert root
    return root
@property
def tree(self) -> str:
# Path of the "tree" entry directly below the object's root path.
# NOTE(review): this span comes from a diff rendering; the two `return`
# lines below look like the old and the new variant of the same
# statement - confirm which one is current before touching the code.
assert self.active
assert self._path
return os.path.join(self._path, "tree")
return os.path.join(self.path, "tree")
@property
def meta(self) -> Metadata:
@ -243,22 +257,6 @@ class Object:
check=True,
)
def clone(self, to_directory: PathLike):
    """Clone the object to the specified directory

    The content below the object's path is copied with `cp -a`, using
    reflinks where the file system supports them. A failing `cp`
    surfaces as `subprocess.CalledProcessError`.
    """
    assert self._path

    # The trailing `/.` makes `cp` copy the directory's content rather
    # than the directory entry itself.
    source = os.fspath(self._path) + "/."
    target = os.fspath(to_directory)

    command = ["cp", "--reflink=auto", "-a", source, target]
    subprocess.run(command, check=True)
def __fspath__(self):
    """Return the tree path, so `os.fspath()` on the object yields it."""
    tree_path = self.tree
    return tree_path
@ -421,13 +419,12 @@ class ObjectStore(contextlib.AbstractContextManager):
assert self.active
with self.cache.store(object_id) as name:
path = os.path.join(self.cache, name)
# we clamp the mtime of `obj` itself so that
# resuming a snapshot and building with a snapshot
# go through the same code path
obj.clamp_mtime()
obj.clone(path)
# we clamp the mtime of `obj` itself so that
# resuming a snapshot and building with a snapshot
# go through the same code path
obj.clamp_mtime()
self.cache.store_tree(object_id, obj.path + "/.")
def cleanup(self):
"""Cleanup all created Objects that are still alive"""

View file

@ -13,6 +13,7 @@ import ctypes
import errno
import json
import os
import subprocess
import uuid
from typing import Any, Dict, NamedTuple, Optional, Tuple, Union
@ -1044,3 +1045,49 @@ class FsCache(contextlib.AbstractContextManager, os.PathLike):
json.dump(info_raw, f)
self._load_cache_info(info)
def store_tree(self, name: str, tree: Any):
    """Store file system tree in cache

    Create a new cache entry called `name` that holds a copy of the file
    system tree found at `tree`. This works like `store()`, except that
    no context is handed to the caller: the copy is performed by the
    cache itself.

    As with `store()`, a committed entry is immediately unlocked and
    released to the cache, so a parallel cleanup may remove it at any
    time. A caller therefore cannot rely on the entry still being
    present once this call returns.

    A `tree` that points to a file copies that file. A `tree` that
    points to a directory copies the whole directory including its root
    entry; use the `path/.` notation to copy only the content without
    the root entry. Links are never followed but copied verbatim, and
    all metadata is preserved where possible.

    Parameters:
    -----------
    name
        Name to store the object under.
    tree
        Path to the file system tree to copy.

    Raises:
    -------
    RuntimeError
        If the `cp` invocation exits with a non-zero status. The message
        carries the exit code and the combined output of `cp`.
    """

    with self.store(name) as rpath_data:
        source = os.fspath(tree)
        destination = self._path(rpath_data)

        # `--` ends option parsing, so a source path starting with `-`
        # is not mistaken for an option.
        command = ["cp", "--reflink=auto", "-a", "--", source, destination]

        # stderr is folded into stdout so that the diagnostics of `cp`
        # can be quoted in the error message below.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf-8",
            check=False,
        )

        if result.returncode != 0:
            code = result.returncode
            msg = result.stdout.strip()
            raise RuntimeError(f"Cannot copy into file-system cache ({code}): {msg}")

View file

@ -290,6 +290,44 @@ def test_load(tmpdir):
pass
def test_store_tree(tmpdir):
#
# API tests for the `store_tree()` method.
#
cache = fscache.FsCache("osbuild-test-appid", tmpdir)
# The cache has not been entered at this point, so the call is expected
# to trip an assertion.
with pytest.raises(AssertionError):
cache.store_tree("foobar", "invalid/dir")
with cache:
# Raise the size limit so that it does not get in the way of the stores below.
cache.info = cache.info._replace(maximum_size=1024*1024*1024)
# An empty name is expected to be rejected with ValueError; a source
# that cannot be copied is expected to surface as RuntimeError.
with pytest.raises(ValueError):
cache.store_tree("", "invalid/dir")
with pytest.raises(RuntimeError):
cache.store_tree("key", "invalid/dir")
# Fixture: one file next to `tree/` ("outside") and two files inside `tree/`.
with tempfile.TemporaryDirectory(dir="/var/tmp") as tmp:
with open(os.path.join(tmp, "outside"), "x", encoding="utf8") as f:
f.write("foo")
os.mkdir(os.path.join(tmp, "tree"))
with open(os.path.join(tmp, "tree", "inside"), "x", encoding="utf8") as f:
f.write("bar")
with open(os.path.join(tmp, "tree", "more-inside"), "x", encoding="utf8") as f:
f.write("foobar")
# Store the directory itself (no trailing `/.`), so the root entry
# `tree` is expected to be part of the copy.
cache.store_tree("key", os.path.join(tmp, "tree"))
# The entry must contain only `tree` (not "outside"), and `tree` must
# hold both files with their original content.
with cache.load("key") as rpath:
assert len(list(os.scandir(os.path.join(cache, rpath)))) == 1
assert len(list(os.scandir(os.path.join(cache, rpath, "tree")))) == 2
with open(os.path.join(cache, rpath, "tree", "inside"), "r", encoding="utf8") as f:
assert f.read() == "bar"
with open(os.path.join(cache, rpath, "tree", "more-inside"), "r", encoding="utf8") as f:
assert f.read() == "foobar"
def test_basic(tmpdir):
#
# A basic cache store+load test.