objectstore: don't store objects by their treesum

The treesum of a filesystem tree is the content hash of all its
files, its directory structure and file metadata.

By storing trees by their treesum we avoid storing duplicates of
identical trees, at the cost of computing the hashes for every
commit to the store.

This has limited benefit as the likelihood of two trees being
identical is slim, in particular when we already have the ability
to cache based on pipeline/stage ID (i.e., we can avoid rebuilding
trees if the pipelines that built them were the same).

Drop the concept of a treesum entirely, even though I very much
liked the idea in theory...

Signed-off-by: Tom Gundersen <teg@jklm.no>
This commit is contained in:
Tom Gundersen 2021-10-30 16:02:36 +01:00
parent bf3c80372a
commit e97f6ef34e
3 changed files with 27 additions and 113 deletions

View file

@ -1,15 +1,13 @@
import contextlib
import errno
import hashlib
import os
import subprocess
import tempfile
import uuid
from typing import Optional
from osbuild.util.types import PathLike
from osbuild.util import ctx, jsoncomm, rmrf
from osbuild.util import jsoncomm, rmrf
from . import api
from . import treesum
__all__ = [
@ -86,15 +84,6 @@ class Object:
self._base = base_id
self.id = base_id
@property
def treesum(self) -> str:
"""Calculate the treesum of the object"""
with self._open() as fd:
m = hashlib.sha256()
treesum.treesum(m, fd)
treesum_hash = m.hexdigest()
return treesum_hash
@property
def _path(self) -> str:
if self._base and not self._init:
@ -146,20 +135,21 @@ class Object:
umount(target)
self._readers -= 1
def store_tree(self, destination: str):
"""Store the tree at destination and reset itself
def store_tree(self):
"""Store the tree with a fresh name and reset itself
Moves the tree atomically by using rename(2). If the
target already exist, does nothing. Afterwards it
resets itself and can be used as if it was new.
Moves the tree atomically by using rename(2), to a
randomly generated unique name. Afterwards it resets
itself and can be used as if it was new.
"""
self._check_writable()
self._check_readers()
self._check_writer()
self.init()
with ctx.suppress_oserror(errno.ENOTEMPTY, errno.EEXIST):
os.rename(self._tree, destination)
destination = str(uuid.uuid4())
os.rename(self._tree, os.path.join(self.store.objects, destination))
self.reset()
return destination
def reset(self):
self.cleanup()
@ -349,41 +339,34 @@ class ObjectStore(contextlib.AbstractContextManager):
"""Commits a Object to the object store
Move the contents of the obj (Object) to object directory
of the store with the content hash (obj.treesum) as its name.
Creates a symlink to that ('objects/{hash}') in the references
of the store with a universally unique name. Creates a
symlink to that ('objects/{hash}') in the references
directory with the object_id as the name ('refs/{object_id}).
If the link already exists, it will be atomically replaced.
Returns: The treesum of the object
Returns: The name of the object
"""
treesum_hash = obj.treesum
# the object is stored in the objects directory using its content
# hash as its name, ideally a given object_id (i.e., given config)
# will always produce the same content hash, but that is not
# guaranteed. If an object with the same treesum already exist, us
# the existing one instead
obj.store_tree(os.path.join(self.objects, treesum_hash))
# the object is stored in the objects directory using its unique
# name. This means that eatch commit will always result in a new
# object in the store, even if an identical one exists.
object_name = obj.store_tree()
# symlink the object_id (config hash) in the refs directory to the
# treesum (content hash) in the objects directory. If a symlink by
# that name already exists, atomically replace it, but leave the
# backing object in place (it may be in use).
# object name in the objects directory. If a symlink by that name
# already exists, atomically replace it, but leave the backing object
# in place (it may be in use).
with self.tempdir() as tmp:
link = f"{tmp}/link"
os.symlink(f"../objects/{treesum_hash}", link)
os.symlink(f"../objects/{object_name}", link)
os.replace(link, self.resolve_ref(object_id))
# the reference that is pointing to `treesum_hash` is now the base
# the reference that is pointing to `object_name` is now the base
# of `obj`. It is not actively initialized but any subsequent calls
# to `obj.write()` will initialize it again
# NB: in the case that an object with the same treesum as `obj`
# already existed in the store obj.store_tree() will not actually
# have written anything to the store. In this case `obj` will then
# be initialized with the content of the already existing object.
obj.base = object_id
return treesum_hash
return object_name
def cleanup(self):
"""Cleanup all created Objects that are still alive"""

View file

@ -1,63 +0,0 @@
import errno
import json
import os
import stat
#pylint: disable=too-many-branches
def treesum(m, dir_fd):
"""Compute a content hash of a filesystem tree
Parameters
----------
m : hash object
the hash object to append the treesum to
dir_fd : int
directory file descriptor number to operate on
The hash is stable between runs, and guarantees that two filesystem
trees with the same hash, are functionally equivalent from the OS
point of view.
The file, symlink and directory names and contents are recursively
hashed, together with security-relevant metadata."""
with os.scandir(f"/proc/self/fd/{dir_fd}") as it:
for dirent in sorted(it, key=(lambda d: d.name)):
stat_result = dirent.stat(follow_symlinks=False)
metadata = {}
metadata["name"] = os.fsdecode(dirent.name)
metadata["mode"] = stat_result.st_mode
metadata["uid"] = stat_result.st_uid
metadata["gid"] = stat_result.st_gid
# include the size of symlink target/file-contents so we don't have to delimit it
metadata["size"] = stat_result.st_size
# getxattr cannot operate on a dir_fd, so do a trick and rely on the entries in /proc
stable_file_path = os.path.join(f"/proc/self/fd/{dir_fd}", dirent.name)
try:
selinux_label = os.getxattr(stable_file_path, b"security.selinux", follow_symlinks=False)
except OSError as e:
# SELinux support is optional
if e.errno != errno.ENODATA:
raise
else:
metadata["selinux"] = os.fsdecode(selinux_label)
# hash the JSON representation of the metadata to stay unique/stable/well-defined
m.update(json.dumps(metadata, sort_keys=True).encode())
if dirent.is_symlink():
m.update(os.fsdecode(os.readlink(dirent.name, dir_fd=dir_fd)).encode())
else:
fd = os.open(dirent.name, flags=os.O_RDONLY, dir_fd=dir_fd)
try:
if dirent.is_dir(follow_symlinks=False):
treesum(m, fd)
elif dirent.is_file(follow_symlinks=False):
# hash a page at a time (using f with fd as default is a hack to please pylint)
for byte_block in iter(lambda f=fd: os.read(f, 4096), b""):
m.update(byte_block)
elif stat.S_ISCHR(stat_result.st_mode) or stat.S_ISBLK(stat_result.st_mode):
m.update(json.dumps({"dev": stat_result.st_rdev}).encode())
else:
raise ValueError("Found unexpected filetype on OS image.")
finally:
os.close(fd)

View file

@ -101,7 +101,7 @@ class TestObjectStore(unittest.TestCase):
assert os.path.exists(f"{object_store.refs}/b/A")
assert len(os.listdir(object_store.refs)) == 2
assert len(os.listdir(object_store.objects)) == 1
assert len(os.listdir(object_store.objects)) == 2
assert len(os.listdir(f"{object_store.refs}/a/")) == 1
assert len(os.listdir(f"{object_store.refs}/b/")) == 1
@ -133,7 +133,7 @@ class TestObjectStore(unittest.TestCase):
assert os.path.exists(f"{object_store.refs}/c/C")
assert len(os.listdir(object_store.refs)) == 3
assert len(os.listdir(object_store.objects)) == 2
assert len(os.listdir(object_store.objects)) == 3
def test_object_copy_on_write(self):
# operate with a clean object store
@ -152,7 +152,7 @@ class TestObjectStore(unittest.TestCase):
st = os.fstat(f.fileno())
data_inode = st.st_ino
# commit the object as "x"
x_hash = object_store.commit(tree, "x")
object_store.commit(tree, "x")
# after the commit, "x" is now the base
# of "tree"
self.assertEqual(tree.base, "x")
@ -171,7 +171,6 @@ class TestObjectStore(unittest.TestCase):
# the very same content
with object_store.new(base_id="x") as tree:
self.assertEqual(tree.base, "x")
self.assertEqual(tree.treesum, x_hash)
with tree.read() as path:
with open(os.path.join(path, "data"), "r") as f:
# copy-on-write: since we have not written
@ -189,9 +188,6 @@ class TestObjectStore(unittest.TestCase):
self.assertNotEqual(st.st_ino, data_inode)
p = Path(path, "other_data")
p.touch()
# now that we have written, the treesum
# should have changed
self.assertNotEqual(tree.treesum, x_hash)
def test_object_mode(self):
object_store = objectstore.ObjectStore(self.store)
@ -205,9 +201,7 @@ class TestObjectStore(unittest.TestCase):
# check multiple readers are ok
with tree.read() as _:
# calculating the treesum also is reading,
# so this is 3 nested readers
_ = tree.treesum
pass
# writing should still fail
with self.assertRaises(ValueError):