objectstore: don't store objects by their treesum

The treesum of a filesystem tree is the content hash of all its
files, its directory structure and file metadata.

By storing trees by their treesum we avoid storing duplicates of
identical trees, at the cost of computing the hashes for every
commit to the store.

This has limited benefit as the likelihood of two trees being
identical is slim, in particular when we already have the ability
to cache based on pipeline/stage ID (i.e., we can avoid rebuilding
trees if the pipelines that built them were the same).

Drop the concept of a treesum entirely, even though I very much
liked the idea in theory...

Signed-off-by: Tom Gundersen <teg@jklm.no>
This commit is contained in:
Tom Gundersen 2021-10-30 16:02:36 +01:00
parent bf3c80372a
commit e97f6ef34e
3 changed files with 27 additions and 113 deletions

View file

@ -1,15 +1,13 @@
import contextlib import contextlib
import errno
import hashlib
import os import os
import subprocess import subprocess
import tempfile import tempfile
import uuid
from typing import Optional from typing import Optional
from osbuild.util.types import PathLike from osbuild.util.types import PathLike
from osbuild.util import ctx, jsoncomm, rmrf from osbuild.util import jsoncomm, rmrf
from . import api from . import api
from . import treesum
__all__ = [ __all__ = [
@ -86,15 +84,6 @@ class Object:
self._base = base_id self._base = base_id
self.id = base_id self.id = base_id
@property
def treesum(self) -> str:
"""Calculate the treesum of the object"""
with self._open() as fd:
m = hashlib.sha256()
treesum.treesum(m, fd)
treesum_hash = m.hexdigest()
return treesum_hash
@property @property
def _path(self) -> str: def _path(self) -> str:
if self._base and not self._init: if self._base and not self._init:
@ -146,20 +135,21 @@ class Object:
umount(target) umount(target)
self._readers -= 1 self._readers -= 1
def store_tree(self, destination: str): def store_tree(self):
"""Store the tree at destination and reset itself """Store the tree with a fresh name and reset itself
Moves the tree atomically by using rename(2). If the Moves the tree atomically by using rename(2), to a
target already exist, does nothing. Afterwards it randomly generated unique name. Afterwards it resets
resets itself and can be used as if it was new. itself and can be used as if it was new.
""" """
self._check_writable() self._check_writable()
self._check_readers() self._check_readers()
self._check_writer() self._check_writer()
self.init() self.init()
with ctx.suppress_oserror(errno.ENOTEMPTY, errno.EEXIST): destination = str(uuid.uuid4())
os.rename(self._tree, destination) os.rename(self._tree, os.path.join(self.store.objects, destination))
self.reset() self.reset()
return destination
def reset(self): def reset(self):
self.cleanup() self.cleanup()
@ -349,41 +339,34 @@ class ObjectStore(contextlib.AbstractContextManager):
"""Commits a Object to the object store """Commits a Object to the object store
Move the contents of the obj (Object) to object directory Move the contents of the obj (Object) to object directory
of the store with the content hash (obj.treesum) as its name. of the store with a universally unique name. Creates a
Creates a symlink to that ('objects/{hash}') in the references symlink to that ('objects/{hash}') in the references
directory with the object_id as the name ('refs/{object_id}). directory with the object_id as the name ('refs/{object_id}).
If the link already exists, it will be atomically replaced. If the link already exists, it will be atomically replaced.
Returns: The treesum of the object Returns: The name of the object
""" """
treesum_hash = obj.treesum
# the object is stored in the objects directory using its content # the object is stored in the objects directory using its unique
# hash as its name, ideally a given object_id (i.e., given config) # name. This means that eatch commit will always result in a new
# will always produce the same content hash, but that is not # object in the store, even if an identical one exists.
# guaranteed. If an object with the same treesum already exist, us object_name = obj.store_tree()
# the existing one instead
obj.store_tree(os.path.join(self.objects, treesum_hash))
# symlink the object_id (config hash) in the refs directory to the # symlink the object_id (config hash) in the refs directory to the
# treesum (content hash) in the objects directory. If a symlink by # object name in the objects directory. If a symlink by that name
# that name already exists, atomically replace it, but leave the # already exists, atomically replace it, but leave the backing object
# backing object in place (it may be in use). # in place (it may be in use).
with self.tempdir() as tmp: with self.tempdir() as tmp:
link = f"{tmp}/link" link = f"{tmp}/link"
os.symlink(f"../objects/{treesum_hash}", link) os.symlink(f"../objects/{object_name}", link)
os.replace(link, self.resolve_ref(object_id)) os.replace(link, self.resolve_ref(object_id))
# the reference that is pointing to `treesum_hash` is now the base # the reference that is pointing to `object_name` is now the base
# of `obj`. It is not actively initialized but any subsequent calls # of `obj`. It is not actively initialized but any subsequent calls
# to `obj.write()` will initialize it again # to `obj.write()` will initialize it again
# NB: in the case that an object with the same treesum as `obj`
# already existed in the store obj.store_tree() will not actually
# have written anything to the store. In this case `obj` will then
# be initialized with the content of the already existing object.
obj.base = object_id obj.base = object_id
return treesum_hash return object_name
def cleanup(self): def cleanup(self):
"""Cleanup all created Objects that are still alive""" """Cleanup all created Objects that are still alive"""

View file

@ -1,63 +0,0 @@
import errno
import json
import os
import stat
#pylint: disable=too-many-branches
def treesum(m, dir_fd):
"""Compute a content hash of a filesystem tree
Parameters
----------
m : hash object
the hash object to append the treesum to
dir_fd : int
directory file descriptor number to operate on
The hash is stable between runs, and guarantees that two filesystem
trees with the same hash, are functionally equivalent from the OS
point of view.
The file, symlink and directory names and contents are recursively
hashed, together with security-relevant metadata."""
with os.scandir(f"/proc/self/fd/{dir_fd}") as it:
for dirent in sorted(it, key=(lambda d: d.name)):
stat_result = dirent.stat(follow_symlinks=False)
metadata = {}
metadata["name"] = os.fsdecode(dirent.name)
metadata["mode"] = stat_result.st_mode
metadata["uid"] = stat_result.st_uid
metadata["gid"] = stat_result.st_gid
# include the size of symlink target/file-contents so we don't have to delimit it
metadata["size"] = stat_result.st_size
# getxattr cannot operate on a dir_fd, so do a trick and rely on the entries in /proc
stable_file_path = os.path.join(f"/proc/self/fd/{dir_fd}", dirent.name)
try:
selinux_label = os.getxattr(stable_file_path, b"security.selinux", follow_symlinks=False)
except OSError as e:
# SELinux support is optional
if e.errno != errno.ENODATA:
raise
else:
metadata["selinux"] = os.fsdecode(selinux_label)
# hash the JSON representation of the metadata to stay unique/stable/well-defined
m.update(json.dumps(metadata, sort_keys=True).encode())
if dirent.is_symlink():
m.update(os.fsdecode(os.readlink(dirent.name, dir_fd=dir_fd)).encode())
else:
fd = os.open(dirent.name, flags=os.O_RDONLY, dir_fd=dir_fd)
try:
if dirent.is_dir(follow_symlinks=False):
treesum(m, fd)
elif dirent.is_file(follow_symlinks=False):
# hash a page at a time (using f with fd as default is a hack to please pylint)
for byte_block in iter(lambda f=fd: os.read(f, 4096), b""):
m.update(byte_block)
elif stat.S_ISCHR(stat_result.st_mode) or stat.S_ISBLK(stat_result.st_mode):
m.update(json.dumps({"dev": stat_result.st_rdev}).encode())
else:
raise ValueError("Found unexpected filetype on OS image.")
finally:
os.close(fd)

View file

@ -101,7 +101,7 @@ class TestObjectStore(unittest.TestCase):
assert os.path.exists(f"{object_store.refs}/b/A") assert os.path.exists(f"{object_store.refs}/b/A")
assert len(os.listdir(object_store.refs)) == 2 assert len(os.listdir(object_store.refs)) == 2
assert len(os.listdir(object_store.objects)) == 1 assert len(os.listdir(object_store.objects)) == 2
assert len(os.listdir(f"{object_store.refs}/a/")) == 1 assert len(os.listdir(f"{object_store.refs}/a/")) == 1
assert len(os.listdir(f"{object_store.refs}/b/")) == 1 assert len(os.listdir(f"{object_store.refs}/b/")) == 1
@ -133,7 +133,7 @@ class TestObjectStore(unittest.TestCase):
assert os.path.exists(f"{object_store.refs}/c/C") assert os.path.exists(f"{object_store.refs}/c/C")
assert len(os.listdir(object_store.refs)) == 3 assert len(os.listdir(object_store.refs)) == 3
assert len(os.listdir(object_store.objects)) == 2 assert len(os.listdir(object_store.objects)) == 3
def test_object_copy_on_write(self): def test_object_copy_on_write(self):
# operate with a clean object store # operate with a clean object store
@ -152,7 +152,7 @@ class TestObjectStore(unittest.TestCase):
st = os.fstat(f.fileno()) st = os.fstat(f.fileno())
data_inode = st.st_ino data_inode = st.st_ino
# commit the object as "x" # commit the object as "x"
x_hash = object_store.commit(tree, "x") object_store.commit(tree, "x")
# after the commit, "x" is now the base # after the commit, "x" is now the base
# of "tree" # of "tree"
self.assertEqual(tree.base, "x") self.assertEqual(tree.base, "x")
@ -171,7 +171,6 @@ class TestObjectStore(unittest.TestCase):
# the very same content # the very same content
with object_store.new(base_id="x") as tree: with object_store.new(base_id="x") as tree:
self.assertEqual(tree.base, "x") self.assertEqual(tree.base, "x")
self.assertEqual(tree.treesum, x_hash)
with tree.read() as path: with tree.read() as path:
with open(os.path.join(path, "data"), "r") as f: with open(os.path.join(path, "data"), "r") as f:
# copy-on-write: since we have not written # copy-on-write: since we have not written
@ -189,9 +188,6 @@ class TestObjectStore(unittest.TestCase):
self.assertNotEqual(st.st_ino, data_inode) self.assertNotEqual(st.st_ino, data_inode)
p = Path(path, "other_data") p = Path(path, "other_data")
p.touch() p.touch()
# now that we have written, the treesum
# should have changed
self.assertNotEqual(tree.treesum, x_hash)
def test_object_mode(self): def test_object_mode(self):
object_store = objectstore.ObjectStore(self.store) object_store = objectstore.ObjectStore(self.store)
@ -205,9 +201,7 @@ class TestObjectStore(unittest.TestCase):
# check multiple readers are ok # check multiple readers are ok
with tree.read() as _: with tree.read() as _:
# calculating the treesum also is reading, pass
# so this is 3 nested readers
_ = tree.treesum
# writing should still fail # writing should still fail
with self.assertRaises(ValueError): with self.assertRaises(ValueError):