"""File System Cache
This module implements a data cache that uses the file system to store data
as well as protect parallel access. It implements automatic cache management
and allows purging the cache during runtime, pruning old entries and keeping
the cache under a given limit.
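A minimal usage sketch (application-ID, path, and entry name are
illustrative; with the default maximum size of 0 the commit in `store()`
is skipped, so the size limit is raised first):
    with FsCache("my-app", "/var/cache/my-app") as cache:
        cache.info = cache.info._replace(maximum_size="unlimited")
        with cache.store("my-entry") as rpath:
            with open(os.path.join(cache, rpath, "blob"), "w") as f:
                f.write("content")
        try:
            with cache.load("my-entry") as rpath:
                print(os.path.join(cache, rpath, "blob"))
        except FsCache.MissError:
            pass  # a cached entry can vanish at any time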
"""
# pylint: disable=too-many-lines
import contextlib
import ctypes
import errno
import json
import os
import subprocess
import uuid
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
from osbuild.util import ctx, linux, rmrf
__all__ = [
"FsCache",
"FsCacheInfo",
]
MaximumSizeType = Optional[Union[int, str]]
class FsCacheInfo(NamedTuple):
"""File System Cache Information
This type represents static cache information. It is an immutable named
tuple and used to query or set the configuration of a cache.
creation_boot_id - Hashed linux boot-id at the time of cache-creation
maximum_size - Maximum cache size in bytes, or "unlimited"
version - version of the cache data structures
"""
creation_boot_id: Optional[str] = None
maximum_size: MaximumSizeType = None
version: Optional[int] = None
@classmethod
def from_json(cls, data: Any) -> "FsCacheInfo":
"""Create tuple from parsed JSON
This takes a parsed JSON value and converts it into a tuple with the
same information. Unknown fields in the input are ignored. The input
is usually taken from `json.load()` and similar.
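A sketch with illustrative input (unknown fields are dropped):
    FsCacheInfo.from_json({"maximum-size": "unlimited", "other": 1})
    # -> FsCacheInfo(creation_boot_id=None, maximum_size="unlimited", version=None)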
"""
if not isinstance(data, dict):
return cls()
creation_boot_id = None
maximum_size: MaximumSizeType = None
version = None
# parse "creation-boot-id"
_creation_boot_id = data.get("creation-boot-id")
if isinstance(_creation_boot_id, str) and len(_creation_boot_id) == 32:
creation_boot_id = _creation_boot_id
# parse "maximum-size"
_maximum_size = data.get("maximum-size")
if isinstance(_maximum_size, int):
maximum_size = _maximum_size
elif isinstance(_maximum_size, str) and _maximum_size == "unlimited":
maximum_size = "unlimited"
# parse "version"
_version = data.get("version")
if isinstance(_version, int):
version = _version
# create immutable tuple
return cls(
creation_boot_id,
maximum_size,
version,
)
def to_json(self) -> Dict[str, Any]:
"""Convert tuple into parsed JSON
Return a parsed JSON value that represents the same values as this
tuple does. Unset values are skipped. The returned value can be
converted into formatted JSON via `json.dump()` and similar.
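Unset fields are omitted, for example:
    FsCacheInfo(maximum_size=1024).to_json()
    # -> {"maximum-size": 1024}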
"""
data: Dict[str, Any] = {}
if self.creation_boot_id is not None:
data["creation-boot-id"] = self.creation_boot_id
if self.maximum_size is not None:
data["maximum-size"] = self.maximum_size
if self.version is not None:
data["version"] = self.version
return data
class FsCacheObjectInfo(NamedTuple):
""" File System Cache object information
This type represents information about a single cache object. The
last_used information is only guaranteed to be valid while the cache
is locked.
"""
name: str
last_used: float
class FsCache(contextlib.AbstractContextManager, os.PathLike):
"""File System Cache
This file system cache context represents an on-disk cache. That is, it
allows storing information on the file system, and retrieving it from other
contexts.
A single cache directory can be shared between many processes at the same
time. The cache protects access to the cached data. The cache must not be
shared over non-coherent network storage, but is designed for system-local
linux file-systems.
The file-system layout is as follows:
[cache]/
├── cache.info
├── cache.lock
├── cache.size
├── objects/
│ ├── [id0]
│ ├── [id1]/
│ │ ├── data/
│ │ │ └── ...
│ │ ├── object.info
│ │ └── object.lock
│ └── ...
└── stage/
├── uuid-[uuid0]
├── uuid-[uuid1]/
│ ├── data/
│ │ └── ...
│ ├── object.info
│ └── object.lock
└── ...
The central data store is in the `objects` subdirectory. Every cache entry
has a separate subdirectory there. To guard access, a read-lock on
`object.lock` is required for all readers, a write-lock is required for all
writers. Static information about the object is available in the
`object.info` file.
As an optimization, entries in the object store consisting of a single
file can be stored directly underneath `objects` without a separate
subdirectory hierarchy. Their guarding lock is directly taken on this file
and no metadata is available, other than the file information itself. This
is used extensively by the cache management to prepare objects for atomic
replacements. Due to lack of metadata, they are volatile and can be
deleted as soon as they are unlocked.
Generally, access to the cache is non-blocking. That is, if a read-lock
cannot be acquired, an entry is considered non-existent. Thus, unless
treated as a `write-once` cache, cache efficiency will decrease when taking
write-locks.
The `data/` directory contains the content of a cache entry. Its content
is solely defined by the creator of the entry and the cache makes no
assumptions about its layout. Note that the `data/` directory itself can be
modified (e.g., permission-changes) if an unnamed top-level directory is
desired (e.g., to store a directory tree).
Additionally to the `objects/` directory, a similar `stage/` directory is
provided. This directory is `write-only` and used to prepare entries for
the object store before committing them. The staging area is optional. It
is completely safe to do the same directly in the object store. However,
the separation allows putting the staging area on a different file-system
(e.g., symlinking to a tmpfs), and thus improving performance for larger
operations. Otherwise, the staging area follows the same rules as the
object store, except that only writers are expected. Hence, staging entries
always use a unique UUID as name. To commit a staging entry, a user is
expected to create an entry in the object store and copy/move the `data/`
directory over.
To guard against parallel accesses, a set of locks is utilized. Generally,
a `*.lock`-file locks the directory it is in, while a lock on any other
file just locks that file (unfortunately, we cannot acquire write-locks on
directories directly, since it would require opening them for writing,
which is not possible on linux). `cache.lock` can be used to guard the
entire cache. A write-lock will keep any other parallel operation out,
while a read-lock merely acquires cache access (you are still allowed to
modify the cache, but need fine-grained locking). Hence, a write-lock on the
global `cache.lock` file is only required for operations that cannot use
fine-grained locking. The latter requires individual locking for each file
or each object store entry you modify. In all those cases you must account
for parallel modifications, since lock acquisition on file-systems can only
be done after opening a file.
"""
class MissError(Exception):
"""Cache Miss Exception
This error is raised when a cache entry is not found. Due to the
shared nature of the cache, a caller must be aware that any entry can
be created or deleted by other concurrent operations, at any point in
time. Hence, a cache miss only reflects the state of the cache at a
particular time under a particular lock.
"""
# static parameters
_dirname_data = "data"
_dirname_objects = "objects"
_dirname_stage = "stage"
_filename_cache_info = "cache.info"
_filename_cache_lock = "cache.lock"
_filename_cache_size = "cache.size"
_filename_cache_tag = "CACHEDIR.TAG"
_filename_object_info = "object.info"
_filename_object_lock = "object.lock"
_version_current = 1
_version_minimum = 1
# constant properties
_appid: str
_tracers: Dict[str, Any]
_path_cache: Any
# context-manager properties
_active: bool
_bootid: Optional[str]
_lock: Optional[int]
_info: FsCacheInfo
_info_maximum_size: int
def __init__(self, appid: str, path_cache: Any):
"""Create File System Cache
This instantiates a new file-system cache object. It does not create the
cache on the file system, nor access any of its content. You must enter its
context-manager to prepare the cache for access. Any access outside of a
context-manager will raise an assertion error, unless explicitly stated
otherwise.
Parameters:
-----------
appid
The application-ID of the caller. This can be any random string. It
is used to initialize the application-specific boot-ID used to tag
caches and detect whether an entry was created during the same boot.
path_cache
The path to the cache directory. The directory (and the path to it)
is created if it does not exist.
"""
self._appid = appid
self._tracers = {}
self._path_cache = os.fspath(path_cache)
self._active = False
self._bootid = None
self._lock = None
self._info = FsCacheInfo()
self._info_maximum_size = 0
def _trace(self, trace: str):
"""Trace execution
Execute registered trace-hooks for the given trace string. This allows
tests to register callbacks that are executed at runtime at a specific
location in the code. During normal operation, no such hooks should be
used.
The trace-hooks are used to trigger race-conditions during tests and
verify they are handled gracefully.
Parameters:
-----------
trace
The trace-hook to run.
"""
if trace in self._tracers:
self._tracers[trace]()
@staticmethod
def _calculate_space(path_target: str) -> int:
"""Calculate total space of a directory tree
Calculate the total amount of storage required for a directory tree in
bytes. This does not account for metadata, but only for stored file
content.
Note that this may differ from the sum of the file sizes as it
takes sparse files into account.
Parameters:
-----------
path_target
File-system path to the directory to operate on.
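For example, a fully allocated 4 KiB file reports `st_blocks == 8`
(counted in 512-byte units) and thus accounts for 4096 bytes, while
a sparse file of the same apparent size may account for 0 bytes.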
"""
return os.lstat(path_target).st_blocks * 512 + sum(
os.lstat(
os.path.join(path, f)
).st_blocks * 512 for path, dirs, files in os.walk(
path_target
) for f in files + dirs
)
def __fspath__(self) -> Any:
"""Return cache path
Return the path to this cache as provided to the constructor of the
cache. No conversions are applied, so the path is absolute if the
path as provided by the caller was absolute, and vice-versa.
This is part of the `os.PathLike` interface. See its documentation.
"""
return self._path_cache
def _path(self, *rpaths):
"""Return absolute path into cache location
Take the relative path from the caller and turn it into an absolute
path. Since most operations take a relative path from the cache root
to a cache location, this function can be used to make those paths
absolute.
Parameters:
-----------
rpaths
Relative paths from cache root to the desired cache location.
"""
return os.path.join(self, *rpaths)
@contextlib.contextmanager
def _atomic_open(
self,
rpath: str,
*,
wait: bool,
write: bool,
closefd: bool = True,
oflags: int = 0,
):
"""Atomically open and lock file
Open the cache-file at the specified relative path and acquire a
lock on it. Yield the file-descriptor to the caller. Once control
returns, all locks are released (if not already done so by the
caller) and the file-descriptor is closed.
Note that this operation involves a retry-loop in case the file is
replaced or moved before the lock is acquired.
Parameters:
-----------
rpath
Relative path from the cache-root to the file to open.
wait
Whether to wait for locks to be acquired.
write
If false, the file is opened for reading and a read lock is
acquired. If true, it is opened for read and write and a write
lock is acquired.
closefd
If false, retain file-descriptor (and lock) on success.
oflags
Additional open-flags to pass to `os.open()`.
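The retry loop is roughly (a sketch, not the literal implementation):
    while True:
        fd = os.open(path, flags)
        linux.fcntl_flock(fd, lock, wait=wait)
        if fstat(fd) and stat(path) report the same DEV+INO:
            break  # lock acquired on the live file
        unlock and close fd  # file was replaced meanwhile, retry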
"""
fd = None
path = self._path(rpath)
try:
while True:
# Open the file and acquire a lock. Make sure not to modify the
# file in any way, ever. If non-blocking operation was requested
# the lock call will raise `EAGAIN` if contended.
flags = os.O_RDONLY | os.O_CLOEXEC | oflags
lock = linux.fcntl.F_RDLCK
if write:
flags = flags | os.O_RDWR
lock = linux.fcntl.F_WRLCK
self._trace("_atomic_open:open")
fd = os.open(path, flags, 0o644)
self._trace("_atomic_open:lock")
linux.fcntl_flock(fd, lock, wait=wait)
# The file might have been replaced between opening it and
# acquiring the lock. Hence, run `stat(2)` on the path again
# and compare it to `fstat(2)` of the open file. If they differ
# simply retry.
# On NFS, the lock-acquisition has invalidated the caches, hence
# the metadata is refetched. On linux, the first query will
# succeed and reflect the drop in link-count. Every further
# query will yield `ESTALE`. Yet, we cannot rely on being the
# first to query, so proceed carefully.
# On non-NFS, information is coherent and we can simply proceed
# comparing the DEV+INO information to see whether the file was
# replaced.
retry = False
try:
st_fd = os.stat(fd)
except OSError as e:
if e.errno != errno.ESTALE:
raise
retry = True
try:
st_path = os.stat(path)
except OSError as e:
if e.errno not in [errno.ENOENT, errno.ESTALE]:
raise
retry = True
if retry or st_fd.st_dev != st_path.st_dev or st_fd.st_ino != st_path.st_ino:
linux.fcntl_flock(fd, linux.fcntl.F_UNLCK)
os.close(fd)
fd = None
continue
# Yield control to the caller to make use of the FD. If the FD
# is to be retained, clear it before returning to the cleanup
# handlers.
yield fd
if not closefd:
fd = None
return
finally:
if fd is not None:
linux.fcntl_flock(fd, linux.fcntl.F_UNLCK)
os.close(fd)
@contextlib.contextmanager
def _atomic_file(
self,
rpath: str,
rpath_store: str,
closefd: bool = True,
ignore_exist: bool = False,
replace: bool = False,
):
"""Create and link temporary file
Create a new temporary file and yield control to the caller to fill in
data and metadata. Once control is returned, the file is linked at the
specified location. If an exception is raised, the temporary file is
discarded.
This function emulates the behavior of `O_TMPFILE` for systems and
file-systems where it is not available.
Parameters:
-----------
rpath
Relative path from cache-root to the location where to link the
file on success.
rpath_store
Relative path from cache-root to the store to use for temporary
files. This must share the same mount-instance as the final path.
closefd
If false, retain file-descriptor (and lock) on success.
ignore_exist
If true, an existing file at the desired location during a
replacement will not cause an error.
replace
If true, replace a previous file at the specified location. If
false, no replacement takes place and the temporary file is
discarded.
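Usage sketch (mirroring how the cache replaces `cache.size`):
    with self._atomic_file(self._filename_cache_size,
                           self._dirname_objects, replace=True) as f:
        json.dump(size, f)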
"""
assert not replace or not ignore_exist
rpath_tmp = None
try:
# First create a random file in the selected store. This file will
# have a UUID as name and thus we can safely use `O_CREAT|O_EXCL`
# to create it and guarantee its uniqueness.
name = "uuid-" + uuid.uuid4().hex
rpath_tmp = os.path.join(rpath_store, name)
with self._atomic_open(
rpath_tmp,
wait=True,
write=True,
closefd=closefd,
oflags=os.O_CREAT | os.O_EXCL,
) as fd:
# Yield control to the caller to fill in data and metadata.
with os.fdopen(fd, "r+", closefd=False, encoding="utf8") as file:
yield file
suppress = []
if ignore_exist:
suppress.append(errno.EEXIST)
if replace:
# Move the file into the desired location, possibly
# replacing any existing entry.
os.rename(
src=self._path(rpath_tmp),
dst=self._path(rpath),
)
else:
# Preferably, we would use `RENAME_NOREPLACE`, but this is not
# supported on NFS. Instead, we create a hard-link, which
# will fail if the target already exists. We rely on the
# cleanup-path to drop the original link.
with ctx.suppress_oserror(*suppress):
os.link(
src=self._path(rpath_tmp),
dst=self._path(rpath),
follow_symlinks=False,
)
finally:
if rpath_tmp is not None:
# If the temporary file exists, we delete it. If we haven't
# created it, or if we already moved it, this will be a no-op.
# Due to the unique name, we will never delete a file we do not
# own. If we hard-linked the file, this merely deletes the
# original temporary link.
# On fatal errors, we leak the file into the object store. Due
# to the released lock and UUID name, cache management will
# clean it up.
with ctx.suppress_oserror(errno.ENOENT):
os.unlink(self._path(rpath_tmp))
def _atomic_dir(self, rpath_store: str) -> Tuple[str, int]:
"""Atomically create and lock an anonymous directory
Create an anonymous directory in the specified storage directory
relative to the cache-root. The directory will have a UUID as name. On
success, the name of the directory and the open file-descriptor to its
acquired lock file (write-locked) are returned.
The lock-file logic follows the cache-logic for objects. Hence, the
cache scaffolding for the specified store must exist. No other cache
infrastructure is required, though.
Parameters:
-----------
rpath_store
Relative path from the cache-root to the storage directory to create
the new anonymous directory in. Most likely, this is either the
object-store or the staging-area.
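Usage sketch (this is how `stage()` allocates its entry):
    uuidname, lockfd = self._atomic_dir(self._dirname_stage)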
"""
rpath_dir = None
rpath_lock = None
try:
while True:
# Allocate a UUID for the new directory and prepare the paths
# to the directory and lock-file inside.
name = "uuid-" + uuid.uuid4().hex
rpath_dir = os.path.join(rpath_store, name)
rpath_lock = os.path.join(rpath_dir, self._filename_object_lock)
# Create an anonymous lock-file, but before linking it create
# the target directory to link the file in. Use an ExitStack
# to control exactly where to catch exceptions.
with contextlib.ExitStack() as es:
f = es.enter_context(
self._atomic_file(
rpath_lock,
rpath_store,
closefd=False,
)
)
lockfd = f.fileno()
os.mkdir(self._path(rpath_dir))
# Exit the `_atomic_file()` context, thus triggering a link
# of the anonymous lock-file into the new directory. A
# parallel cleanup might have deleted the empty directory,
# so catch `ENOENT` and retry.
try:
es.close()
except OSError as e:
if e.errno == errno.ENOENT:
continue
raise
return (name, lockfd)
except BaseException:
# On error, we might have already created the directory or even
# linked the lock-file. Try unlinking both, but ignore errors if
# they do not exist. Due to using UUIDs as names we cannot conflict
# with entries created by some-one else.
if rpath_lock is not None:
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.unlink(self._path(rpath_lock))
if rpath_dir is not None:
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.rmdir(self._path(rpath_dir))
raise
def _create_scaffolding(self):
"""Create cache scaffolding
Create the directories leading to the cache, as well as the internal
scaffolding directories and files. This ensures that an existing cache
is not interrupted or rewritten. Hence, this can safely be called in
parallel, even on live caches.
If this happens to create a new cache, it is initialized with its
default configuration and constraints. By default, this means the cache
has a maximum size of 0 and thus is only used as staging area with no
long-time storage.
This call requires no cache-infrastructure to be in place, and can be
called repeatedly at any time.
"""
# Create the directory-scaffolding of the cache. Make sure to ignore
# errors when they already exist, to allow for parallel setups.
dirs = [
self._path(self._dirname_objects),
self._path(self._dirname_stage),
]
for i in dirs:
os.makedirs(i, exist_ok=True)
# Create the file-scaffolding of the cache. We fill in the default
# information and ignore racing operations.
with self._atomic_file(self._filename_cache_tag, self._dirname_objects, ignore_exist=True) as f:
f.write(
"Signature: 8a477f597d28d172789f06886806bc55\n"
"# This is a cache directory tag created by osbuild (see https://bford.info/cachedir/)\n"
)
with self._atomic_file(self._filename_cache_info, self._dirname_objects, ignore_exist=True) as f:
json.dump({"version": self._version_current}, f)
with self._atomic_file(self._filename_cache_lock, self._dirname_objects, ignore_exist=True) as f:
pass
with self._atomic_file(self._filename_cache_size, self._dirname_objects, ignore_exist=True) as f:
f.write("0")
def _load_cache_info(self, info: Optional[FsCacheInfo] = None):
"""Load cache information
This loads information about the cache into this cache-instance. The
cache-information is itself cached on this instance and only updated
on request. If the underlying file in the cache changes at runtime, it is
not automatically re-loaded; the information is only refreshed when this
function is called.
By default this function reads the cache-information from the
respective file in the cache and then caches it on this instance. If
the `info` argument is not `None`, then no information is read from the
file-system, but instead the information is taken from the `info`
argument. This allows changing the cache-information of this instance
without necessarily modifying the underlying file.
This call requires the cache scaffolding to be fully created.
Parameters:
-----------
info
If `None`, the cache info file is read. Otherwise, the information
is taken from this tuple.
"""
# Parse the JSON data into python.
if info is None:
with open(self._path(self._filename_cache_info), "r", encoding="utf8") as f:
info_raw = json.load(f)
info = FsCacheInfo.from_json(info_raw)
# Retain information.
self._info = info
# Parse `maximum-size` into internal representation.
if info.maximum_size == "unlimited":
self._info_maximum_size = -1
elif isinstance(info.maximum_size, int):
self._info_maximum_size = info.maximum_size
elif info.maximum_size is None:
self._info_maximum_size = 0
else:
raise ValueError(
f"maximum-size can only be set to 'unlimited' or an integer value, got {type(info.maximum_size)}")
def _is_active(self):
# Internal helper to verify we are in an active context-manager.
return self._active
def _is_compatible(self):
# Internal helper to verify the cache-version is supported.
return self._info.version is not None and \
self._version_minimum <= self._info.version <= self._version_current
def __enter__(self):
assert not self._active
try:
# Acquire the current boot-id so we can tag entries accordingly, and
# judge entries that are from previous boots.
self._bootid = linux.proc_boot_id(self._appid).hex
# Create the scaffolding for the entire cache.
self._create_scaffolding()
# Acquire a shared cache lock.
self._lock = os.open(
self._path(self._filename_cache_lock),
os.O_RDONLY | os.O_CLOEXEC,
)
linux.fcntl_flock(self._lock, linux.fcntl.F_RDLCK, wait=True)
# Read the cache configuration.
self._load_cache_info()
self._active = True
return self
except BaseException:
self.__exit__(None, None, None)
raise
def __exit__(self, exc_type, exc_value, exc_tb):
# Discard any state of this context and reset to original state.
if self._lock is not None:
linux.fcntl_flock(self._lock, linux.fcntl.F_UNLCK)
os.close(self._lock)
self._lock = None
self._active = False
self._bootid = None
self._info = FsCacheInfo()
# We always have to leave the file-system scaffolding around. Even if
# the cache is entirely empty, we cannot know whether there are other
# parallel accesses (without unreasonable effort).
def _update_cache_size(self, diff: int) -> bool:
"""Update cache size
Update the total cache size by the specified amount, unless it exceeds
the cache limits.
This carefully updates the stored cache size to allow for parallel
updates by other cache users. If the cache limits are exceeded, the
operation is canceled and `False` is returned. Otherwise, `True` is
returned.
If the specified amount is negative, the operation always succeeds. If
the cache size would end up negative, it is capped at 0.
This operation requires an active context.
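For example, with a maximum size of 100 bytes and a stored size of 90,
`_update_cache_size(20)` returns `False` and leaves the size untouched,
while `_update_cache_size(-200)` succeeds and caps the stored size at 0.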
"""
assert self._is_active()
assert self._is_compatible()
# Open the cache-size and lock it for writing. But instead of writing
# directly to it, we replace it with a new file. This guarantees that
# we cannot crash while writing a partial size, but always atomically
# update the content.
with self._atomic_open(self._filename_cache_size, write=True, wait=True) as fd:
with os.fdopen(fd, "r", closefd=False, encoding="utf8") as f:
size = json.load(f)
if size + diff < 0:
size = 0
elif (self._info_maximum_size < 0) or (size + diff <= self._info_maximum_size):
size = size + diff
else:
return False
with self._atomic_file(self._filename_cache_size, self._dirname_objects, replace=True) as f:
json.dump(size, f)
return True
def _rm_r_object(self, rpath_dir: str):
"""Remove object
Recursively remove all traces of a stored object. This either requires
the caller to hold a write-lock on the entry, or otherwise guarantee
that no cache lookups can acquire the entry concurrently.
This carefully deletes any traces of the entry, making sure to first
mark the object as invalid, and dropping the lock-file last. This can
safely be called on partially constructed or non-existing entries.
Parameters:
-----------
rpath_dir
Relative path from the cache-root to the object directory.
"""
path_dir = self._path(rpath_dir)
path_info = os.path.join(path_dir, self._filename_object_info)
path_lock = os.path.join(path_dir, self._filename_object_lock)
# Optimization: Bail out early if the entry is non-existent
if not os.path.lexists(path_dir):
return
# First step, we unlink the info-file. This will mark the entry as
# volatile and thus it will get cleaned up by cache management in case
# we crash while deleting it. Furthermore, no cache lookups will ever
# consider the entry again if the info-file is missing.
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.unlink(path_info)
# Now iterate the directory and drop everything _except_ the lock file.
# This makes sure no parallel operation will needlessly race with us. In
# case no lock is acquired, we still allow for parallel racing cleanups.
#
# Note that racing cleanups might delete the entire directory at any
# time during this iteration. Furthermore, `scandir()` is not atomic but
# repeatedly calls into the kernel. Hence, we carefully bail out once
# it reports a non-existent directory.
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
for entry in os.scandir(path_dir):
if entry.name == self._filename_object_lock:
continue
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
if entry.is_dir():
rmrf.rmtree(entry.path)
else:
os.unlink(entry.path)
# With everything gone, we unlink the lock-file and eventually delete
# the directory. Again, cleanup routines might have raced us, so avoid
# failing in case the entries are already gone.
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.unlink(path_lock)
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.rmdir(path_dir)
@contextlib.contextmanager
def stage(self):
"""Create staging entry
Create a new entry in the staging area and yield control to the caller
with the relative path to the entry. Once control returns, the staging
entry is completely discarded.
If the application crashes while holding a staging entry, it will be
left behind in the staging directory, but unlocked and marked as stale.
Hence, any cache management routine will discard it.
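Usage sketch:
    with cache.stage() as rpath:
        path = os.path.join(cache, rpath)
        ...  # fill `path`; everything is discarded on exit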
"""
# We check for an active context, but we never check for
# version-compatibility, because there is no way we can run without
# a staging area. Hence, the staging-area has to be backwards
# compatible at all times.
assert self._is_active()
uuidname = None
lockfd = None
try:
# Create and lock a new anonymous object in the staging area.
uuidname, lockfd = self._atomic_dir(self._dirname_stage)
rpath_data = os.path.join(
self._dirname_stage,
uuidname,
self._dirname_data,
)
# Prepare an empty data directory and yield it to the caller.
os.mkdir(self._path(rpath_data))
yield rpath_data
finally:
if lockfd is not None:
self._rm_r_object(os.path.join(self._dirname_stage, uuidname))
linux.fcntl_flock(lockfd, linux.fcntl.F_UNLCK)
os.close(lockfd)
@contextlib.contextmanager
def store(self, name: str):
"""Store object in cache
Create a new entry and store it in the cache with the specified name.
The entry is first created with an anonymous name and control is yielded
to the caller to fill in data. Once control returns, the entry is
committed with the specified name.
The final commit is skipped if an entry with the given name already
exists, or its name is claimed for other reasons. Furthermore, the
commit is skipped if cache limits are exceeded, or if cache maintenance
refuses the commit. Hence, a commit can never be relied upon and the
entry might be deleted from the cache as soon as the commit was invoked.
Parameters:
-----------
name
Name to store the object under.
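Usage sketch (the entry name is illustrative):
    with cache.store("my-entry") as rpath:
        with open(os.path.join(cache, rpath, "blob"), "w") as f:
            f.write("content")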
"""
assert self._is_active()
assert self._bootid is not None
if not name:
raise ValueError()
# If the cache-version is incompatible to this implementation, we short
# this call into the staging-area (which is always compatible). This
# avoids raising an exception (at the cost of dealing with this in the
# caller), and instead just creates a temporary copy which we discard.
if not self._is_compatible():
with self.stage() as p:
yield p
return
uuidname = None
lockfd = None
try:
# Create and lock a new anonymous object in the staging area.
uuidname, lockfd = self._atomic_dir(self._dirname_objects)
rpath_uuid = os.path.join(
self._dirname_objects,
uuidname,
)
rpath_data = os.path.join(
rpath_uuid,
self._dirname_data,
)
rpath_info = os.path.join(
rpath_uuid,
self._filename_object_info,
)
path_uuid = self._path(rpath_uuid)
path_data = self._path(rpath_data)
path_info = self._path(rpath_info)
# Prepare an empty data directory and yield it to the caller.
os.mkdir(path_data)
yield rpath_data
# Collect metadata about the new entry.
info: Dict[str, Any] = {}
info["creation-boot-id"] = self._bootid
info["size"] = self._calculate_space(path_data)
# Exit early if the entry is never going to fit
if self._info_maximum_size > -1 and info["size"] > self._info_maximum_size:
return
# Update the total cache-size. If it exceeds the limits, remove
# least recently used objects until there is enough space.
#
# Note that if we crash after updating the total cache size, but
# before committing the object information, the total cache size
# will be out of sync.
#
# However, it is never overcommitted, so we will never
# violate any cache invariants. Future code needs to resync
# the cache (e.g. on open with some simple journal strategy).
if not self._update_cache_size(info["size"]):
# try to free space
self._remove_lru(info["size"])
# and see if the update can happen now
if not self._update_cache_size(info["size"]):
# still could not free enough space
return
try:
# Commit the object-information, thus marking it as fully
# committed and accounted in the cache.
with open(path_info, "x", encoding="utf8") as f:
json.dump(info, f)
# As last step move the entry to the desired location. If the
# target name is already taken, we bail out and pretend the
# entry was immediately overwritten by another one.
#
# Preferably, we would use RENAME_NOREPLACE, but this is not
# available on all file-systems. Hence, we rely on the fact
# that non-empty directories cannot be replaced, so we
# automatically get RENAME_NOREPLACE behavior.
path_name = self._path(self._dirname_objects, name)
try:
os.rename(
src=path_uuid,
dst=path_name,
)
except OSError as e:
ignore = [errno.EEXIST, errno.ENOTDIR, errno.ENOTEMPTY]
if e.errno not in ignore:
raise
uuidname = None
finally:
# If the anonymous entry still exists, it will be cleaned up by
# the outer handler. Hence, make sure to drop the info file
# again and de-account it, so we don't overcommit.
if os.path.lexists(path_uuid):
with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
os.unlink(path_info)
self._update_cache_size(-info["size"])
finally:
if lockfd is not None:
if uuidname is not None:
# In case this runs after the object was renamed, but before
# `uuidname` was cleared, then `_rm_r_object()` will be a
# no-op.
self._rm_r_object(os.path.join(self._dirname_objects, uuidname))
linux.fcntl_flock(lockfd, linux.fcntl.F_UNLCK)
os.close(lockfd)
@contextlib.contextmanager
def load(self, name: str):
"""Load a cache entry
Find the cache entry with the given name, acquire a read-lock and
yield its path back to the caller. Once control returns, the entry
is released.
The returned path is the relative path from the cache root to the
data directory of the cache entry.
Parameters:
-----------
name
Name of the cache entry to find.
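Usage sketch (a miss is the common case and must be handled):
    try:
        with cache.load("my-entry") as rpath:
            with open(os.path.join(cache, rpath, "blob")) as f:
                data = f.read()
    except FsCache.MissError:
        ...  # recompute or re-fetch the data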
"""
assert self._is_active()
if not name:
raise ValueError()
if not self._is_compatible():
raise self.MissError()
with contextlib.ExitStack() as es:
# Use an ExitStack so we can catch exceptions raised by the
# `__enter__()` call on the context-manager. We want to catch
# `OSError` exceptions and convert them to cache-misses.
obj_lock_path = os.path.join(
self._dirname_objects, name, self._filename_object_lock)
try:
lock_fd = es.enter_context(
self._atomic_open(
obj_lock_path,
write=False,
wait=False,
)
)
except OSError as e:
if e.errno in [errno.EAGAIN, errno.ENOENT, errno.ENOTDIR]:
raise self.MissError() from None
raise e
libc = linux.Libc.default()
libc.futimens(lock_fd, ctypes.byref(linux.c_timespec_times2(
atime=linux.c_timespec(tv_sec=0, tv_nsec=libc.UTIME_NOW),
mtime=linux.c_timespec(tv_sec=0, tv_nsec=libc.UTIME_OMIT),
)))
yield os.path.join(
self._dirname_objects,
name,
self._dirname_data,
)
def _last_used(self, name: str) -> float:
"""Return the last time the given object was last used.
Note that the resolution is only as good as what the filesystem "atime"
gives us.
"""
obj_lock_path = os.path.join(
self._dirname_objects, name, self._filename_object_lock)
try:
return os.stat(self._path(obj_lock_path)).st_atime
except OSError as e:
if e.errno in [errno.EAGAIN, errno.ENOENT, errno.ENOTDIR]:
raise self.MissError() from None
raise e
def _last_used_objs(self) -> List[FsCacheObjectInfo]:
"""Return a list of FsCacheObjectInfo with name, last_used
information sorted by last_used time.
Note that this function will be racy when used without a lock and
the caller needs to handle this.
"""
objs = []
for name in os.listdir(self._path(self._dirname_objects)):
try:
last_used = self._last_used(name)
except (OSError, FsCache.MissError):
continue
objs.append(FsCacheObjectInfo(name=name, last_used=last_used))
return sorted(objs, key=lambda obj: obj.last_used)
def _remove_lru(self, required_size):
""""
Make room in the cache for "required_size" by remove the least
recently used entry from the cache. Note that the cache may
clear more than required_size.
"""
# To avoid having to take a global cache lock the strategy is:
# 1. Get list of (object, last_used) sorted from oldest to newest.
# This is racy so we need to take care of that in step(2).
# 2. Start with the oldest entry, try to take a write_lock
# (with O_NOATIME to be extra sure that atime information is
# correct). Get the "last_used" (atime) time and compare to what
# we expect in the list. If it diverges the object got load()ed
# while we iterated. Skip it and go to (2) again.
# 3. Remove entry, update cache size after the entry is removed.
#
# Note that there is a risk of getting out-of-sync in (3): if the
# process dies after removing an entry but before updating the
# cache size, the cache size will be over-reported.
# Try to clean at least twice the requested size to avoid having
# to do this all over again
try_to_free = required_size * 2
freed_so_far = 0
for name, last_used in self._last_used_objs():
# take a write lock on the individual object
rpath = os.path.join(self._dirname_objects, name)
rpath_lock = os.path.join(rpath, self._filename_object_lock)
# Ideally there would be some lock helper instead of the low-level
# file manipulation to abstract this a bit more.
try:
with self._atomic_open(
rpath_lock,
wait=False,
write=True,
# atime carries the "last-used" data so don't alter it
oflags=os.O_EXCL | os.O_NOATIME,
):
if last_used != self._last_used(name):
continue
# This is racy right now if the process is killed
# during "_rm_r_object(rpath)" because then the
# cache size is never reduced by the amount that
# was about to be deleted.
#
# To fix it we need to (atomic) rename the
# "object.info" file in _rm_r_object() to
# something like "object.removing". Then when
# opening the cache scan for leftover
# "object.removing" files and finish the cleanup
# and update the cache size based on the size
# recorded inside "object.removing".
size = self._calculate_space(self._path(rpath))
self._rm_r_object(rpath)
self._update_cache_size(-size)
freed_so_far += size
if freed_so_far >= try_to_free:
break
except BlockingIOError:
continue
# return True only if more than the required size got freed
return freed_so_far > required_size
@property
def info(self) -> FsCacheInfo:
"""Query Cache Information
Return the parsed cache information which is currently cached on this
cache-instance. The cache information has all unknown fields stripped.
Unset values are represented by `None`, and the cache will interpret
it as the default value for the respective field.
"""
assert self._is_active()
return self._info
@info.setter
def info(self, info: FsCacheInfo):
"""Write Cache Information
Update and write the cache-information onto the file-system. This first
locks the cache-information file, reads it in, updates the newly read
information with the data from `info`, writes the result back to disk
and finally unlocks the file.
There are a few caveats to take into account:
* The locking guarantees that simultaneous updates will be properly
ordered and never discard any information.
* Since this reads in the newest cache-information, this function can
update cache-information values other than the ones from `info`. Any
value unset in `info` will be re-read from disk and thus might
change (in the future, if required, this can be adjusted to allow a
caller to hook into the operation while the lock is held).
* You cannot strip known values from the cache-information. Any value
not present in `info` is left unchanged. You must explicitly set a
value to its default to reset it.
* Cache-information fields that are not known to this implementation
are never exposed to the caller, but are left unchanged on-disk.
This guarantees that future extensions are left alone and are not
accidentally stripped.
The cached information of this instance is updated to reflect the
changes.
Parameters:
-----------
info
Cache information object to consume and write.
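Usage sketch (raises the size limit, leaving other fields alone):
    cache.info = cache.info._replace(maximum_size=1024 * 1024 * 1024)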
"""
assert self._is_active()
with self._atomic_open(self._filename_cache_info, write=True, wait=True) as fd:
with os.fdopen(fd, "r", closefd=False, encoding="utf8") as f:
info_raw = json.load(f)
# If the on-disk data is in an unexpected format, we never touch
# it. If it is a JSON-object, we update it with the new values and
# then re-parse it into a full `FsCacheInfo` with all known fields
# populated.
if isinstance(info_raw, dict):
info_raw.update(info.to_json())
info = FsCacheInfo.from_json(info_raw)
# Replace the file with the new values. This releases the lock.
if self._is_compatible():
with self._atomic_file(self._filename_cache_info, self._dirname_objects, replace=True) as f:
json.dump(info_raw, f)
self._load_cache_info(info)
def store_tree(self, name: str, tree: Any):
"""Store file system tree in cache
Create a new entry in the object store containing a copy of the file
system tree specified as `tree`. This behaves like `store()` but instead
of providing a context to the caller it will copy the specified tree.
Similar to `store()`, when the entry is committed it is immediately
unlocked and released to the cache. This means it might vanish at any
moment due to a parallel cleanup. Hence, a caller cannot rely on the
object being available in the cache once this call returns.
If `tree` points to a file, the file is copied. If it points to a
directory, the entire directory tree is copied including the root entry
itself. To copy an entire directory without its root entry, use the
`path/.` notation. Links are never followed but copied verbatim.
All metadata is preserved, if possible.
Parameters:
-----------
name
Name to store the object under.
tree
Path to the file system tree to copy.
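Usage sketch (name and path are illustrative):
    cache.store_tree("my-tree", "/srv/data/tree")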
"""
with self.store(name) as rpath_data:
r = subprocess.run(
[
"cp",
"--reflink=auto",
"-a",
"--",
os.fspath(tree),
self._path(rpath_data),
],
check=False,
encoding="utf-8",
stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
)
if r.returncode != 0:
code = r.returncode
msg = r.stdout.strip()
raise RuntimeError(f"Cannot copy into file-system cache ({code}): {msg}")