"""File System Cache This module implements a data cache that uses the file system to store data as well as protect parallel access. It implements automatic cache management and allows purging the cache during runtime, pruning old entries and keeping the cache under a given limit. """ # pylint: disable=too-many-lines import contextlib import ctypes import errno import json import os import subprocess import uuid from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union from osbuild.util import ctx, linux, rmrf __all__ = [ "FsCache", "FsCacheInfo", ] MaximumSizeType = Optional[Union[int, str]] class FsCacheInfo(NamedTuple): """File System Cache Information This type represents static cache information. It is an immutable named tuple and used to query or set the configuration of a cache. creation_boot_id - Hashed linux boot-id at the time of cache-creation maximum_size - Maximum cache size in bytes, or "unlimited" version - version of the cache data structures """ creation_boot_id: Optional[str] = None maximum_size: MaximumSizeType = None version: Optional[int] = None @classmethod def from_json(cls, data: Any) -> "FsCacheInfo": """Create tuple from parsed JSON This takes a parsed JSON value and converts it into a tuple with the same information. Unknown fields in the input are ignored. The input is usually taken from `json.load()` and similar. """ if not isinstance(data, dict): return cls() creation_boot_id = None maximum_size: MaximumSizeType = None version = None # parse "creation-boot-id" _creation_boot_id = data.get("creation-boot-id") if isinstance(_creation_boot_id, str) and len(_creation_boot_id) == 32: creation_boot_id = _creation_boot_id # parse "maximum-size" _maximum_size = data.get("maximum-size") if isinstance(_maximum_size, int): maximum_size = _maximum_size elif isinstance(_maximum_size, str) and _maximum_size == "unlimited": maximum_size = "unlimited" # parse "version" _version = data.get("version") if isinstance(_version, int): version = _version # create immutable tuple return cls( creation_boot_id, maximum_size, version, ) def to_json(self) -> Dict[str, Any]: """Convert tuple into parsed JSON Return a parsed JSON value that represents the same values as this tuple does. Unset values are skipped. The returned value can be converted into formatted JSON via `json.dump()` and similar. """ data: Dict[str, Any] = {} if self.creation_boot_id is not None: data["creation-boot-id"] = self.creation_boot_id if self.maximum_size is not None: data["maximum-size"] = self.maximum_size if self.version is not None: data["version"] = self.version return data class FsCacheObjectInfo(NamedTuple): """ File System Cache object information This type represents information about a single cache object. The last_used information is only guaranteed to be valid while the cache is locked. """ name: str last_used: float class FsCache(contextlib.AbstractContextManager, os.PathLike): """File System Cache This file system cache context represents an on-disk cache. That is, it allows storing information on the file system, and retrieving it from other contexts. A single cache directory can be shared between many processes at the same time. The cache protects access to the cached data. The cache must not be shared over non-coherent network storage, but is designed for system-local linux file-systems. The file-system layout is as follows: [cache]/ ├── cache.info ├── cache.lock ├── cache.size ├── objects/ │ ├── [id0] │ ├── [id1]/ │ │ ├── data/ │ │ │ └── ... 
        │   │   ├── object.info
        │   │   └── object.lock
        │   └── ...
        └── stage/
            ├── uuid-[uuid0]
            ├── uuid-[uuid1]/
            │   ├── data/
            │   │   └── ...
            │   ├── object.info
            │   └── object.lock
            └── ...

    The central data store is in the `objects` subdirectory. Every cache
    entry has a separate subdirectory there. To guard access, a read-lock on
    `object.lock` is required for all readers; a write-lock is required for
    all writers. Static information about the object is available in the
    `object.info` file.

    As an optimization, entries in the object store consisting of a single
    file can be stored directly underneath `objects` without a separate
    subdirectory hierarchy. Their guarding lock is directly taken on this
    file and no metadata is available, other than the file information
    itself. This is used extensively by the cache management to prepare
    objects for atomic replacements. Due to the lack of metadata, they are
    volatile and can be deleted as soon as they are unlocked.

    Generally, access to the cache is non-blocking. That is, if a read-lock
    cannot be acquired, an entry is considered non-existent. Thus, unless
    treated as a `write-once` cache, cache efficiency will decrease when
    taking write-locks.

    The `data/` directory contains the content of a cache entry. Its content
    is solely defined by the creator of the entry and the cache makes no
    assumptions about its layout. Note that the `data/` directory itself can
    be modified (e.g., permission-changes) if an unnamed top-level directory
    is desired (e.g., to store a directory tree).

    In addition to the `objects/` directory, a similar `stage/` directory is
    provided. This directory is `write-only` and used to prepare entries for
    the object store before committing them. The staging area is optional.
    It is completely safe to do the same directly in the object store.
    However, the separation allows putting the staging area on a different
    file-system (e.g., symlinking to a tmpfs), and thus improving
    performance for larger operations. Otherwise, the staging area follows
    the same rules as the object store, except that only writers are
    expected. Hence, staging entries always use a unique UUID as name. To
    commit a staging entry, a user is expected to create an entry in the
    object store and copy/move the `data/` directory over.

    To guard against parallel accesses, a set of locks is utilized.
    Generally, a `*.lock`-file locks the directory it is in, while a lock on
    any other file just locks that file (unfortunately, we cannot acquire
    write-locks on directories directly, since it would require opening them
    for writing, which is not possible on linux).

    `cache.lock` can be used to guard the entire cache. A write-lock will
    keep any other parallel operation out, while a read-lock merely acquires
    cache access (you are still allowed to modify the cache, but need
    fine-grained locking). Hence, a write-lock on the global `cache.lock`
    file is only required for operations that cannot use fine-grained
    locking. The latter requires individual locking for each file or each
    object store entry you modify. In all those cases you must account for
    parallel modifications, since lock acquisition on file-systems can only
    be done after opening a file.
    """

    class MissError(Exception):
        """Cache Miss Exception

        This error is raised when a cache entry is not found. Due to the
        shared nature of the cache, a caller must be aware that any entry
        can be created or deleted by other concurrent operations, at any
        point in time. Hence, a cache miss only reflects the state of the
        cache at a particular time under a particular lock.
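
        Illustratively (the `cache` instance and entry name here are
        hypothetical), a caller handles a miss like this:

            try:
                with cache.load("some-entry") as rpath:
                    ...  # read the cached entry
            except FsCache.MissError:
                ...  # entry is absent; recompute and `store()` it instead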
""" # static parameters _dirname_data = "data" _dirname_objects = "objects" _dirname_stage = "stage" _filename_cache_info = "cache.info" _filename_cache_lock = "cache.lock" _filename_cache_size = "cache.size" _filename_cache_tag = "CACHEDIR.TAG" _filename_object_info = "object.info" _filename_object_lock = "object.lock" _version_current = 1 _version_minimum = 1 # constant properties _appid: str _tracers: Dict[str, Any] _path_cache: Any # context-manager properties _active: bool _bootid: Optional[str] _lock: Optional[int] _info: FsCacheInfo _info_maximum_size: int def __init__(self, appid: str, path_cache: Any): """Create File System Cache This creates a new file-system cache. It does not create the cache, nor access any of its content. You must enter its context-manager to prepare the cache for access. Any access outside of a context-manager will raise an assertion error, unless explicitly stated otherwise. Parameters: ----------- appid The application-ID of the caller. This can be any random string. It is used to initialize the application-specific boot-ID used to tag caches and detect whether an entry was created during the same boot. path_cache The path to the cache directory. The directory (and the path to it) is created if it does not exist. """ self._appid = appid self._tracers = {} self._path_cache = os.fspath(path_cache) self._active = False self._bootid = None self._lock = None self._info = FsCacheInfo() self._info_maximum_size = 0 def _trace(self, trace: str): """Trace execution Execute registered trace-hooks for the given trace string. This allows tests to register callbacks that are executed at runtime at a specific location in the code. During normal operation, no such hooks should be used. The trace-hooks are used to trigger race-conditions during tests and verify they are handled gracefully. Parameters: ----------- trace The trace-hook to run. """ if trace in self._tracers: self._tracers[trace]() @staticmethod def _calculate_space(path_target: str) -> int: """Calculate total space of a directory tree Calculate the total amount of storage required for a directory tree in bytes. This does not account for metadata, but only for stored file content. Note that this may differ from the sum of the file sizes as it takes sparse files into account. Parameters: ----------- path_target File-system path to the directory to operate on. """ return os.lstat(path_target).st_blocks * 512 + sum( os.lstat( os.path.join(path, f) ).st_blocks * 512 for path, dirs, files in os.walk( path_target ) for f in files + dirs ) def __fspath__(self) -> Any: """Return cache path Return the path to this cache as provided to the constructor of the cache. No conversions are applied, so the path is absolute if the path as provided by the caller was absolute, and vice-versa. This is part of the `os.PathLike` interface. See its documentation. """ return self._path_cache def _path(self, *rpaths): """Return absolute path into cache location Take the relative path from the caller and turn it into an absolute path. Since most operations take a relative path from the cache root to a cache location, this function can be used to make those paths absolute. Parameters: ----------- rpaths Relative paths from cache root to the desired cache location. 
""" return os.path.join(self, *rpaths) @contextlib.contextmanager def _atomic_open( self, rpath: str, *, wait: bool, write: bool, closefd: bool = True, oflags: int = 0, ): """Atomically open and lock file Open the cache-file at the specified relative path and acquire a lock on it. Yield the file-descriptor to the caller. Once control returns, all locks are released (if not already done so by the caller) and the file-descriptor is closed. Note that this operation involves a retry-loop in case the file is replaced or moved before the lock is acquired. Parameters: ----------- rpath Relative path from the cache-root to the file to open. wait Whether to wait for locks to be acquired. write If false, the file is opened for reading and a read lock is acquired. If true, it is opened for read and write and a write lock is acquired. closefd If false, retain file-descriptor (and lock) on success. oflags Additional open-flags to pass to `os.open()`. """ fd = None path = self._path(rpath) try: while True: # Open the file and acquire a lock. Make sure not to modify the # file in any way, ever. If non-blocking operation was requested # the lock call will raise `EAGAIN` if contended. flags = os.O_RDONLY | os.O_CLOEXEC | oflags lock = linux.fcntl.F_RDLCK if write: flags = flags | os.O_RDWR lock = linux.fcntl.F_WRLCK self._trace("_atomic_open:open") fd = os.open(path, flags, 0o644) self._trace("_atomic_open:lock") linux.fcntl_flock(fd, lock, wait=wait) # The file might have been replaced between opening it and # acquiring the lock. Hence, run `stat(2)` on the path again # and compare it to `fstat(2)` of the open file. If they differ # simply retry. # On NFS, the lock-acquisition has invalidated the caches, hence # the metadata is refetched. On linux, the first query will # succeed and reflect the drop in link-count. Every further # query will yield `ESTALE`. Yet, we cannot rely on being the # first to query, so proceed carefully. # On non-NFS, information is coherent and we can simply proceed # comparing the DEV+INO information to see whether the file was # replaced. retry = False try: st_fd = os.stat(fd) except OSError as e: if e.errno != errno.ESTALE: raise retry = True try: st_path = os.stat(path) except OSError as e: if e.errno not in [errno.ENOENT, errno.ESTALE]: raise retry = True if retry or st_fd.st_dev != st_path.st_dev or st_fd.st_ino != st_path.st_ino: linux.fcntl_flock(fd, linux.fcntl.F_UNLCK) os.close(fd) fd = None continue # Yield control to the caller to make use of the FD. If the FD # is to be retained, clear it before returning to the cleanup # handlers. yield fd if not closefd: fd = None return finally: if fd is not None: linux.fcntl_flock(fd, linux.fcntl.F_UNLCK) os.close(fd) @contextlib.contextmanager def _atomic_file( self, rpath: str, rpath_store: str, closefd: bool = True, ignore_exist: bool = False, replace: bool = False, ): """Create and link temporary file Create a new temporary file and yield control to the caller to fill in data and metadata. Once control is returned, the file is linked at the specified location. If an exception is raised, the temporary file is discarded. This function emulates the behavior of `O_TMPFILE` for systems and file-systems where it is not available. Parameters: ----------- rpath Relative path from cache-root to the location where to link the file on success. rpath_store Relative path from cache-root to the store to use for temporary files. This must share the same mount-instance as the final path. 
        closefd
            If false, retain file-descriptor (and lock) on success.
        ignore_exist
            If true, an existing file at the desired location during a
            replacement will not cause an error.
        replace
            If true, replace a previous file at the specified location. If
            false, no replacement takes place and the temporary file is
            discarded.
        """

        assert not replace or not ignore_exist

        rpath_tmp = None
        try:
            # First create a random file in the selected store. This file
            # will have a UUID as name and thus we can safely use
            # `O_CREAT|O_EXCL` to create it and guarantee its uniqueness.
            name = "uuid-" + uuid.uuid4().hex
            rpath_tmp = os.path.join(rpath_store, name)
            with self._atomic_open(
                rpath_tmp,
                wait=True,
                write=True,
                closefd=closefd,
                oflags=os.O_CREAT | os.O_EXCL,
            ) as fd:
                # Yield control to the caller to fill in data and metadata.
                with os.fdopen(fd, "r+", closefd=False, encoding="utf8") as file:
                    yield file

                suppress = []
                if ignore_exist:
                    suppress.append(errno.EEXIST)

                if replace:
                    # Move the file into the desired location, possibly
                    # replacing any existing entry.
                    os.rename(
                        src=self._path(rpath_tmp),
                        dst=self._path(rpath),
                    )
                else:
                    # Preferably, we would use `RENAME_NOREPLACE`, but this
                    # is not supported on NFS. Instead, we create a
                    # hard-link, which will fail if the target already
                    # exists. We rely on the cleanup-path to drop the
                    # original link.
                    with ctx.suppress_oserror(*suppress):
                        os.link(
                            src=self._path(rpath_tmp),
                            dst=self._path(rpath),
                            follow_symlinks=False,
                        )
        finally:
            if rpath_tmp is not None:
                # If the temporary file exists, we delete it. If we haven't
                # created it, or if we already moved it, this will be a
                # no-op. Due to the unique name, we will never delete a file
                # we do not own. If we hard-linked the file, this merely
                # deletes the original temporary link.
                #
                # On fatal errors, we leak the file into the object store.
                # Due to the released lock and UUID name, cache management
                # will clean it up.
                with ctx.suppress_oserror(errno.ENOENT):
                    os.unlink(self._path(rpath_tmp))

    def _atomic_dir(self, rpath_store: str) -> Tuple[str, int]:
        """Atomically create and lock an anonymous directory

        Create an anonymous directory in the specified storage directory
        relative to the cache-root. The directory will have a UUID as name.
        On success, the name of the directory and the open file-descriptor
        to its acquired lock file (write-locked) are returned.

        The lock-file logic follows the cache-logic for objects. Hence, the
        cache scaffolding for the specified store must exist. No other cache
        infrastructure is required, though.

        Parameters:
        -----------
        rpath_store
            Relative path from the cache-root to the storage directory to
            create the new anonymous directory in. Most likely, this is
            either the object-store or the staging-area.
        """

        rpath_dir = None
        rpath_lock = None
        try:
            while True:
                # Allocate a UUID for the new directory and prepare the
                # paths to the directory and lock-file inside.
                name = "uuid-" + uuid.uuid4().hex
                rpath_dir = os.path.join(rpath_store, name)
                rpath_lock = os.path.join(rpath_dir, self._filename_object_lock)

                # Create an anonymous lock-file, but before linking it
                # create the target directory to link the file in. Use an
                # ExitStack to control exactly where to catch exceptions.
                with contextlib.ExitStack() as es:
                    f = es.enter_context(
                        self._atomic_file(
                            rpath_lock,
                            rpath_store,
                            closefd=False,
                        )
                    )
                    lockfd = f.fileno()

                    os.mkdir(self._path(rpath_dir))

                    # Exit the `_atomic_file()` context, thus triggering a
                    # link of the anonymous lock-file into the new directory.
                    # A parallel cleanup might have deleted the empty
                    # directory, so catch `ENOENT` and retry.
                    try:
                        es.close()
                    except OSError as e:
                        if e.errno == errno.ENOENT:
                            continue
                        raise

                return (name, lockfd)
        except BaseException:
            # On error, we might have already created the directory or even
            # linked the lock-file. Try unlinking both, but ignore errors if
            # they do not exist. Due to using UUIDs as names we cannot
            # conflict with entries created by someone else.
            if rpath_lock is not None:
                with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
                    os.unlink(self._path(rpath_lock))
            if rpath_dir is not None:
                with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
                    os.rmdir(self._path(rpath_dir))
            raise

    def _create_scaffolding(self):
        """Create cache scaffolding

        Create the directories leading to the cache, as well as the internal
        scaffolding directories and files. This ensures that an existing
        cache is not interrupted or rewritten. Hence, this can safely be
        called in parallel, even on live caches.

        If this happens to create a new cache, it is initialized with its
        default configuration and constraints. By default, this means the
        cache has a maximum size of 0 and thus is only used as staging area
        with no long-term storage.

        This call requires no cache-infrastructure to be in place, and can
        be called repeatedly at any time.
        """

        # Create the directory-scaffolding of the cache. Make sure to ignore
        # errors when they already exist, to allow for parallel setups.
        dirs = [
            self._path(self._dirname_objects),
            self._path(self._dirname_stage),
        ]
        for i in dirs:
            os.makedirs(i, exist_ok=True)

        # Create the file-scaffolding of the cache. We fill in the default
        # information and ignore racing operations.
        with self._atomic_file(self._filename_cache_tag, self._dirname_objects, ignore_exist=True) as f:
            f.write(
                "Signature: 8a477f597d28d172789f06886806bc55\n"
                "# This is a cache directory tag created by osbuild (see https://bford.info/cachedir/)\n"
            )
        with self._atomic_file(self._filename_cache_info, self._dirname_objects, ignore_exist=True) as f:
            json.dump({"version": self._version_current}, f)
        with self._atomic_file(self._filename_cache_lock, self._dirname_objects, ignore_exist=True) as f:
            pass
        with self._atomic_file(self._filename_cache_size, self._dirname_objects, ignore_exist=True) as f:
            f.write("0")

    def _load_cache_info(self, info: Optional[FsCacheInfo] = None):
        """Load cache information

        This loads information about the cache into this cache-instance. The
        cache-information is itself cached on this instance and only updated
        on request. If the underlying file in the cache changes at runtime,
        it is not automatically re-loaded; the information is reloaded only
        when this function is called.

        By default this function reads the cache-information from the
        respective file in the cache and then caches it on this instance. If
        the `info` argument is not `None`, then no information is read from
        the file-system, but instead the information is taken from the
        `info` argument. This allows changing the cache-information of this
        instance without necessarily modifying the underlying file.

        This call requires the cache scaffolding to be fully created.

        Parameters:
        -----------
        info
            If `None`, the cache info file is read. Otherwise, the
            information is taken from this tuple.
        """

        # Parse the JSON data into python.
        if info is None:
            with open(self._path(self._filename_cache_info), "r", encoding="utf8") as f:
                info_raw = json.load(f)
            info = FsCacheInfo.from_json(info_raw)

        # Retain information.
        self._info = info

        # Parse `maximum-size` into internal representation.
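        # The on-disk value is either an integer byte-count, the string
        # "unlimited", or unset. Internally, -1 encodes "unlimited" and 0
        # (the default) means nothing is retained beyond the staging area.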
        if info.maximum_size == "unlimited":
            self._info_maximum_size = -1
        elif isinstance(info.maximum_size, int):
            self._info_maximum_size = info.maximum_size
        elif info.maximum_size is None:
            self._info_maximum_size = 0
        else:
            raise ValueError(
                f"maximum-size can only be set to 'unlimited' or an integer value, got {type(info.maximum_size)}")

    def _is_active(self):
        # Internal helper to verify we are in an active context-manager.
        return self._active

    def _is_compatible(self):
        # Internal helper to verify the cache-version is supported.
        return self._info.version is not None and \
            self._version_minimum <= self._info.version <= self._version_current

    def __enter__(self):
        assert not self._active

        try:
            # Acquire the current boot-id so we can tag entries accordingly,
            # and judge entries that are from previous boots.
            self._bootid = linux.proc_boot_id(self._appid).hex

            # Create the scaffolding for the entire cache.
            self._create_scaffolding()

            # Acquire a shared cache lock.
            self._lock = os.open(
                self._path(self._filename_cache_lock),
                os.O_RDONLY | os.O_CLOEXEC,
            )
            linux.fcntl_flock(self._lock, linux.fcntl.F_RDLCK, wait=True)

            # Read the cache configuration.
            self._load_cache_info()

            self._active = True
            return self
        except BaseException:
            self.__exit__(None, None, None)
            raise

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Discard any state of this context and reset to original state.
        if self._lock is not None:
            linux.fcntl_flock(self._lock, linux.fcntl.F_UNLCK)
            os.close(self._lock)
            self._lock = None
        self._active = False
        self._bootid = None
        self._info = FsCacheInfo()

        # We always have to leave the file-system scaffolding around. Even
        # if the cache is entirely empty, we cannot know whether there are
        # other parallel accesses (without unreasonable effort).

    def _update_cache_size(self, diff: int) -> bool:
        """Update cache size

        Update the total cache size by the specified amount, unless it
        exceeds the cache limits. This carefully updates the stored cache
        size to allow for parallel updates by other cache users.

        If the cache limits are exceeded, the operation is canceled and
        `False` is returned. Otherwise, `True` is returned.

        If the specified amount is negative, the operation always succeeds.
        If the cache size would end up negative, it is capped at 0.

        This operation requires an active context.
        """

        assert self._is_active()
        assert self._is_compatible()

        # Open the cache-size and lock it for writing. But instead of
        # writing directly to it, we replace it with a new file. This
        # guarantees that we cannot crash while writing a partial size, but
        # always atomically update the content.
        with self._atomic_open(self._filename_cache_size, write=True, wait=True) as fd:
            with os.fdopen(fd, "r", closefd=False, encoding="utf8") as f:
                size = json.load(f)

            if size + diff < 0:
                size = 0
            elif (self._info_maximum_size < 0) or (size + diff <= self._info_maximum_size):
                size = size + diff
            else:
                return False

            with self._atomic_file(self._filename_cache_size, self._dirname_objects, replace=True) as f:
                json.dump(size, f)

        return True

    def _rm_r_object(self, rpath_dir: str):
        """Remove object

        Recursively remove all traces of a stored object. This either
        requires the caller to hold a write-lock on the entry, or otherwise
        guarantee that no cache lookups can acquire the entry concurrently.

        This carefully deletes any traces of the entry, making sure to first
        mark the object as invalid, and dropping the lock-file last.

        This can safely be called on partially constructed or non-existing
        entries.
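
        The removal order is: unlink `object.info` first (invalidating the
        entry), then delete the payload, then unlink `object.lock`, and
        finally remove the directory itself.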

        Parameters:
        -----------
        rpath_dir
            Relative path from the cache-root to the object directory.
        """

        path_dir = self._path(rpath_dir)
        path_info = os.path.join(path_dir, self._filename_object_info)
        path_lock = os.path.join(path_dir, self._filename_object_lock)

        # Optimization: Bail out early if the entry is non-existent.
        if not os.path.lexists(path_dir):
            return

        # First step, we unlink the info-file. This will mark the entry as
        # volatile and thus it will get cleaned up by cache management in
        # case we crash while deleting it. Furthermore, no cache lookups
        # will ever consider the entry again if the info-file is missing.
        with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
            os.unlink(path_info)

        # Now iterate the directory and drop everything _except_ the lock
        # file. This makes sure no parallel operation will needlessly race
        # with us. In case no lock is acquired, we still allow for parallel
        # racing cleanups.
        #
        # Note that racing cleanups might delete the entire directory at any
        # time during this iteration. Furthermore, `scandir()` is not atomic
        # but repeatedly calls into the kernel. Hence, we carefully bail out
        # once it reports a non-existent directory.
        with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
            for entry in os.scandir(path_dir):
                if entry.name == self._filename_object_lock:
                    continue
                with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
                    if entry.is_dir():
                        rmrf.rmtree(entry.path)
                    else:
                        os.unlink(entry.path)

        # With everything gone, we unlink the lock-file and eventually
        # delete the directory. Again, cleanup routines might have raced us,
        # so avoid failing in case the entries are already gone.
        with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
            os.unlink(path_lock)
        with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
            os.rmdir(path_dir)

    @contextlib.contextmanager
    def stage(self):
        """Create staging entry

        Create a new entry in the staging area and yield control to the
        caller with the relative path to the entry's `data/` directory. Once
        control returns, the staging entry is completely discarded.

        If the application crashes while holding a staging entry, it will be
        left behind in the staging directory, but unlocked and marked as
        stale. Hence, any cache management routine will discard it.
        """

        # We check for an active context, but we never check for
        # version-compatibility, because there is no way we can run without
        # a staging area. Hence, the staging-area has to be backwards
        # compatible at all times.
        assert self._is_active()

        uuidname = None
        lockfd = None
        try:
            # Create and lock a new anonymous object in the staging area.
            uuidname, lockfd = self._atomic_dir(self._dirname_stage)
            rpath_data = os.path.join(
                self._dirname_stage,
                uuidname,
                self._dirname_data,
            )

            # Prepare an empty data directory and yield it to the caller.
            os.mkdir(self._path(rpath_data))
            yield rpath_data
        finally:
            if lockfd is not None:
                self._rm_r_object(os.path.join(self._dirname_stage, uuidname))
                linux.fcntl_flock(lockfd, linux.fcntl.F_UNLCK)
                os.close(lockfd)

    @contextlib.contextmanager
    def store(self, name: str):
        """Store object in cache

        Create a new entry and store it in the cache with the specified
        name. The entry is first created with an anonymous name and control
        is yielded to the caller to fill in data. Once control returns, the
        entry is committed with the specified name.

        The final commit is skipped if an entry with the given name already
        exists, or its name is claimed for other reasons. Furthermore, the
        commit is skipped if cache limits are exceeded, or if cache
        maintenance refuses the commit.
        Hence, a commit can never be relied upon and the entry might be
        deleted from the cache as soon as the commit was invoked.

        Parameters:
        -----------
        name
            Name to store the object under.
        """

        assert self._is_active()
        assert self._bootid is not None
        if not name:
            raise ValueError()

        # If the cache-version is incompatible with this implementation, we
        # short-circuit this call into the staging-area (which is always
        # compatible). This avoids raising an exception (at the cost of
        # dealing with this in the caller), and instead just creates a
        # temporary copy which we discard.
        if not self._is_compatible():
            with self.stage() as p:
                yield p
            return

        uuidname = None
        lockfd = None
        try:
            # Create and lock a new anonymous object in the staging area.
            uuidname, lockfd = self._atomic_dir(self._dirname_objects)
            rpath_uuid = os.path.join(
                self._dirname_objects,
                uuidname,
            )
            rpath_data = os.path.join(
                rpath_uuid,
                self._dirname_data,
            )
            rpath_info = os.path.join(
                rpath_uuid,
                self._filename_object_info,
            )
            path_uuid = self._path(rpath_uuid)
            path_data = self._path(rpath_data)
            path_info = self._path(rpath_info)

            # Prepare an empty data directory and yield it to the caller.
            os.mkdir(path_data)
            yield rpath_data

            # Collect metadata about the new entry.
            info: Dict[str, Any] = {}
            info["creation-boot-id"] = self._bootid
            info["size"] = self._calculate_space(path_data)

            # Exit early if it is never going to fit.
            if self._info_maximum_size > -1 and info["size"] > self._info_maximum_size:
                return

            # Update the total cache-size. If it exceeds the limits, remove
            # least recently used objects until there is enough space.
            #
            # Note that if we crash after updating the total cache size, but
            # before committing the object information, the total cache size
            # will be out of sync.
            #
            # However, it is never overcommitted, so we will never violate
            # any cache invariants. Future code needs to resync the cache
            # (e.g. on open with some simple journal strategy).
            if not self._update_cache_size(info["size"]):
                # try to free space
                self._remove_lru(info["size"])
                # and see if the update can happen now
                if not self._update_cache_size(info["size"]):
                    # still could not free enough space
                    return

            try:
                # Commit the object-information, thus marking it as fully
                # committed and accounted in the cache.
                with open(path_info, "x", encoding="utf8") as f:
                    json.dump(info, f)

                # As last step, move the entry to the desired location. If
                # the target name is already taken, we bail out and pretend
                # the entry was immediately overwritten by another one.
                #
                # Preferably, we would use RENAME_NOREPLACE, but this is not
                # available on all file-systems. Hence, we rely on the fact
                # that non-empty directories cannot be replaced, so we
                # automatically get RENAME_NOREPLACE behavior.
                path_name = self._path(self._dirname_objects, name)
                try:
                    os.rename(
                        src=path_uuid,
                        dst=path_name,
                    )
                except OSError as e:
                    ignore = [errno.EEXIST, errno.ENOTDIR, errno.ENOTEMPTY]
                    if e.errno not in ignore:
                        raise
                uuidname = None
            finally:
                # If the anonymous entry still exists, it will be cleaned up
                # by the outer handler. Hence, make sure to drop the info
                # file again and de-account it, so we don't overcommit.
                if os.path.lexists(path_uuid):
                    with ctx.suppress_oserror(errno.ENOENT, errno.ENOTDIR):
                        os.unlink(path_info)
                    self._update_cache_size(-info["size"])
        finally:
            if lockfd is not None:
                if uuidname is not None:
                    # In case this runs after the object was renamed, but
                    # before `uuidname` was cleared, then `_rm_r_object()`
                    # will be a no-op.
                    self._rm_r_object(os.path.join(self._dirname_objects, uuidname))
                linux.fcntl_flock(lockfd, linux.fcntl.F_UNLCK)
                os.close(lockfd)

    @contextlib.contextmanager
    def load(self, name: str):
        """Load a cache entry

        Find the cache entry with the given name, acquire a read-lock and
        yield its path back to the caller. Once control returns, the entry
        is released.

        The yielded path is relative to the cache root and points to the
        `data/` directory of the cache entry.

        Parameters:
        -----------
        name
            Name of the cache entry to find.
        """

        assert self._is_active()
        if not name:
            raise ValueError()
        if not self._is_compatible():
            raise self.MissError()

        with contextlib.ExitStack() as es:
            # Use an ExitStack so we can catch exceptions raised by the
            # `__enter__()` call on the context-manager. We want to catch
            # `OSError` exceptions and convert them to cache-misses.
            obj_lock_path = os.path.join(
                self._dirname_objects, name, self._filename_object_lock)
            try:
                lock_fd = es.enter_context(
                    self._atomic_open(
                        obj_lock_path,
                        write=False,
                        wait=False,
                    )
                )
            except OSError as e:
                if e.errno in [errno.EAGAIN, errno.ENOENT, errno.ENOTDIR]:
                    raise self.MissError() from None
                raise e

            # Mark the entry as used: bump the atime of the lock-file while
            # leaving the mtime untouched (see `_last_used()`).
            libc = linux.Libc.default()
            libc.futimens(lock_fd, ctypes.byref(linux.c_timespec_times2(
                atime=linux.c_timespec(tv_sec=0, tv_nsec=libc.UTIME_NOW),
                mtime=linux.c_timespec(tv_sec=0, tv_nsec=libc.UTIME_OMIT),
            )))

            yield os.path.join(
                self._dirname_objects,
                name,
                self._dirname_data,
            )

    def _last_used(self, name: str) -> float:
        """Return the time the given object was last used.

        Note that the resolution is only as good as what the filesystem
        "atime" gives us.
        """

        obj_lock_path = os.path.join(
            self._dirname_objects, name, self._filename_object_lock)
        try:
            return os.stat(self._path(obj_lock_path)).st_atime
        except OSError as e:
            if e.errno in [errno.EAGAIN, errno.ENOENT, errno.ENOTDIR]:
                raise self.MissError() from None
            raise e

    def _last_used_objs(self) -> List[FsCacheObjectInfo]:
        """Return a list of FsCacheObjectInfo with name and last_used
        information, sorted by last_used time.

        Note that this function will be racy when used without a lock and
        the caller needs to handle this.
        """

        objs = []
        for name in os.listdir(self._path(self._dirname_objects)):
            try:
                last_used = self._last_used(name)
            except (OSError, FsCache.MissError):
                continue
            objs.append(FsCacheObjectInfo(name=name, last_used=last_used))
        return sorted(objs, key=lambda obj: obj.last_used)

    def _remove_lru(self, required_size):
        """Make room in the cache for "required_size" by removing the least
        recently used entries from the cache.

        Note that the cache may clear more than required_size.
        """

        # To avoid having to take a global cache lock the strategy is:
        # 1. Get list of (object, last_used) sorted from oldest to newest.
        #    This is racy so we need to take care of that in step (2).
        # 2. Start with the oldest entry, try to take a write_lock
        #    (with O_NOATIME to be extra sure that atime information is
        #    correct). Get the "last_used" (atime) time and compare to what
        #    we expect in the list. If it diverges, the object got load()ed
        #    while we iterated. Skip it and go to (2) again.
        # 3. Remove entry, update cache size after the entry is removed.
        #
        # Note that there is a risk to get out-of-sync in (3). If the
        # process dies while removing and before updating the cache size,
        # the cache size will be over-reported.
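
        # The "last-used" ranking relies on the atime of the lock-files.
        # This stays meaningful even on `noatime` mounts, because `load()`
        # bumps the atime explicitly via `futimens()` rather than relying
        # on implicit atime updates.
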
        # Try to clean at least twice the requested size to avoid having
        # to do this all over again.
        try_to_free = required_size * 2
        freed_so_far = 0
        for name, last_used in self._last_used_objs():
            # take write lock for the individual object
            rpath = os.path.join(self._dirname_objects, name)
            rpath_lock = os.path.join(rpath, self._filename_object_lock)
            # Ideally there would be some lock helper instead of the
            # low-level file manipulation to abstract this a bit more.
            try:
                with self._atomic_open(
                        rpath_lock,
                        wait=False,
                        write=True,
                        # atime carries the "last-used" data so don't alter it
                        oflags=os.O_EXCL | os.O_NOATIME,
                ):
                    if last_used != self._last_used(name):
                        continue
                    # This is racy right now if the process is killed
                    # during "_rm_r_object(rpath)", because then the
                    # cache size is never reduced by the amount that
                    # was about to be deleted.
                    #
                    # To fix it we need to (atomically) rename the
                    # "object.info" file in _rm_r_object() to
                    # something like "object.removing". Then, when
                    # opening the cache, scan for leftover
                    # "object.removing" files, finish the cleanup,
                    # and update the cache size based on the size
                    # recorded inside "object.removing".
                    size = self._calculate_space(self._path(rpath))
                    self._rm_r_object(rpath)
                    self._update_cache_size(-size)
                    freed_so_far += size
                    if freed_so_far >= try_to_free:
                        break
            except BlockingIOError:
                continue
        # Return True if at least the required size got freed.
        return freed_so_far >= required_size

    @property
    def info(self) -> FsCacheInfo:
        """Query Cache Information

        Return the parsed cache information which is currently cached on
        this cache-instance. The cache information has all unknown fields
        stripped. Unset values are represented by `None`, and the cache will
        interpret it as the default value for the respective field.
        """

        assert self._is_active()
        return self._info

    @info.setter
    def info(self, info: FsCacheInfo):
        """Write Cache Information

        Update and write the cache-information onto the file-system. This
        first locks the cache-information file, reads it in, updates the
        newly read information with the data from `info`, writes the result
        back to disk and finally unlocks the file.

        There are a few caveats to take into account:

        * The locking guarantees that simultaneous updates will be properly
          ordered and never discard any information.

        * Since this reads in the newest cache-information, this function
          can update cache-information values other than the ones from
          `info`. Any value unset in `info` will be re-read from disk and
          thus might change (in the future, if required, this can be
          adjusted to allow a caller to hook into the operation while the
          lock is held).

        * You cannot strip known values from the cache-information. Any
          value not present in `info` is left unchanged. You must explicitly
          set a value to its default to reset it.

        * Cache-information fields that are not known to this implementation
          are never exposed to the caller, but are left unchanged on-disk.
          This guarantees that future extensions are left alone and are not
          accidentally stripped.

        The cached information of this instance is updated to reflect the
        changes.

        Parameters:
        -----------
        info
            Cache information object to consume and write.
        """

        assert self._is_active()

        with self._atomic_open(self._filename_cache_info, write=True, wait=True) as fd:
            with os.fdopen(fd, "r", closefd=False, encoding="utf8") as f:
                info_raw = json.load(f)

            # If the on-disk data is in an unexpected format, we never touch
            # it. If it is a JSON-object, we update it with the new values
            # and then re-parse it into a full `FsCacheInfo` with all known
            # fields populated.
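            #
            # Illustrative example (hypothetical values): merging an on-disk
            # `{"maximum-size": 1024, "future-field": true}` with an `info`
            # of `FsCacheInfo(version=1)` stores
            # `{"maximum-size": 1024, "future-field": true, "version": 1}`.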
            if isinstance(info_raw, dict):
                info_raw.update(info.to_json())
                info = FsCacheInfo.from_json(info_raw)

                # Replace the file with the new values. This releases the
                # lock.
                if self._is_compatible():
                    with self._atomic_file(self._filename_cache_info, self._dirname_objects, replace=True) as f:
                        json.dump(info_raw, f)

            self._load_cache_info(info)

    def store_tree(self, name: str, tree: Any):
        """Store file system tree in cache

        Create a new entry in the object store containing a copy of the file
        system tree specified as `tree`. This behaves like `store()` but
        instead of providing a context to the caller it will copy the
        specified tree.

        Similar to `store()`, when the entry is committed it is immediately
        unlocked and released to the cache. This means it might vanish at
        any moment due to a parallel cleanup. Hence, a caller cannot rely on
        the object being available in the cache once this call returns.

        If `tree` points to a file, the file is copied. If it points to a
        directory, the entire directory tree is copied, including the root
        entry itself. To copy an entire directory without its root entry,
        use the `path/.` notation. Links are never followed but copied
        verbatim. All metadata is preserved, if possible.

        Parameters:
        -----------
        name
            Name to store the object under.
        tree
            Path to the file system tree to copy.
        """

        with self.store(name) as rpath_data:
            r = subprocess.run(
                [
                    "cp",
                    "--reflink=auto",
                    "-a",
                    "--",
                    os.fspath(tree),
                    self._path(rpath_data),
                ],
                check=False,
                encoding="utf-8",
                stderr=subprocess.STDOUT,
                stdout=subprocess.PIPE,
            )
            if r.returncode != 0:
                code = r.returncode
                msg = r.stdout.strip()
                raise RuntimeError(f"Cannot copy into file-system cache ({code}): {msg}")
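

# Example usage (an illustrative sketch; the appid, cache path, and entry
# names below are hypothetical):
#
#     with FsCache("my-app", "/var/cache/my-app") as cache:
#         # Opt into long-term storage; a newly created cache defaults to a
#         # maximum size of 0 (staging only).
#         cache.info = cache.info._replace(maximum_size=1024 * 1024 * 1024)
#
#         try:
#             with cache.load("some-entry") as rpath:
#                 ...  # read from os.path.join(cache, rpath)
#         except FsCache.MissError:
#             with cache.store("some-entry") as rpath:
#                 ...  # write into os.path.join(cache, rpath)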