fscache: use _remove_lru() to reclaim space when the cache is full

This commit adds code that removes the least recently used
entries when a store() operation does not succeed because the
cache is full. To be more efficient, it tries to free
twice the requested size (this factor can be configured in the code).
Michael Vogt, 2023-12-13 17:39:55 +01:00 (committed by Achilleas Koutsou)
parent 6096f999f3, commit 6b8c1872f6
2 changed files with 80 additions and 40 deletions

Changed file 1 of 2 (FsCache implementation):

@@ -683,8 +683,11 @@ class FsCache(contextlib.AbstractContextManager, os.PathLike):
             self._info_maximum_size = -1
         elif isinstance(info.maximum_size, int):
             self._info_maximum_size = info.maximum_size
-        else:
+        elif info.maximum_size is None:
             self._info_maximum_size = 0
+        else:
+            raise ValueError(
+                f"maximum-size can only be set to 'unlimited' or an integer value, got {type(info.maximum_size)}")
 
     def _is_active(self):
         # Internal helper to verify we are in an active context-manager.
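Previously any value that was neither "unlimited" nor an int silently fell through to a limit of 0 (nothing fits); after this change only None does that, and other types raise. A short usage sketch; FsCacheInfo appears to be a namedtuple given the _replace() calls in the tests, and the mapping of "unlimited" to -1 is an assumption based on the error message and the -1 branch above this hunk:

    # _replace() returns an updated copy of the (apparent) namedtuple.
    cache.info = cache.info._replace(maximum_size="unlimited")  # no limit (-1)
    cache.info = cache.info._replace(maximum_size=128 * 1024)   # 128 KiB limit
    cache.info = cache.info._replace(maximum_size=None)         # limit 0, nothing fits
    cache.info = cache.info._replace(maximum_size=12.5)         # now raises ValueError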
@@ -942,19 +945,27 @@ class FsCache(contextlib.AbstractContextManager, os.PathLike):
         info["creation-boot-id"] = self._bootid
         info["size"] = self._calculate_space(path_data)
 
-        # Update the total cache-size. If it exceeds the limits, bail out
-        # but do not trigger an error. It behaves as if the entry was
-        # committed and immediately deleted by racing cache management. No
-        # need to tell the caller about it (if that is ever needed, we can
-        # provide for it).
+        # Exit early if it is never going to fit.
+        if self._info_maximum_size > -1 and info["size"] > self._info_maximum_size:
+            return
+
+        # Update the total cache-size. If it exceeds the limits, remove
+        # least recently used objects until there is enough space.
         #
         # Note that if we crash after updating the total cache size, but
         # before committing the object information, the total cache size
-        # will be out of sync. However, it is never overcommitted, so we
-        # will never violate any cache invariants. The cache-size will be
-        # re-synchronized by any full cache-management operation.
+        # will be out of sync.
+        #
+        # However, it is never overcommitted, so we will never
+        # violate any cache invariants. Future code needs to resync
+        # the cache (e.g. on open with some simple journal strategy).
         if not self._update_cache_size(info["size"]):
-            return
+            # try to free space
+            self._remove_lru(info["size"])
+            # and see if the update can happen now
+            if not self._update_cache_size(info["size"]):
+                # still could not free enough space
+                return
 
         try:
             # Commit the object-information, thus marking it as fully
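For the evict-and-retry sequence above to be safe, _update_cache_size() must atomically check the limit and reserve the space, returning False rather than overcommitting. A minimal sketch of those semantics, assuming the size file holds a bare JSON number (as the new test helper below reads it) and omitting the cross-process locking a real implementation needs:

    import json

    def update_cache_size(size_file, amount, maximum_size):
        # Read the current total, refuse the update if it would exceed
        # the limit (maximum_size == -1 means unlimited), otherwise
        # persist the new total and report success.
        with open(size_file, "r+", encoding="utf8") as fp:
            total = json.load(fp)
            if maximum_size > -1 and total + amount > maximum_size:
                return False  # caller may evict via _remove_lru() and retry
            fp.seek(0)
            json.dump(total + amount, fp)
            fp.truncate()
            return True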
@@ -1146,7 +1157,6 @@
                 break
             except BlockingIOError:
                 continue
-
         # return True if at least the required size got freed
         return freed_so_far > required_size
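Only the tail of _remove_lru() appears in this hunk. A minimal sketch of the whole helper, consistent with that tail and with the commit message's "free twice the requested size"; iter_lru() and remove_entry() are hypothetical stand-ins for the real traversal and removal code:

    def remove_lru(cache, required_size, overcommit_factor=2):
        # Free more than was asked for (twice by default, per the commit
        # message) so a burst of store() calls does not trigger an
        # eviction pass for every single object.
        required_size *= overcommit_factor
        freed_so_far = 0
        # iter_lru() is assumed to yield (name, size) pairs, least
        # recently used first (e.g. ordered by the last-used timestamp
        # that load() refreshes).
        for name, size in iter_lru(cache):
            if freed_so_far > required_size:
                break
            try:
                remove_entry(cache, name)  # raises BlockingIOError if locked
                freed_so_far += size
            except BlockingIOError:
                # A concurrent reader holds the entry; skip it.
                continue
        # return True if at least the required size got freed
        return freed_so_far > required_size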

Changed file 2 of 2 (FsCache tests):

@@ -465,38 +465,55 @@ def test_cache_load_updates_last_used_on_noatime(tmp_path):
         test_cache_load_updates_last_used(mnt_path)
 
 
+@pytest.mark.skipif(not has_precise_fs_timestamps(), reason="need precise fs timestamps")
 def test_cache_full_behavior(tmp_path):
+    def _cache_size_from_file():
+        with open(cache._path(cache._filename_cache_size), encoding="utf8") as fp:
+            return json.load(fp)
+
     cache = fscache.FsCache("osbuild-cache-evict", tmp_path)
     with cache:
-        # use big sizes to mask the effect of dirs using 4k of space too
-        cache.info = cache.info._replace(maximum_size=192 * 1024)
-        # add one object to the store, we are below the limit
-        with cache.store("o1") as rpath:
-            rpath_f1 = os.path.join(tmp_path, rpath, "f1")
-            with open(rpath_f1, "wb") as fp:
-                fp.write(b'a' * 64 * 1024)
-        assert cache._calculate_space(tmp_path) > 64 * 1024
-        assert cache._calculate_space(tmp_path) < 128 * 1024
-        with cache.load("o1") as o:
-            assert o != ""
-        # and one more
-        with cache.store("o2") as rpath:
-            rpath_f2 = os.path.join(tmp_path, rpath, "f2")
-            with open(rpath_f2, "wb") as fp:
-                fp.write(b'b' * 64 * 1024)
-        assert cache._calculate_space(tmp_path) > 128 * 1024
-        assert cache._calculate_space(tmp_path) < 192 * 1024
-        with cache.load("o2") as o:
-            assert o != ""
-        # adding a third one will (silently) fail because the cache is full
-        with cache.store("o3") as rpath:
-            rpath_f3 = os.path.join(tmp_path, rpath, "f3")
-            with open(rpath_f3, "wb") as fp:
-                fp.write(b'b' * 128 * 1024)
-        assert cache._calculate_space(tmp_path) > 128 * 1024
-        assert cache._calculate_space(tmp_path) < 192 * 1024
-        with pytest.raises(fscache.FsCache.MissError):
-            with cache.load("o3") as o:
-                pass
+        # use big sizes to mask the effect of {dirs,cache.info} using
+        # 4k of space too
+        obj_size = 128 * 1024
+        # cache is big enough to hold 4 objects (add buffer)
+        max_cache_size = (4 * obj_size) + int(0.5 * obj_size)
+        cache.info = cache.info._replace(maximum_size=max_cache_size)
+        # add 4 objects to the store, we are below the limit
+        for i in range(1, 5):
+            with cache.store(f"o{i}") as rpath:
+                rpath = os.path.join(tmp_path, rpath, f"f{i}")
+                with open(rpath, "wb") as fp:
+                    fp.write(b'a' * obj_size)
+            # info file is updated
+            assert _cache_size_from_file() >= i * obj_size
+            assert _cache_size_from_file() < (i + 1) * obj_size
+            # disk is updated
+            assert cache._calculate_space(tmp_path) >= i * obj_size
+            assert cache._calculate_space(tmp_path) < (i + 1) * obj_size
+            with cache.load(f"o{i}") as o:
+                assert o != ""
+            sleep_for_fs()
+        # adding one more
+        with cache.store("o-full") as rpath:
+            rpath = os.path.join(tmp_path, rpath, "f-full")
+            with open(rpath, "wb") as fp:
+                fp.write(b'b' * obj_size)
+        # cache file is updated: eviction freed twice the size of the
+        # requested obj
+        assert _cache_size_from_file() >= 2 * obj_size
+        assert _cache_size_from_file() < max_cache_size
+        # disk is updated
+        assert cache._calculate_space(tmp_path) >= 3 * obj_size
+        assert cache._calculate_space(tmp_path) < max_cache_size
+        # o1 and o2 are the least recently used and got removed
+        for obj in ["o1", "o2"]:
+            with pytest.raises(fscache.FsCache.MissError):
+                with cache.load(obj) as o:
+                    pass
+        # and o-full made it in
+        for obj in ["o3", "o4", "o-full"]:
+            with cache.load(obj) as o:
+                pass
 
 
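The test is now gated on has_precise_fs_timestamps() and paced with sleep_for_fs(), because LRU eviction order depends on distinguishable last-used timestamps; their real definitions live elsewhere in the test module. A plausible sketch of such helpers (the probe mechanics here are assumptions):

    import os
    import tempfile
    import time

    def has_precise_fs_timestamps():
        # Touch a file twice in quick succession: on filesystems with
        # coarse (e.g. 1s) timestamps both mtimes land on the same value
        # and LRU ordering in the tests becomes unreliable.
        with tempfile.TemporaryDirectory() as tmp:
            path = os.path.join(tmp, "probe")
            open(path, "w").close()
            before = os.stat(path).st_mtime_ns
            time.sleep(0.01)
            os.utime(path)
            return os.stat(path).st_mtime_ns != before

    def sleep_for_fs():
        # Sleep just long enough that the next filesystem update gets a
        # timestamp distinct from the previous one.
        time.sleep(0.05)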
@@ -556,3 +573,16 @@ def test_cache_remove_lru(tmpdir):
     # and keep removing is fine
     cache._remove_lru(1)
     assert sorted_objs == []
+
+
+def test_cache_obj_too_big(tmp_path):
+    cache = fscache.FsCache("osbuild-cache-evict", tmp_path)
+    with cache:
+        max_cache_size = 16 * 1024
+        cache.info = cache.info._replace(maximum_size=max_cache_size)
+        with cache.store("o1") as rpath:
+            with open(os.path.join(tmp_path, rpath, "f1"), "wb") as fp:
+                fp.write(b'a' * 2 * max_cache_size)
+        with pytest.raises(fscache.FsCache.MissError):
+            with cache.load("o1"):
+                pass
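Together with the early-exit added to the store path, this test pins down the caller-visible contract: a store() that does not fit never raises; it is only observable as a MissError on the next load(). A hypothetical caller illustrating that contract (read_result/write_result are placeholders, not part of this commit):

    def get_or_build(cache, name, build):
        # Best-effort caching: fall back to building when the cache
        # evicted the entry or never accepted it.
        try:
            with cache.load(name) as rpath:
                return read_result(rpath)    # placeholder reader
        except fscache.FsCache.MissError:
            result = build()
            with cache.store(name) as rpath:
                write_result(rpath, result)  # placeholder writer
            return result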