fscache: add FsCache._remove_lru() to remove entries

The FsCache._remove_lru() removes the least recently used entry
from the cache.
This commit is contained in:
Michael Vogt 2023-12-13 10:07:32 +01:00 committed by Achilleas Koutsou
parent b2a82beb75
commit 6096f999f3
2 changed files with 100 additions and 0 deletions

View file

@ -1086,6 +1086,70 @@ class FsCache(contextlib.AbstractContextManager, os.PathLike):
objs.append(FsCacheObjectInfo(name=name, last_used=last_used))
return sorted(objs, key=lambda obj: obj.last_used)
def _remove_lru(self, required_size):
""""
Make room in the cache for "required_size" by remove the least
recently used entry from the cache. Note that the cache may
clear more than required_size.
"""
# To avoid having to take a global cache lock the strategy is:
# 1. Get list of (object, last_used) sorted from oldest to newest.
# This is racy so we need to take care of that in step(2).
# 2. Start with the oldest entry, try to take a write_lock
# (with O_NOATIME to be extra sure that atime information is
# correct). Get the "last_used" (atime) time and compare to what
# we expect in the list. If it diverges the object got load()ed
# while we iterated. Skip it and go to (2) again.
# 3. Remove entry, update cache size after the entry is removed.
#
# Note that there is a risk to get out-of-sync in (3). If the
# process dies while removing and before updating the cache
# size the cache will be over reported.
# Try to clean at least twice the requested size to avoid having
# to do this all over again
try_to_free = required_size * 2
freed_so_far = 0
for name, last_used in self._last_used_objs():
# take write lock for the indivdual object
rpath = os.path.join(self._dirname_objects, name)
rpath_lock = os.path.join(rpath, self._filename_object_lock)
# Ideally there would some lock helper instead of the low-level
# file manipulation to abstract this a bit more.
try:
with self._atomic_open(
rpath_lock,
wait=False,
write=True,
# atime carries the "last-used" data so don't alter it
oflags=os.O_EXCL | os.O_NOATIME,
):
if last_used != self._last_used(name):
continue
# This is racy right now if the process is killed
# during "_rm_r_object(rpath)" because then the
# cache size is never reduced by the amount that
# was about to be deleted.
#
# To fix it we need to (atomic) rename the
# "object.info" file in _rm_r_object() to
# something like "object.removing". Then when
# opening the cache scan for leftover
# "object.removing" files and finish the cleanup
# and update the cache size based on the size
# recorded inside "object.removing".
size = self._calculate_space(self._path(rpath))
self._rm_r_object(rpath)
self._update_cache_size(-size)
freed_so_far += size
if freed_so_far >= try_to_free:
break
except BlockingIOError:
continue
# return True if at least the required size got freed
return freed_so_far > required_size
@property
def info(self) -> FsCacheInfo:
"""Query Cache Information

View file

@ -520,3 +520,39 @@ def test_cache_last_used_objs(tmpdir):
pass
sorted_objs = cache._last_used_objs()
assert [e[0] for e in sorted_objs] == ["o3", "o1", "o2"]
@pytest.mark.skipif(not has_precise_fs_timestamps(), reason="need precise fs timestamps")
def test_cache_remove_lru(tmpdir):
cache = fscache.FsCache("osbuild-cache-id", tmpdir)
with cache:
cache.info = cache.info._replace(maximum_size=-1)
# add objs to the store
for obj in ["o3", "o2", "o1"]:
with cache.store(obj):
pass
with cache.load(obj):
pass
sleep_for_fs()
# precondition check: we have least used o3,o2,o1
sorted_objs = cache._last_used_objs()
assert [e[0] for e in sorted_objs] == ["o3", "o2", "o1"]
# removed least recently used (o3), now o2 is least recently used
cache._remove_lru(1)
sorted_objs = cache._last_used_objs()
assert [e[0] for e in sorted_objs] == ["o2", "o1"]
# now load o2 (previously least recently used)
with cache.load("o2"):
pass
sleep_for_fs()
# and ensure that removing the lru removes "o1" now and keeps "o2"
cache._remove_lru(1)
sorted_objs = cache._last_used_objs()
assert [e[0] for e in sorted_objs] == ["o2"]
# removing last obj
cache._remove_lru(1)
sorted_objs = cache._last_used_objs()
assert sorted_objs == []
# and keep removing is fine
cache._remove_lru(1)
assert sorted_objs == []