Manually clear the buffer cache of the loop device, which seems to be required in order to make sure that data written via the loop device is actually landing in the file: Since commit c1379f6 the file descriptor of the loop device is explicitly cleared. This broke manifests that involved creating a FAT filesystem. Said file system could later not be mounted. The breaking change was identified to indeed be commit c1379f6. Using `biosnoop` we saw that some write operations were missing when clearing the file descriptor that were present when using the auto-clearing feature of the loop device (see below). Reading the corresponding kernel source (v5.13.8), the current theory is that when using the auto clear feature, once the last handle on the loop device is closed, the code path in the kernel is: blkdev_close (fs/block_dev.c) blkdev_put (fs/block_dev.c) __blkdev_put (fs/block_dev.c) sync_blockdev (fs/block_dev.c) On the other hand when manually clearing the file descriptor, the code path seems to be: loop_clr_fd (fs/loop.c) __loop_clr_fd (fs/loop.c) The latter first removes the backing file and then calls `bdput`, and thus no call to sync_blockdev is made. Luckily, sync_blockdev can be called via an ioctl, `BLKFLSBUF`, which we now do, via the new helper function `lo.flush_buf`. This fixes the observed issue and leads to the same biosnoop trace as observed when using the auto clear feature without explicitly clearing the fd. NB: we considered reverting commit c1379f6, but we want to make sure that we control the point when the backing file is cleared from the fd, since subsequent osbuild stages will re-use the file and we want to ensure no loop device still has the file open and that all the data is in the file. -- biosnoop trace -- 4.115946 mkfs.fat 731297 loop1 R 0 4096 0.08 4.116096 mkfs.fat 731297 loop1 R 8 4096 0.02 4.116176 mkfs.fat 731297 loop1 R 16 4096 0.02 [...] 
4.120632 mkfs.fat 731297 loop1 R 400 4096 0.02 4.200354 org.osbuild.lo 731281 vda W 4182432 32768 0.64 4.200429 org.osbuild.lo 731281 vda W 6279584 32768 0.70 4.200657 ? 0 R 0 0 0.19 4.200946 org.osbuild.lo 731281 vda W 3328128 4096 0.20 4.201109 ? 0 R 0 0 0.13 [the following entries were missing when manually clearing the fd:] 4.201601 org.osbuild.lo 731281 loop1 W 0 4096 0.24 4.201634 org.osbuild.lo 731281 loop1 W 8 4096 0.26 4.201645 org.osbuild.lo 731281 loop1 W 16 4096 0.27 [...] 4.203118 org.osbuild.lo 731281 loop1 W 432 4096 0.25 Reported-by: Achilleas Koutsou <achilleas@koutsou.net> Reported-by: Tomas Hozza <thozza@redhat.com>
151 lines
4.1 KiB
Python
Executable file
151 lines
4.1 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""
|
|
Loopback device host service
|
|
|
|
This service can be used to expose a file or a subset of it as a
|
|
device node. The file is specified via the `filename`, and the
|
|
subset can be specified via `start` and `size` (both in sectors).
|
|
The resulting device name is returned together with the device
|
|
node numbers (`major`, `minor`). The device is closed when the
|
|
service is shut down.
|
|
A typical use case is formatting the file or a partition in the
|
|
file with a file system or mounting a previously created file
|
|
system contained in the file.
|
|
"""
|
|
|
|
|
|
import argparse
|
|
import errno
|
|
import os
|
|
import sys
|
|
|
|
from typing import Dict
|
|
|
|
from osbuild import devices
|
|
from osbuild import loop
|
|
|
|
|
|
# JSON-schema fragment describing the options accepted by
# `LoopbackService.open`. It is the *body* of a JSON object, without the
# enclosing braces.
# NOTE(review): presumably the osbuild loader wraps this in `{ ... }`
# before parsing -- confirm against the module loader.
SCHEMA = """
"additionalProperties": false,
"required": ["filename"],
"properties": {
  "filename": {
    "type": "string",
    "description": "File to associate with the loopback device"
  },
  "start": {
    "type": "number",
    "description": "Start of the data segment (in sectors)",
    "default": 0
  },
  "size": {
    "type": "number",
    "description": "Size limit of the data segment (in sectors)"
  },
  "sector-size": {
    "type": "number",
    "description": "Sector size (in bytes)",
    "default": 512
  },
  "lock": {
    "type": "boolean",
    "description": "Lock the device after opening it"
  }
}
"""
|
|
|
|
|
|
class LoopbackService(devices.DeviceService):
    """Device service that exposes a file, or a window into it, as a loop device.

    `open` attaches the requested file to a loop device and returns the
    device name and node numbers; `close` flushes and detaches the loop
    device again and closes the backing file.
    """

    def __init__(self, args: argparse.Namespace):
        super().__init__(args)
        # File descriptor of the backing file; `None` while nothing is open.
        self.fd = None
        # Loop device object returned by `LoopControl.loop_for_fd`.
        self.lo = None
        # Sector size in bytes. `open` overrides it from the options; the
        # value here mirrors the schema default so that `make_loop` never
        # reads an undefined attribute.
        self.sector_size = 512
        self.ctl = loop.LoopControl()

    def make_loop(self, fd: int, offset, sizelimit, lock):
        """Bind `fd` to a loop device and return the loop object.

        Args:
            fd: open file descriptor of the backing file.
            offset: start of the exposed segment, in bytes.
            sizelimit: length of the exposed segment in sectors; a falsy
                value (`None` or 0) means "everything from `offset` to
                the end of the file".
            lock: forwarded unchanged to `loop_for_fd`.
        """
        if not sizelimit:
            # No explicit limit: expose the remainder of the file.
            stat = os.fstat(fd)
            sizelimit = stat.st_size - offset
        else:
            # The limit is given in sectors; the loop API gets bytes.
            sizelimit *= self.sector_size

        lo = self.ctl.loop_for_fd(fd, lock=lock,
                                  offset=offset,
                                  sizelimit=sizelimit,
                                  partscan=False,
                                  autoclear=True)

        return lo

    def open(self, devpath: str, parent: str, tree: str, options: Dict):
        """Open the backing file, attach it to a loop device and make its node.

        Args:
            devpath: directory that is opened and handed to `mknod`.
            parent: not used by this service.
            tree: root directory that `options["filename"]` is resolved
                against.
            options: service options; reads `filename` (required),
                `sector-size`, `start`, `size` and `lock`.

        Returns:
            A dict with the device name under `path` and the `major` and
            `minor` numbers under `node`.

        Raises:
            Whatever `os.open`, `make_loop` or `mknod` raise. If creating
            the loop device fails, the service is closed before the error
            is re-raised.
        """
        filename = options["filename"]
        self.sector_size = options.get("sector-size", 512)
        # `start` is given in sectors; convert it to a byte offset.
        start = options.get("start", 0) * self.sector_size
        size = options.get("size")
        lock = options.get("lock", False)

        # Strip leading slashes so that an absolute `filename` is still
        # resolved inside `tree` instead of replacing it.
        path = os.path.join(tree, filename.lstrip("/"))

        self.fd = os.open(path, os.O_RDWR | os.O_CLOEXEC)
        print(f"file '{filename}' opened as {self.fd}")
        try:
            self.lo = self.make_loop(self.fd, start, size, lock)
        except Exception as error:  # pylint: disable=broad-except
            # Release the backing file (and the loop control) before
            # propagating the failure.
            self.close()
            raise error from None

        dir_fd = -1
        try:
            dir_fd = os.open(devpath, os.O_CLOEXEC | os.O_PATH)
            self.lo.mknod(dir_fd)
        finally:
            if dir_fd > -1:
                os.close(dir_fd)

        res = {
            "path": self.lo.devname,
            "node": {
                "major": self.lo.LOOP_MAJOR,
                "minor": self.lo.minor,
            }
        }

        return res

    def close(self):
        """Flush and detach the loop device, then sync and close the file.

        Safe to call more than once: the loop object and the file
        descriptor are reset to `None` once they have been released.
        """
        # Calling `close` is valid on closed
        # `LoopControl` and `Loop` objects
        self.ctl.close()

        if self.lo:
            # Flush the buffer cache of the loop device. This
            # seems to be required when clearing the fd of the
            # loop device (as of kernel 5.13.8) or otherwise
            # it leads to data loss.
            # NOTE(review): the helper lives in `osbuild.loop` and is
            # referred to as `flush_buf` in the change description;
            # confirm the name against that module.
            self.lo.flush_buf()

            # clear the fd. Since it might not immediately be
            # cleared (due to a race with udev or some other
            # process still having a reference to the loop dev)
            # we give it some time and wait for the clearing
            self.lo.clear_fd_wait(self.fd, 30)
            self.lo.close()
            self.lo = None

        if self.fd is not None:
            # Drop the attribute first so a failing fsync cannot lead
            # to a second close of the same descriptor later on.
            fd = self.fd
            self.fd = None
            try:
                os.fsync(fd)
            finally:
                os.close(fd)
|
|
|
|
|
|
def main():
    """Build the loopback service from the command line and run it."""
    argv = sys.argv[1:]
    LoopbackService.from_args(argv).main()
|
|
|
|
|
|
# Start the service only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|