From 803433fb6200b3c7f607a5a0f925086ff64e74e8 Mon Sep 17 00:00:00 2001 From: David Rheinsberg Date: Thu, 13 Aug 2020 09:06:25 +0200 Subject: [PATCH] api: prevent early output retrieval Change the API endpoint to prevent retrieving monitor-output from a running instance. Instead, we require the caller to exit the API context before querying the monitor-output. This guarantees that the api-thread was synchronously taken down and scheduled any outstanding events. This fixes an issue where a side-channel notifies us of a buildroot exit, but the api-thread has not yet returned from epoll, and thus might not have dispatched pending I/O events, yet. If we instead wait for the thread to exit, we have a synchronous shutdown and know that all *ordered* kernel events must have been handled. In particular, imagine a build-root program running (like `echo` in the test_monitor unittest) which writes data to the stdout-pipe and then immediately exits. The syscall-order guarantees that the data is written to the pipe before the SIGCHLD is sent (or wait(2) returns). However, we retrieve the SIGCHLD from our main-thread usually (p.join() in our test, and BuildRoot() in our main code), while the pipe-reading is done from an API thread. Therefore, we might end up handling the SIGCHLD first (just imagine a single-threaded CPU that schedules the main task before the thread). To avoid this race, we can simply synchronize with the api-thread. Since we already have this synchronization as part of the api-thread takedown, it is as simple as stopping the api-thread before continuing with operations. Lastly, if a write operation to a pipe was issued, we are guaranteed that a SIGCHLD synchronization across processes is ordered correctly. Furthermore, the python event-loop also guarantees that stopping an event-loop will necessarily dispatch all outstanding events. A read is guaranteed to be outstanding in our race-scenario, so the read will be dispatched. The only possible problem is `_output_ready()` only dispatching a maximum of 4096 bytes. This might need to be fixed separately. A comment is left in place. --- osbuild/api.py | 13 ++++++++++++- osbuild/pipeline.py | 4 ++-- test/mod/test_monitor.py | 13 +++++++------ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/osbuild/api.py b/osbuild/api.py index e2fee3ed..d4ece715 100644 --- a/osbuild/api.py +++ b/osbuild/api.py @@ -91,9 +91,13 @@ class BaseAPI(abc.ABC): self.event_loop.run_forever() self.event_loop.remove_reader(server) + @property + def running(self): + return self.event_loop is not None + def __enter__(self): # We are not re-entrant, so complain if re-entered. - assert self.event_loop is None + assert not self.running if not self.socket_address: self._socketdir = self._make_socket_dir() @@ -141,6 +145,13 @@ class API(BaseAPI): @property def output(self): + # Only once the event-loop was stopped, you are guaranteed that the + # api-thread scheduled all outstanding events. Therefore, we disallow + # asking for the output-data from a running api context. If we happen + # to need live streaming access to the output in the future, we need + # to redesign the output-handling, anyway. + assert not self.running + return self._output_data.getvalue() def _prepare_input(self): diff --git a/osbuild/pipeline.py b/osbuild/pipeline.py index 0cf1746f..1b63b3f1 100644 --- a/osbuild/pipeline.py +++ b/osbuild/pipeline.py @@ -91,7 +91,7 @@ class Stage: binds=[os.fspath(tree) + ":/run/osbuild/tree"], readonly_binds=ro_binds) - return BuildResult(self, r.returncode, api.output, api.metadata) + return BuildResult(self, r.returncode, api.output, api.metadata) class Assembler: @@ -149,7 +149,7 @@ class Assembler: binds=binds, readonly_binds=ro_binds) - return BuildResult(self, r.returncode, api.output, api.metadata) + return BuildResult(self, r.returncode, api.output, api.metadata) class Pipeline: diff --git a/test/mod/test_monitor.py b/test/mod/test_monitor.py index 8004a76b..01d84e77 100644 --- a/test/mod/test_monitor.py +++ b/test/mod/test_monitor.py @@ -73,12 +73,13 @@ class TestMonitor(unittest.TestCase): path = os.path.join(tmpdir, "osbuild-api") logfile = os.path.join(tmpdir, "log.txt") - with open(logfile, "w") as log, \ - API(args, LogMonitor(log.fileno()), socket_address=path) as api: - p = mp.Process(target=echo, args=(path, )) - p.start() - p.join() - self.assertEqual(p.exitcode, 0) + with open(logfile, "w") as log: + api = API(args, LogMonitor(log.fileno()), socket_address=path) + with api as api: + p = mp.Process(target=echo, args=(path, )) + p.start() + p.join() + self.assertEqual(p.exitcode, 0) output = api.output # pylint: disable=no-member assert output