monitor: limit the amount of data sent in JSONSeqMontior.result()

This commit limits the output in the json pipeline to a "reasonable" length. We ran into issues (e.g. [0]) from a combination of a stage that produce tons of output (dracut, ~256 kb, see issue#1976) and the consumer ("images" osbuild/monitor.go) that used a golang scanner with a max default buffer of 64kb before erroring. So limit it here. The stage result from via json is mostly for information and any error will most likely at the end. Plus consumers can collect the individual log lines on their own if desired via the "log()" messages that are stream in "real-time" with the added benefit that e.g. timestamps can be added to the logs etc. [0] https://issues.redhat.com/browse/RHEL-77988
2025-02-13 13:16:08 +01:00 · 2025-02-13 13:16:08 +01:00 · b22cbd3298
commit b22cbd3298
parent a08736be49
2 changed files with 40 additions and 2 deletions
--- a/osbuild/monitor.py
+++ b/osbuild/monitor.py
@ -341,12 +341,32 @@ class JSONSeqMonitor(BaseMonitor):
        self._context.set_stage(module)
        self.log(f"Starting module {module.name}", origin="osbuild.monitor")

-    # result is for modules
    def result(self, result: Union[BuildResult, DownloadResult]) -> None:
+        """ Called when the module (stage or download) is finished
+
+        This will stream a log entry that the stage finished and the result
+        is available via the json-seq monitor as well. Note that while the
+        stage output is part of the result it may be abbreviated. To get
+        an entire buildlog the consumer needs to simply log the calls to
+        "log()" which contain more detailed information as well.
+        """
        # we may need to check pipeline ids here in the future
        if self._progress.sub_progress:
            self._progress.sub_progress.incr()

+        # Limit the output in the json pipeline to a "reasonable"
+        # length. We ran into an issue from a combination of a stage
+        # that produce tons of output (~256 kb, see issue#1976) and
+        # the consumer that used a golang scanner with a max default
+        # buffer of 64kb before erroring.
+        #
+        # Consumers can collect the individual log lines on their own
+        # if desired via the "log()" method.
+        max_output_len = 31_000
+        if len(result.output) > max_output_len:
+            removed = len(result.output) - max_output_len
+            result.output = f"[...{removed} bytes hidden...]\n{result.output[removed:]}"
+
        self._jsonseq(log_entry(
            f"Finished module {result.name}",
            context=self._context.with_origin("osbuild.monitor"),
--- a/test/mod/test_monitor.py
+++ b/test/mod/test_monitor.py
@ -19,7 +19,7 @@ import osbuild
 import osbuild.meta
 from osbuild.monitor import Context, JSONSeqMonitor, LogMonitor, Progress, log_entry
 from osbuild.objectstore import ObjectStore
-from osbuild.pipeline import Runner
+from osbuild.pipeline import BuildResult, Runner, Stage

 from .. import test

@ -442,3 +442,21 @@ def test_json_progress_monitor_handles_racy_writes(tmp_path):
                json.loads(line)
            except json.decoder.JSONDecodeError:
                pytest.fail(f"the jsonseq stream is not valid json, got {line}")
+
+
+def test_json_progress_monitor_excessive_output_in_result(tmp_path):
+    index = osbuild.meta.Index(os.curdir)
+    info = index.get_module_info("Stage", "org.osbuild.noop")
+    stage = Stage(info, "source_opts", "build", "base", "options", "source_epoch")
+
+    output_path = tmp_path / "jsonseq.log"
+    output = "beginning-marker-vanishes\n" + "1" * 32_000 + "\nend-marker"
+    build_result = BuildResult(stage, 0, output, {})
+    with output_path.open("w") as fp:
+        mon = JSONSeqMonitor(fp.fileno(), 1)
+        mon.result(build_result)
+    with output_path.open() as fp:
+        line = fp.readline().strip("\x1e")
+        json_result = json.loads(line)
+    assert json_result["result"]["output"].startswith("[...1037 bytes hidden...]\n1111")
+    assert json_result["result"]["output"].endswith("1111\nend-marker")