From b22cbd3298d7c0fdc620c590c273ce0252b460af Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Thu, 13 Feb 2025 13:16:08 +0100 Subject: [PATCH] monitor: limit the amount of data sent in JSONSeqMontior.result() This commit limits the output in the json pipeline to a "reasonable" length. We ran into issues (e.g. [0]) from a combination of a stage that produce tons of output (dracut, ~256 kb, see issue#1976) and the consumer ("images" osbuild/monitor.go) that used a golang scanner with a max default buffer of 64kb before erroring. So limit it here. The stage result from via json is mostly for information and any error will most likely at the end. Plus consumers can collect the individual log lines on their own if desired via the "log()" messages that are stream in "real-time" with the added benefit that e.g. timestamps can be added to the logs etc. [0] https://issues.redhat.com/browse/RHEL-77988 --- osbuild/monitor.py | 22 +++++++++++++++++++++- test/mod/test_monitor.py | 20 +++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/osbuild/monitor.py b/osbuild/monitor.py index 9ece10f3..0393aab1 100644 --- a/osbuild/monitor.py +++ b/osbuild/monitor.py @@ -341,12 +341,32 @@ class JSONSeqMonitor(BaseMonitor): self._context.set_stage(module) self.log(f"Starting module {module.name}", origin="osbuild.monitor") - # result is for modules def result(self, result: Union[BuildResult, DownloadResult]) -> None: + """ Called when the module (stage or download) is finished + + This will stream a log entry that the stage finished and the result + is available via the json-seq monitor as well. Note that while the + stage output is part of the result it may be abbreviated. To get + an entire buildlog the consumer needs to simply log the calls to + "log()" which contain more detailed information as well. + """ # we may need to check pipeline ids here in the future if self._progress.sub_progress: self._progress.sub_progress.incr() + # Limit the output in the json pipeline to a "reasonable" + # length. We ran into an issue from a combination of a stage + # that produce tons of output (~256 kb, see issue#1976) and + # the consumer that used a golang scanner with a max default + # buffer of 64kb before erroring. + # + # Consumers can collect the individual log lines on their own + # if desired via the "log()" method. + max_output_len = 31_000 + if len(result.output) > max_output_len: + removed = len(result.output) - max_output_len + result.output = f"[...{removed} bytes hidden...]\n{result.output[removed:]}" + self._jsonseq(log_entry( f"Finished module {result.name}", context=self._context.with_origin("osbuild.monitor"), diff --git a/test/mod/test_monitor.py b/test/mod/test_monitor.py index 148a4957..1eef7553 100644 --- a/test/mod/test_monitor.py +++ b/test/mod/test_monitor.py @@ -19,7 +19,7 @@ import osbuild import osbuild.meta from osbuild.monitor import Context, JSONSeqMonitor, LogMonitor, Progress, log_entry from osbuild.objectstore import ObjectStore -from osbuild.pipeline import Runner +from osbuild.pipeline import BuildResult, Runner, Stage from .. import test @@ -442,3 +442,21 @@ def test_json_progress_monitor_handles_racy_writes(tmp_path): json.loads(line) except json.decoder.JSONDecodeError: pytest.fail(f"the jsonseq stream is not valid json, got {line}") + + +def test_json_progress_monitor_excessive_output_in_result(tmp_path): + index = osbuild.meta.Index(os.curdir) + info = index.get_module_info("Stage", "org.osbuild.noop") + stage = Stage(info, "source_opts", "build", "base", "options", "source_epoch") + + output_path = tmp_path / "jsonseq.log" + output = "beginning-marker-vanishes\n" + "1" * 32_000 + "\nend-marker" + build_result = BuildResult(stage, 0, output, {}) + with output_path.open("w") as fp: + mon = JSONSeqMonitor(fp.fileno(), 1) + mon.result(build_result) + with output_path.open() as fp: + line = fp.readline().strip("\x1e") + json_result = json.loads(line) + assert json_result["result"]["output"].startswith("[...1037 bytes hidden...]\n1111") + assert json_result["result"]["output"].endswith("1111\nend-marker")