monitor: limit the amount of data sent in JSONSeqMontior.result()

This commit limits the output in the json pipeline to a "reasonable"
length. We ran into issues (e.g. [0]) from a combination of a stage
that produce tons of output (dracut, ~256 kb, see issue#1976) and
the consumer ("images" osbuild/monitor.go) that used a golang scanner
with a max default buffer of 64kb before erroring. So limit it
here.

The stage result from via json is mostly for information and any error
will most likely at the end. Plus consumers can collect the individual
log lines on their own if desired via the "log()" messages that are
stream in "real-time" with the added benefit that e.g. timestamps
can be added to the logs etc.

[0] https://issues.redhat.com/browse/RHEL-77988
This commit is contained in:
Michael Vogt 2025-02-13 13:16:08 +01:00 committed by Tomáš Hozza
parent a08736be49
commit b22cbd3298
2 changed files with 40 additions and 2 deletions

View file

@ -341,12 +341,32 @@ class JSONSeqMonitor(BaseMonitor):
self._context.set_stage(module)
self.log(f"Starting module {module.name}", origin="osbuild.monitor")
# result is for modules
def result(self, result: Union[BuildResult, DownloadResult]) -> None:
""" Called when the module (stage or download) is finished
This will stream a log entry that the stage finished and the result
is available via the json-seq monitor as well. Note that while the
stage output is part of the result it may be abbreviated. To get
an entire buildlog the consumer needs to simply log the calls to
"log()" which contain more detailed information as well.
"""
# we may need to check pipeline ids here in the future
if self._progress.sub_progress:
self._progress.sub_progress.incr()
# Limit the output in the json pipeline to a "reasonable"
# length. We ran into an issue from a combination of a stage
# that produce tons of output (~256 kb, see issue#1976) and
# the consumer that used a golang scanner with a max default
# buffer of 64kb before erroring.
#
# Consumers can collect the individual log lines on their own
# if desired via the "log()" method.
max_output_len = 31_000
if len(result.output) > max_output_len:
removed = len(result.output) - max_output_len
result.output = f"[...{removed} bytes hidden...]\n{result.output[removed:]}"
self._jsonseq(log_entry(
f"Finished module {result.name}",
context=self._context.with_origin("osbuild.monitor"),

View file

@ -19,7 +19,7 @@ import osbuild
import osbuild.meta
from osbuild.monitor import Context, JSONSeqMonitor, LogMonitor, Progress, log_entry
from osbuild.objectstore import ObjectStore
from osbuild.pipeline import Runner
from osbuild.pipeline import BuildResult, Runner, Stage
from .. import test
@ -442,3 +442,21 @@ def test_json_progress_monitor_handles_racy_writes(tmp_path):
json.loads(line)
except json.decoder.JSONDecodeError:
pytest.fail(f"the jsonseq stream is not valid json, got {line}")
def test_json_progress_monitor_excessive_output_in_result(tmp_path):
index = osbuild.meta.Index(os.curdir)
info = index.get_module_info("Stage", "org.osbuild.noop")
stage = Stage(info, "source_opts", "build", "base", "options", "source_epoch")
output_path = tmp_path / "jsonseq.log"
output = "beginning-marker-vanishes\n" + "1" * 32_000 + "\nend-marker"
build_result = BuildResult(stage, 0, output, {})
with output_path.open("w") as fp:
mon = JSONSeqMonitor(fp.fileno(), 1)
mon.result(build_result)
with output_path.open() as fp:
line = fp.readline().strip("\x1e")
json_result = json.loads(line)
assert json_result["result"]["output"].startswith("[...1037 bytes hidden...]\n1111")
assert json_result["result"]["output"].endswith("1111\nend-marker")