sbom/spdx: use compliant license expressions

Introduce a new class `SpdxLicenseExpressionCreator`, responsible for
converting license texts extracted from packages into SPDX-compliant
license expressions. If the `license_expression` Python package is
available on the system, it is used to determine whether the license text
extracted from a package is a valid SPDX license expression. If it is,
it is returned as is to the caller. If it is not, or if the package
is not available on the system, the license text is wrapped in an
`ExtractedLicensingInfo` instance.

The `SpdxLicenseExpressionCreator` object keeps track of all generated
`ExtractedLicensingInfo` instances and de-duplicates them based on the
license text. This means that if two packages use the same
SPDX-non-compliant license text, they will be wrapped by an
`ExtractedLicensingInfo` instance with the same `LicenseRef-` ID.

The reason for the fallback when the `license_expression` package is not
available is that it is not available on RHEL and CentOS Stream. This
implementation allows us to ship the functionality in RHEL and
optionally enable it by installing `license_expression` from a 3rd
party repository. In any case, the generated SBOM document will always
contain valid SPDX license expressions.

Extend unit tests to cover the newly added functionality.

Signed-off-by: Tomáš Hozza <thozza@redhat.com>

FIXUP: sbom/spdx: use compliant license expressions

Signed-off-by: Tomáš Hozza <thozza@redhat.com>
This commit is contained in:
Tomáš Hozza 2024-12-17 09:33:13 +01:00 committed by Tomáš Hozza
parent 0174173175
commit a3428e282d
5 changed files with 204 additions and 35 deletions

View file

@ -3,13 +3,46 @@ import os
import pytest
import osbuild
from osbuild.util.sbom.spdx import create_spdx2_document, sbom_pkgset_to_spdx2_doc, spdx2_checksum_algorithm
from osbuild.util.sbom.spdx2.model import CreatorType, ExternalPackageRefCategory, RelationshipType
from osbuild.util.sbom.spdx import (
SpdxLicenseExpressionCreator,
create_spdx2_document,
sbom_pkgset_to_spdx2_doc,
spdx2_checksum_algorithm,
)
from osbuild.util.sbom.spdx2.model import (
CreatorType,
ExternalPackageRefCategory,
ExtractedLicensingInfo,
RelationshipType,
)
from ..test import patch_license_expression
testutil_dnf4 = pytest.importorskip("osbuild.testutil.dnf4")
bom_dnf = pytest.importorskip("osbuild.util.sbom.dnf")
@pytest.mark.parametrize("licensing_available", (True, False))
def test_spdxlicenseexpressionfactory_license_expression_availability(licensing_available):
    """
    Check what `ensure_license_expression("MIT")` returns with and without the license-expression package.
    """
    with patch_license_expression(licensing_available) as mocked_licensing:
        creator = SpdxLicenseExpressionCreator()
        result = creator.ensure_license_expression("MIT")

        if not licensing_available:
            assert mocked_licensing is None
            # The license-expression package is not available, so the license string
            # is expected to be wrapped in an ExtractedLicensingInfo.
            assert isinstance(result, ExtractedLicensingInfo)
            assert str(result).startswith("LicenseRef-")
            assert result.extracted_text == "MIT"
            assert len(creator.extracted_license_infos()) == 1
            return

        assert mocked_licensing is not None
        # The license string is expected to come back as a plain SPDX license expression string.
        assert result == "MIT"
        assert len(creator.extracted_license_infos()) == 0
def test_create_spdx2_document():
doc1 = create_spdx2_document()
@ -36,41 +69,63 @@ def test_create_spdx2_document():
assert doc1_dict == doc2_dict
def test_sbom_pkgset_to_spdx2_doc():
@pytest.mark.parametrize("licensing_available", (True, False))
def test_sbom_pkgset_to_spdx2_doc(licensing_available):
dnf_pkgset = testutil_dnf4.depsolve_pkgset([os.path.abspath("./test/data/testrepos/baseos")], ["bash"])
bom_pkgset = bom_dnf.dnf_pkgset_to_sbom_pkgset(dnf_pkgset)
doc = sbom_pkgset_to_spdx2_doc(bom_pkgset)
assert len(doc.packages) == len(bom_pkgset)
for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
assert spdx_pkg.name == bom_pkg.name
assert spdx_pkg.version == bom_pkg.version
assert not spdx_pkg.files_analyzed
assert spdx_pkg.license_declared == bom_pkg.license_declared
assert spdx_pkg.download_location == bom_pkg.download_url
assert spdx_pkg.homepage == bom_pkg.homepage
assert spdx_pkg.summary == bom_pkg.summary
assert spdx_pkg.description == bom_pkg.description
assert spdx_pkg.source_info == bom_pkg.source_info()
assert spdx_pkg.built_date == bom_pkg.build_date
with patch_license_expression(licensing_available) as _:
extracted_licensing_infos = set()
assert len(spdx_pkg.checksums) == 1
assert spdx_pkg.checksums[0].algorithm == spdx2_checksum_algorithm(list(bom_pkg.checksums.keys())[0])
assert spdx_pkg.checksums[0].value == list(bom_pkg.checksums.values())[0]
doc = sbom_pkgset_to_spdx2_doc(bom_pkgset)
assert len(doc.packages) == len(bom_pkgset)
for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
assert spdx_pkg.name == bom_pkg.name
assert spdx_pkg.version == bom_pkg.version
assert not spdx_pkg.files_analyzed
assert spdx_pkg.download_location == bom_pkg.download_url
assert spdx_pkg.homepage == bom_pkg.homepage
assert spdx_pkg.summary == bom_pkg.summary
assert spdx_pkg.description == bom_pkg.description
assert spdx_pkg.source_info == bom_pkg.source_info()
assert spdx_pkg.built_date == bom_pkg.build_date
assert len(spdx_pkg.external_references) == 1
assert spdx_pkg.external_references[0].category == ExternalPackageRefCategory.PACKAGE_MANAGER
assert spdx_pkg.external_references[0].reference_type == "purl"
assert spdx_pkg.external_references[0].locator == bom_pkg.purl()
# If the license-expression package is available, only the "MIT" license is converted
# as a valid SPDX license expression for our testing package set.
if licensing_available and bom_pkg.license_declared == "MIT":
assert isinstance(spdx_pkg.license_declared, str)
assert spdx_pkg.license_declared == "MIT"
# If the license-expression package is not available, all licenses are converted
# to SPDX license references.
# The same applies to all licenses that are not "MIT" if the package is available,
# because the testing package set contains only "MIT" as a valid SPDX license expression.
else:
assert isinstance(spdx_pkg.license_declared, ExtractedLicensingInfo)
assert str(spdx_pkg.license_declared).startswith("LicenseRef-")
assert spdx_pkg.license_declared.extracted_text == bom_pkg.license_declared
extracted_licensing_infos.add(spdx_pkg.license_declared)
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.DESCRIBES]) == len(bom_pkgset)
assert len(spdx_pkg.checksums) == 1
assert spdx_pkg.checksums[0].algorithm == spdx2_checksum_algorithm(list(bom_pkg.checksums.keys())[0])
assert spdx_pkg.checksums[0].value == list(bom_pkg.checksums.values())[0]
deps_count = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.DEPENDS_ON]) == deps_count
assert len(spdx_pkg.external_references) == 1
assert spdx_pkg.external_references[0].category == ExternalPackageRefCategory.PACKAGE_MANAGER
assert spdx_pkg.external_references[0].reference_type == "purl"
assert spdx_pkg.external_references[0].locator == bom_pkg.purl()
optional_deps_count = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.OPTIONAL_DEPENDENCY_OF]) == optional_deps_count
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.DESCRIBES]) == len(bom_pkgset)
deps_count = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.DEPENDS_ON]) == deps_count
optional_deps_count = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
assert len([rel for rel in doc.relationships if rel.relationship_type ==
RelationshipType.OPTIONAL_DEPENDENCY_OF]) == optional_deps_count
assert len(extracted_licensing_infos) > 0
assert sorted(extracted_licensing_infos, key=lambda x: x.license_ref_id) == \
sorted(doc.extracted_licensing_infos, key=lambda x: x.license_ref_id)

View file

@ -10,6 +10,9 @@ import subprocess
import sys
import tempfile
import unittest
from unittest.mock import patch
import pytest
import osbuild.meta
from osbuild.objectstore import ObjectStore
@ -496,3 +499,40 @@ class OSBuild(contextlib.AbstractContextManager):
"cp", "--reflink=auto", "-a",
os.path.join(from_path, "."), to_path
], check=True)
class patch_license_expression:
    """
    Context manager to patch the license-expression package availability.

    The attribute named by `PATCH_TARGET` is replaced for the duration of the `with` block:

    - If `make_package_available` is false, the target is replaced with `None`, which simulates
      the unavailability of the license-expression package.
    - If `make_package_available` is true and `license_expression` can be imported, the target is
      replaced with the real `get_spdx_licensing()` function (i.e. it is passed through).
    - If `make_package_available` is true but `license_expression` cannot be imported,
      `pytest.skip()` is called.

    Entering the context returns the object that was installed at the target
    (the real function, or `None`).
    """

    PATCH_TARGET = "osbuild.util.sbom.spdx.get_spdx_licensing"

    def __init__(self, make_package_available):
        self.make_package_available = make_package_available
        # The active `unittest.mock` patcher; None whenever the context is not entered.
        self.patcher = None

    @staticmethod
    def _import_get_spdx_licensing():
        """Return `license_expression.get_spdx_licensing`, or None if the package cannot be imported."""
        try:
            # pylint: disable=import-outside-toplevel
            from license_expression import get_spdx_licensing
        except ImportError:
            return None
        return get_spdx_licensing

    def __enter__(self):
        replacement = None
        if self.make_package_available:
            # Only probe for the real package when it is actually requested.
            replacement = self._import_get_spdx_licensing()
            if replacement is None:
                pytest.skip("The license-expression package is not available.")

        # The package is either passed through or should be made unavailable (replacement is None).
        patcher = patch(self.PATCH_TARGET, new=replacement)
        installed = patcher.start()
        # Remember the patcher only once it has been started, so __exit__ never stops a stale one.
        self.patcher = patcher
        return installed

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Clear the reference first so the instance holds no stale patcher afterwards,
        # and tolerate being called when the context was never (successfully) entered.
        patcher, self.patcher = self.patcher, None
        if patcher is not None:
            patcher.stop()