sbom/spdx: use compliant license expressions
Introduce a new class `SpdxLicenseExpressionCreator`, responsible for converting license texts extracted from packages into SPDX-compliant license expressions. If the `license_expression` Python package is available on the system, it is used to determine whether the license text extracted from a package is a valid SPDX license expression. If it is, it's returned as is back to the caller. If it is not, or if the package is not available on the system, the license text is wrapped in an `ExtractedLicensingInfo` instance. The `SpdxLicenseExpressionCreator` object keeps track of all generated `ExtractedLicensingInfo` instances and de-duplicates them based on the license text. This means that if two packages use the same SPDX-non-compliant license text, they will be wrapped by an `ExtractedLicensingInfo` instance with the same `LicenseRef-` ID. The reason for the fallback when the `license_expression` package is not available is that it is not available on RHEL and CentOS Stream. This implementation allows us to ship the functionality in RHEL and optionally enable it by installing `license_expression` from a 3rd party repository. In any case, the generated SBOM document will always contain valid SPDX license expressions. Extend unit tests to cover the newly added functionality. Signed-off-by: Tomáš Hozza <thozza@redhat.com> FIXUP: sbom/spdx: use compliant license expressions Signed-off-by: Tomáš Hozza <thozza@redhat.com>
This commit is contained in:
parent
0174173175
commit
a3428e282d
5 changed files with 204 additions and 35 deletions
|
|
@ -18,3 +18,5 @@ ignore_missing_imports = True
|
|||
[mypy-libdnf5.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-license_expression.*]
|
||||
ignore_missing_imports = True
|
||||
|
|
|
|||
|
|
@ -1,11 +1,73 @@
|
|||
import os
|
||||
from datetime import datetime
|
||||
from typing import List, Union
|
||||
from typing import Dict, List, Optional, Union
|
||||
from uuid import uuid4
|
||||
|
||||
import osbuild
|
||||
import osbuild.util.sbom.model as sbom_model
|
||||
import osbuild.util.sbom.spdx2 as spdx2
|
||||
|
||||
try:
|
||||
from license_expression import ExpressionError, get_spdx_licensing
|
||||
except ImportError:
|
||||
get_spdx_licensing = None
|
||||
ExpressionError = None
|
||||
|
||||
|
||||
class SpdxLicenseExpressionCreator:
    """
    Class for creating SPDX license expressions from license strings.

    This class uses the license-expression package to parse license strings and convert them to SPDX license
    expressions, if possible. If the package could not be imported, every license string is wrapped in an
    ExtractedLicensingInfo object instead.

    The class object also keeps track of all extracted licensing information objects that were created during the
    conversion process. The extracted licensing information objects are stored in a dictionary, where the key is the
    license reference ID and the value is the ExtractedLicensingInfo object.
    """

    def __init__(self, license_index_location=None):
        """
        Set up the creator.

        If the license-expression package is available, a licensing object is obtained from
        get_spdx_licensing(), passing 'license_index_location' through when it is set.

        Raises ValueError if 'license_index_location' is given, but the license-expression package
        is not available.
        """
        self._extracted_license_infos: Dict[str, spdx2.ExtractedLicensingInfo] = {}
        self._spdx_licensing = None

        if get_spdx_licensing:
            if license_index_location:
                self._spdx_licensing = get_spdx_licensing(license_index_location)
            else:
                self._spdx_licensing = get_spdx_licensing()
        elif license_index_location:
            raise ValueError("The license-expression package is not available. "
                             "Specify the license index location has no effect.")

    def _to_extracted_license_info(self, license_str: str) -> spdx2.ExtractedLicensingInfo:
        """
        Wrap the license string in an ExtractedLicensingInfo object and remember it.

        The objects are de-duplicated by their license reference ID: if an object with the same ID
        was already created, that first object is returned instead of the new one.
        """
        eli = spdx2.ExtractedLicensingInfo(license_str)
        return self._extracted_license_infos.setdefault(eli.license_ref_id, eli)

    def ensure_license_expression(self, license_str: str) -> Union[str, spdx2.ExtractedLicensingInfo]:
        """
        Convert a license string to a valid SPDX license expression or wrap it in an ExtractedLicensingInfo object.

        This function uses the license-expression package to parse the license string and convert it to an SPDX license
        expression. If the license string can't be parsed and converted to an SPDX license expression, it is wrapped in an
        ExtractedLicensingInfo object. This includes license strings for which the parser produces no expression
        at all (e.g. an empty string).

        If the license-expression package is not available, the license string is always wrapped in an
        ExtractedLicensingInfo object.
        """
        if self._spdx_licensing is None:
            return self._to_extracted_license_info(license_str)

        try:
            parsed = self._spdx_licensing.parse(license_str, validate=True, strict=True)
        except ExpressionError:
            return self._to_extracted_license_info(license_str)

        # parse() returns None instead of raising for an empty expression. Converting that to a string
        # would produce the bogus license expression "None", so use the same fallback as for any other
        # license string that is not a valid SPDX license expression.
        if parsed is None:
            return self._to_extracted_license_info(license_str)

        return str(parsed)

    def extracted_license_infos(self) -> List[spdx2.ExtractedLicensingInfo]:
        """
        Return a list of all extracted licensing information objects that were created during the conversion process.
        """
        return list(self._extracted_license_infos.values())
|
||||
|
||||
|
||||
def spdx2_checksum_algorithm(algorithm: sbom_model.ChecksumAlgorithm) -> spdx2.ChecksumAlgorithm:
|
||||
if algorithm == sbom_model.ChecksumAlgorithm.SHA1:
|
||||
|
|
@ -41,9 +103,12 @@ def create_spdx2_document():
|
|||
return doc
|
||||
|
||||
|
||||
def sbom_pkgset_to_spdx2_doc(pkgset: List[sbom_model.BasePackage]) -> spdx2.Document:
|
||||
def sbom_pkgset_to_spdx2_doc(
|
||||
pkgset: List[sbom_model.BasePackage],
|
||||
license_index_location: Optional[os.PathLike] = None) -> spdx2.Document:
|
||||
doc = create_spdx2_document()
|
||||
relationships = []
|
||||
license_expr_creator = SpdxLicenseExpressionCreator(license_index_location)
|
||||
|
||||
for pkg in pkgset:
|
||||
|
||||
|
|
@ -51,13 +116,15 @@ def sbom_pkgset_to_spdx2_doc(pkgset: List[sbom_model.BasePackage]) -> spdx2.Docu
|
|||
if pkg.download_url:
|
||||
download_location = pkg.download_url
|
||||
|
||||
license_declared = license_expr_creator.ensure_license_expression(pkg.license_declared)
|
||||
|
||||
p = spdx2.Package(
|
||||
spdx_id=f"SPDXRef-{pkg.uuid()}",
|
||||
name=pkg.name,
|
||||
download_location=download_location,
|
||||
version=pkg.version,
|
||||
files_analyzed=False,
|
||||
license_declared=pkg.license_declared,
|
||||
license_declared=license_declared,
|
||||
external_references=[
|
||||
spdx2.ExternalPackageRef(
|
||||
category=spdx2.ExternalPackageRefCategory.PACKAGE_MANAGER,
|
||||
|
|
@ -120,4 +187,8 @@ def sbom_pkgset_to_spdx2_doc(pkgset: List[sbom_model.BasePackage]) -> spdx2.Docu
|
|||
|
||||
doc.relationships = relationships
|
||||
|
||||
extracted_license_infos = license_expr_creator.extracted_license_infos()
|
||||
if len(extracted_license_infos) > 0:
|
||||
doc.extracted_licensing_infos = extracted_license_infos
|
||||
|
||||
return doc
|
||||
|
|
|
|||
|
|
@ -3,13 +3,46 @@ import os
|
|||
import pytest
|
||||
|
||||
import osbuild
|
||||
from osbuild.util.sbom.spdx import create_spdx2_document, sbom_pkgset_to_spdx2_doc, spdx2_checksum_algorithm
|
||||
from osbuild.util.sbom.spdx2.model import CreatorType, ExternalPackageRefCategory, RelationshipType
|
||||
from osbuild.util.sbom.spdx import (
|
||||
SpdxLicenseExpressionCreator,
|
||||
create_spdx2_document,
|
||||
sbom_pkgset_to_spdx2_doc,
|
||||
spdx2_checksum_algorithm,
|
||||
)
|
||||
from osbuild.util.sbom.spdx2.model import (
|
||||
CreatorType,
|
||||
ExternalPackageRefCategory,
|
||||
ExtractedLicensingInfo,
|
||||
RelationshipType,
|
||||
)
|
||||
|
||||
from ..test import patch_license_expression
|
||||
|
||||
testutil_dnf4 = pytest.importorskip("osbuild.testutil.dnf4")
|
||||
bom_dnf = pytest.importorskip("osbuild.util.sbom.dnf")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("licensing_available", (True, False))
def test_spdxlicenseexpressionfactory_license_expression_availability(licensing_available):
    """
    Check the result of converting "MIT" with and without the license-expression package being available.
    """
    with patch_license_expression(licensing_available) as mocked_licensing:
        creator = SpdxLicenseExpressionCreator()
        result = creator.ensure_license_expression("MIT")

        if not licensing_available:
            assert mocked_licensing is None
            # The license-expression package is not available, so the license string
            # must have been wrapped in an ExtractedLicensingInfo.
            assert isinstance(result, ExtractedLicensingInfo)
            assert str(result).startswith("LicenseRef-")
            assert result.extracted_text == "MIT"
            assert len(creator.extracted_license_infos()) == 1
            return

        assert mocked_licensing is not None
        # The result must be a plain SPDX license expression string.
        assert result == "MIT"
        assert len(creator.extracted_license_infos()) == 0
|
||||
|
||||
|
||||
def test_create_spdx2_document():
|
||||
doc1 = create_spdx2_document()
|
||||
|
||||
|
|
@ -36,41 +69,63 @@ def test_create_spdx2_document():
|
|||
assert doc1_dict == doc2_dict
|
||||
|
||||
|
||||
def test_sbom_pkgset_to_spdx2_doc():
|
||||
@pytest.mark.parametrize("licensing_available", (True, False))
|
||||
def test_sbom_pkgset_to_spdx2_doc(licensing_available):
|
||||
dnf_pkgset = testutil_dnf4.depsolve_pkgset([os.path.abspath("./test/data/testrepos/baseos")], ["bash"])
|
||||
bom_pkgset = bom_dnf.dnf_pkgset_to_sbom_pkgset(dnf_pkgset)
|
||||
doc = sbom_pkgset_to_spdx2_doc(bom_pkgset)
|
||||
|
||||
assert len(doc.packages) == len(bom_pkgset)
|
||||
for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
|
||||
assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
|
||||
assert spdx_pkg.name == bom_pkg.name
|
||||
assert spdx_pkg.version == bom_pkg.version
|
||||
assert not spdx_pkg.files_analyzed
|
||||
assert spdx_pkg.license_declared == bom_pkg.license_declared
|
||||
assert spdx_pkg.download_location == bom_pkg.download_url
|
||||
assert spdx_pkg.homepage == bom_pkg.homepage
|
||||
assert spdx_pkg.summary == bom_pkg.summary
|
||||
assert spdx_pkg.description == bom_pkg.description
|
||||
assert spdx_pkg.source_info == bom_pkg.source_info()
|
||||
assert spdx_pkg.built_date == bom_pkg.build_date
|
||||
with patch_license_expression(licensing_available) as _:
|
||||
extracted_licensing_infos = set()
|
||||
|
||||
assert len(spdx_pkg.checksums) == 1
|
||||
assert spdx_pkg.checksums[0].algorithm == spdx2_checksum_algorithm(list(bom_pkg.checksums.keys())[0])
|
||||
assert spdx_pkg.checksums[0].value == list(bom_pkg.checksums.values())[0]
|
||||
doc = sbom_pkgset_to_spdx2_doc(bom_pkgset)
|
||||
assert len(doc.packages) == len(bom_pkgset)
|
||||
for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
|
||||
assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
|
||||
assert spdx_pkg.name == bom_pkg.name
|
||||
assert spdx_pkg.version == bom_pkg.version
|
||||
assert not spdx_pkg.files_analyzed
|
||||
assert spdx_pkg.download_location == bom_pkg.download_url
|
||||
assert spdx_pkg.homepage == bom_pkg.homepage
|
||||
assert spdx_pkg.summary == bom_pkg.summary
|
||||
assert spdx_pkg.description == bom_pkg.description
|
||||
assert spdx_pkg.source_info == bom_pkg.source_info()
|
||||
assert spdx_pkg.built_date == bom_pkg.build_date
|
||||
|
||||
assert len(spdx_pkg.external_references) == 1
|
||||
assert spdx_pkg.external_references[0].category == ExternalPackageRefCategory.PACKAGE_MANAGER
|
||||
assert spdx_pkg.external_references[0].reference_type == "purl"
|
||||
assert spdx_pkg.external_references[0].locator == bom_pkg.purl()
|
||||
# If the license-expression package is available, only the "MIT" license is converted
|
||||
# as a valid SPDX license expression for our testing package set.
|
||||
if licensing_available and bom_pkg.license_declared == "MIT":
|
||||
assert isinstance(spdx_pkg.license_declared, str)
|
||||
assert spdx_pkg.license_declared == "MIT"
|
||||
# If the license-expression package is not available, all licenses are converted
|
||||
# to SPDX license references.
|
||||
# The same applies to all licenses that are not "MIT" if the package is available,
|
||||
# because the testing package set contains only "MIT" as a valid SPDX license expression.
|
||||
else:
|
||||
assert isinstance(spdx_pkg.license_declared, ExtractedLicensingInfo)
|
||||
assert str(spdx_pkg.license_declared).startswith("LicenseRef-")
|
||||
assert spdx_pkg.license_declared.extracted_text == bom_pkg.license_declared
|
||||
extracted_licensing_infos.add(spdx_pkg.license_declared)
|
||||
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.DESCRIBES]) == len(bom_pkgset)
|
||||
assert len(spdx_pkg.checksums) == 1
|
||||
assert spdx_pkg.checksums[0].algorithm == spdx2_checksum_algorithm(list(bom_pkg.checksums.keys())[0])
|
||||
assert spdx_pkg.checksums[0].value == list(bom_pkg.checksums.values())[0]
|
||||
|
||||
deps_count = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.DEPENDS_ON]) == deps_count
|
||||
assert len(spdx_pkg.external_references) == 1
|
||||
assert spdx_pkg.external_references[0].category == ExternalPackageRefCategory.PACKAGE_MANAGER
|
||||
assert spdx_pkg.external_references[0].reference_type == "purl"
|
||||
assert spdx_pkg.external_references[0].locator == bom_pkg.purl()
|
||||
|
||||
optional_deps_count = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.OPTIONAL_DEPENDENCY_OF]) == optional_deps_count
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.DESCRIBES]) == len(bom_pkgset)
|
||||
|
||||
deps_count = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.DEPENDS_ON]) == deps_count
|
||||
|
||||
optional_deps_count = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
|
||||
assert len([rel for rel in doc.relationships if rel.relationship_type ==
|
||||
RelationshipType.OPTIONAL_DEPENDENCY_OF]) == optional_deps_count
|
||||
|
||||
assert len(extracted_licensing_infos) > 0
|
||||
assert sorted(extracted_licensing_infos, key=lambda x: x.license_ref_id) == \
|
||||
sorted(doc.extracted_licensing_infos, key=lambda x: x.license_ref_id)
|
||||
|
|
|
|||
40
test/test.py
40
test/test.py
|
|
@ -10,6 +10,9 @@ import subprocess
|
|||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
import osbuild.meta
|
||||
from osbuild.objectstore import ObjectStore
|
||||
|
|
@ -496,3 +499,40 @@ class OSBuild(contextlib.AbstractContextManager):
|
|||
"cp", "--reflink=auto", "-a",
|
||||
os.path.join(from_path, "."), to_path
|
||||
], check=True)
|
||||
|
||||
|
||||
class patch_license_expression:
    """
    Context manager controlling whether the license-expression package appears to be available.

    Unavailability is simulated by replacing the module-level `get_spdx_licensing()` function referenced by
    PATCH_TARGET with None. When the package should be available and it can be imported, the real function
    is patched in unchanged. When it should be available but can't be imported, pytest.skip() is called.

    The value bound by the `with` statement is whatever was patched in: the real function or None.
    """

    PATCH_TARGET = "osbuild.util.sbom.spdx.get_spdx_licensing"

    def __init__(self, make_package_available):
        self.make_package_available = make_package_available
        self.patcher = None

    def __enter__(self):
        try:
            # pylint: disable=import-outside-toplevel
            from license_expression import get_spdx_licensing as real_get_spdx_licensing
        except ImportError:
            real_get_spdx_licensing = None

        # By default the function is replaced with None, which makes the package look unavailable.
        replacement = None
        if self.make_package_available:
            if not real_get_spdx_licensing:
                pytest.skip("The license-expression package is not available.")
            replacement = real_get_spdx_licensing

        self.patcher = patch(self.PATCH_TARGET, new=replacement)
        return self.patcher.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.patcher.stop()
|
||||
|
|
|
|||
1
tox.ini
1
tox.ini
|
|
@ -19,6 +19,7 @@ deps =
|
|||
iniparse
|
||||
pyyaml
|
||||
toml
|
||||
license_expression
|
||||
pykickstart
|
||||
# required by pykickstart but not pulled in automatically :/
|
||||
requests
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue