class SpdxLicenseExpressionCreator:
    """
    Create SPDX license expressions from license strings.

    The license-expression package is used to parse a license string and turn it into an SPDX license
    expression, if possible. A string that can't be converted is wrapped in an ExtractedLicensingInfo object
    instead.

    The instance keeps track of every ExtractedLicensingInfo object it created during the conversion. They
    are stored in a dictionary, where the key is the license reference ID and the value is the
    ExtractedLicensingInfo object.
    """

    def __init__(self, license_index_location: Optional[os.PathLike] = None):
        """
        Set up the converter.

        :param license_index_location: optional location of a license index, passed to
            `get_spdx_licensing()`. When it is not set, `get_spdx_licensing()` is called without arguments.
        :raises ValueError: if a license index location is given, but the license-expression package is
            not available.
        """
        self._extracted_license_infos: Dict[str, spdx2.ExtractedLicensingInfo] = {}
        # Stays None when the license-expression package is not available.
        self._spdx_licensing = None

        if get_spdx_licensing:
            if license_index_location:
                self._spdx_licensing = get_spdx_licensing(license_index_location)
            else:
                self._spdx_licensing = get_spdx_licensing()
        elif license_index_location:
            raise ValueError("The license-expression package is not available. "
                             "Specify the license index location has no effect.")

    def _to_extracted_license_info(self, license_str: str) -> spdx2.ExtractedLicensingInfo:
        """
        Wrap the license string in an ExtractedLicensingInfo object and remember it.

        If an object with the same license reference ID was already registered, the previously registered
        object is returned, so that equal references share one instance.
        """
        eli = spdx2.ExtractedLicensingInfo(license_str)
        return self._extracted_license_infos.setdefault(eli.license_ref_id, eli)

    def ensure_license_expression(self, license_str: str) -> Union[str, spdx2.ExtractedLicensingInfo]:
        """
        Convert a license string to a valid SPDX license expression or wrap it in an ExtractedLicensingInfo object.

        The license-expression package is used to parse and validate the license string. The string is wrapped
        in an ExtractedLicensingInfo object if:
          - the license-expression package is not available,
          - the string can't be parsed and validated as an SPDX license expression, or
          - the parser produces no expression at all (empty or blank license string).

        :param license_str: the license string as declared by the package.
        :return: the SPDX license expression as a string, or an ExtractedLicensingInfo object.
        """
        if self._spdx_licensing is None:
            return self._to_extracted_license_info(license_str)

        try:
            parsed = self._spdx_licensing.parse(license_str, validate=True, strict=True)
        except ExpressionError:
            return self._to_extracted_license_info(license_str)

        # parse() returns None instead of raising for an empty or blank string. Converting that with str()
        # would produce the literal text "None" as the license expression, so handle it like any other
        # string that could not be converted.
        if parsed is None:
            return self._to_extracted_license_info(license_str)

        return str(parsed)

    def extracted_license_infos(self) -> List[spdx2.ExtractedLicensingInfo]:
        """
        Return a list of all ExtractedLicensingInfo objects that were created during the conversion process.
        """
        return list(self._extracted_license_infos.values())
@pytest.mark.parametrize("licensing_available", (True, False))
def test_spdxlicenseexpressionfactory_license_expression_availability(licensing_available):
    """
    Check how "MIT" is converted with and without the license-expression package.
    """
    with patch_license_expression(licensing_available) as mocked_licensing:
        creator = SpdxLicenseExpressionCreator()
        converted = creator.ensure_license_expression("MIT")

        if not licensing_available:
            assert mocked_licensing is None
            # Without the license-expression package, the license string
            # must end up wrapped in an ExtractedLicensingInfo.
            assert isinstance(converted, ExtractedLicensingInfo)
            assert str(converted).startswith("LicenseRef-")
            assert converted.extracted_text == "MIT"
            assert len(creator.extracted_license_infos()) == 1
            return

        assert mocked_licensing is not None
        # With the package, the result is a plain SPDX license expression string
        # and nothing has been recorded as extracted licensing info.
        assert converted == "MIT"
        assert len(creator.extracted_license_infos()) == 0
@pytest.mark.parametrize("licensing_available", (True, False))
def test_sbom_pkgset_to_spdx2_doc(licensing_available):
    """
    Convert the depsolved "bash" package set to an SPDX document and compare it with the SBOM package set.
    """
    repo_dir = os.path.abspath("./test/data/testrepos/baseos")
    dnf_pkgset = testutil_dnf4.depsolve_pkgset([repo_dir], ["bash"])
    bom_pkgset = bom_dnf.dnf_pkgset_to_sbom_pkgset(dnf_pkgset)

    def relationship_count(document, wanted_type):
        # Number of relationships of the given type in the document.
        return len([rel for rel in document.relationships if rel.relationship_type == wanted_type])

    with patch_license_expression(licensing_available):
        seen_license_refs = set()

        doc = sbom_pkgset_to_spdx2_doc(bom_pkgset)
        assert len(doc.packages) == len(bom_pkgset)

        for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
            assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
            assert spdx_pkg.name == bom_pkg.name
            assert spdx_pkg.version == bom_pkg.version
            assert not spdx_pkg.files_analyzed
            assert spdx_pkg.download_location == bom_pkg.download_url
            assert spdx_pkg.homepage == bom_pkg.homepage
            assert spdx_pkg.summary == bom_pkg.summary
            assert spdx_pkg.description == bom_pkg.description
            assert spdx_pkg.source_info == bom_pkg.source_info()
            assert spdx_pkg.built_date == bom_pkg.build_date

            # Only "MIT" is a valid SPDX license expression in the testing package set, and it is only
            # recognized as such when the license-expression package is available.
            expect_spdx_expression = licensing_available and bom_pkg.license_declared == "MIT"
            declared = spdx_pkg.license_declared
            if expect_spdx_expression:
                assert isinstance(declared, str)
                assert declared == "MIT"
            else:
                # Everything else (and everything when the package is unavailable) becomes
                # an SPDX license reference.
                assert isinstance(declared, ExtractedLicensingInfo)
                assert str(declared).startswith("LicenseRef-")
                assert declared.extracted_text == bom_pkg.license_declared
                seen_license_refs.add(declared)

            assert len(spdx_pkg.checksums) == 1
            first_algorithm = list(bom_pkg.checksums.keys())[0]
            first_value = list(bom_pkg.checksums.values())[0]
            assert spdx_pkg.checksums[0].algorithm == spdx2_checksum_algorithm(first_algorithm)
            assert spdx_pkg.checksums[0].value == first_value

            assert len(spdx_pkg.external_references) == 1
            external_ref = spdx_pkg.external_references[0]
            assert external_ref.category == ExternalPackageRefCategory.PACKAGE_MANAGER
            assert external_ref.reference_type == "purl"
            assert external_ref.locator == bom_pkg.purl()

        assert relationship_count(doc, RelationshipType.DESCRIBES) == len(bom_pkgset)

        expected_deps = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
        assert relationship_count(doc, RelationshipType.DEPENDS_ON) == expected_deps

        expected_optional_deps = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
        assert relationship_count(doc, RelationshipType.OPTIONAL_DEPENDENCY_OF) == expected_optional_deps

        # The document must list exactly the license references seen on the packages.
        assert len(seen_license_refs) > 0
        expected_infos = sorted(seen_license_refs, key=lambda info: info.license_ref_id)
        actual_infos = sorted(doc.extracted_licensing_infos, key=lambda info: info.license_ref_id)
        assert expected_infos == actual_infos
class patch_license_expression:
    """
    Context manager controlling whether the license-expression package appears to be available.

    It replaces the module-level `get_spdx_licensing` name at PATCH_TARGET for the duration of the `with`
    block:
      - to simulate a missing package, the name is replaced with None;
      - to make the package available, the real function imported from license_expression is put in place.
        If the package can't be imported on the system, pytest.skip() is called instead.

    The value bound by `with ... as` is whatever the patcher's start() returns.
    """

    PATCH_TARGET = "osbuild.util.sbom.spdx.get_spdx_licensing"

    def __init__(self, make_package_available):
        # The patcher is created in __enter__, once it is known what to patch in.
        self.patcher = None
        self.make_package_available = make_package_available

    def __enter__(self):
        try:
            # pylint: disable=import-outside-toplevel
            from license_expression import get_spdx_licensing as real_get_spdx_licensing
        except ImportError:
            real_get_spdx_licensing = None

        # The real function was requested, but there is nothing to pass through.
        if self.make_package_available and not real_get_spdx_licensing:
            pytest.skip("The license-expression package is not available.")

        # When the package should be unavailable, force the function name to None,
        # regardless of whether the import above succeeded.
        replacement = real_get_spdx_licensing if self.make_package_available else None
        self.patcher = patch(self.PATCH_TARGET, new=replacement)
        return self.patcher.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.patcher.stop()