def normalize_name_for_license_id(name: str) -> str:
    """
    Normalize a license name so it can be embedded in an SPDX license ID.

    The function does the following things:
     - Replaces every character other than ASCII letters, digits, "." and "-"
       with "-".
     - Collapses each run of repeated "." or repeated "-" into one character.

    Leading and trailing separators are kept, and an empty name stays empty.

    See also:
    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/#1011-description
    """
    normalized_name = re.sub(r"[^a-zA-Z0-9.-]", "-", name)
    # The back-reference only collapses runs of the *same* character,
    # so ".-.-" is left untouched while "...." becomes ".".
    return re.sub(r"([.-])\1+", r"\1", normalized_name)


def generate_license_id(extracted_text: str, name: Optional[str] = None) -> str:
    """
    Generate an SPDX license ID from the SHA-256 hash of the extracted text.

    The result is "LicenseRef-<normalized name>-<hash>" when a non-empty name
    is given and "LicenseRef-<hash>" otherwise. The same inputs always produce
    the same ID.
    """
    extracted_text_hash = hashlib.sha256(extracted_text.encode("utf-8")).hexdigest()
    # Treat an empty name like a missing one. This matches
    # ExtractedLicensingInfo.to_dict(), which omits a falsy name, and avoids
    # producing an ID with an empty name segment ("LicenseRef--<hash>").
    if name:
        return f"LicenseRef-{normalize_name_for_license_id(name)}-{extracted_text_hash}"
    return f"LicenseRef-{extracted_text_hash}"


class ExtractedLicensingInfo():
    """
    Represents extracted licensing information for a license not on the SPDX License List.

    The license ID is derived once, at construction time, from the extracted
    text and the optional name.

    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
    """

    def __init__(self, extracted_text: str, name: Optional[str] = None) -> None:
        self.extracted_text = extracted_text
        self.name = name
        self.license_ref_id = generate_license_id(self.extracted_text, self.name)

    def __str__(self) -> str:
        """Return the license ID, so the object can be used where a license string is expected."""
        return self.license_ref_id

    def to_dict(self) -> dict:
        """Return the dictionary form of this entry; "name" is only present when non-empty."""
        d = {
            "licenseId": self.license_ref_id,
            "extractedText": self.extracted_text,
        }
        if self.name:
            d["name"] = self.name
        return d
def _expected_license_id(extracted_text, name):
    """Build the license ID the tests expect for the given text and optional name."""
    digest = hashlib.sha256(extracted_text.encode()).hexdigest()
    if name:
        return f"LicenseRef-{normalize_name_for_license_id(name)}-{digest}"
    return f"LicenseRef-{digest}"


@pytest.mark.parametrize("name,expected_str", (
    ("MIT", "MIT"),
    ("MIT-2.0", "MIT-2.0"),
    ("My License", "My-License"),
    ("%$MIT)) ((@\",))", "-MIT-"),
    ("MIT.and more-12345", "MIT.and-more-12345"),
    ("......-----------.......---------", ".-.-"),
    ("", ""),
))
def test_normalize_name_for_license_id(name, expected_str):
    """Disallowed characters become "-" and repeated separators are collapsed."""
    normalized = normalize_name_for_license_id(name)
    assert normalized == expected_str


@pytest.mark.parametrize("extracted_text,name", (
    ("MIT", "test"),
    ("MIT", None),
    ("MIT and GPLv2", "MIT and GPLv2"),
))
def test_generate_license_id(extracted_text, name):
    """The ID embeds the normalized name (when given) and the SHA-256 of the text."""
    license_id = generate_license_id(extracted_text, name)
    assert license_id == _expected_license_id(extracted_text, name)


# __str__ simply returns the license ID produced by the generate_license_id()
# function tested above, so the cases are the same for now.
@pytest.mark.parametrize("extracted_text,name", (
    ("MIT", "test"),
    ("MIT", None),
    ("MIT and GPLv2", "MIT and GPLv2"),
))
def test_extracted_licensing_info___str__(extracted_text, name):
    """str() of the object is its license ID."""
    info = ExtractedLicensingInfo(extracted_text, name)
    assert str(info) == _expected_license_id(extracted_text, name)


@pytest.mark.parametrize("test_case", (
    {
        "instance_args": {
            "extracted_text": "MIT and GPLv2",
        },
        "expected": {
            "licenseId": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
            "extractedText": "MIT and GPLv2",
        },
    },
    {
        "instance_args": {
            "extracted_text": "MIT and GPLv2",
            "name": "MIT and GPLv2",
        },
        "expected": {
            "licenseId": "LicenseRef-MIT-and-GPLv2-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
            "extractedText": "MIT and GPLv2",
            "name": "MIT and GPLv2",
        },
    },
))
def test_extracted_licensing_info_to_dict(test_case):
    """to_dict() emits the ID and text, plus the name only when one was given."""
    info = ExtractedLicensingInfo(**test_case["instance_args"])
    expected = test_case["expected"]
    assert info.to_dict() == expected
ExternalPackageRef( + ExternalPackageRefCategory.PACKAGE_MANAGER, + "purl", + "pkg:rpm:/example/package@1.2.3-1?arch=x86_64" + ) + ], + "built_date": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")) + }, + "expected": { + "SPDXID": "SPDXRef-package-1.2.3", + "name": "package", + "downloadLocation": "NONE", + "filesAnalyzed": False, + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "123456" + } + ], + "versionInfo": "1.2.3", + "homepage": "https://example.org/package", + "sourceInfo": "https://example.org/package-1.2.3.src.rpm", + "licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13", + "summary": "A sample package", + "description": "A sample package description", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64" + } + ], + "builtDate": "2024-11-15T13:33:59Z" + } } )) def test_package_to_dict(test_case): @@ -464,6 +587,147 @@ def test_relationship_to_dict(test_case): } ] } + }, + # Test with ExtractedLicensingInfo + { + "instance_args": { + "creation_info": CreationInfo( + "SPDX-2.3", + "SPDXRef-DOCUMENT", + "Sample-Document", + "https://example.com", + [Creator(CreatorType.TOOL, "Sample-Tool-123")], + datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")), + "Public Domain" + ), + "packages": [ + Package( + "SPDXRef-packageA-1.2.3", + "package", + "https://example.org/packageA-1.2.3.rpm" + ), + Package( + "SPDXRef-packageB-3.2.1", + "package", + "https://example.org/packageB-3.2.1.rpm" + ), + Package( + "SPDXRef-package-1.2.3", + "package", + NoneValue(), + "1.2.3", + False, + [Checksum(ChecksumAlgorithm.SHA256, "123456")], + "https://example.org/package", + "https://example.org/package-1.2.3.src.rpm", + ExtractedLicensingInfo("MIT and GPLv2"), + "A sample package", + "A sample package description", + [ + ExternalPackageRef( + 
ExternalPackageRefCategory.PACKAGE_MANAGER, + "purl", + "pkg:rpm:/example/package@1.2.3-1?arch=x86_64" + ) + ], + datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")) + ), + ], + "relationships": [ + Relationship( + "SPDXRef-DOCUMENT", + RelationshipType.DESCRIBES, + "SPDXRef-packageA-1.2.3" + ), + Relationship( + "SPDXRef-DOCUMENT", + RelationshipType.DESCRIBES, + "SPDXRef-packageB-3.2.1" + ), + Relationship( + "SPDXRef-packageA-1.2.3", + RelationshipType.DEPENDS_ON, + "SPDXRef-packageB-3.2.1" + ) + ], + "extracted_licensing_infos": [ + ExtractedLicensingInfo("MIT and GPLv2") + ] + }, + "expected": { + "spdxVersion": "SPDX-2.3", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "Sample-Document", + "dataLicense": "Public Domain", + "documentNamespace": "https://example.com", + "creationInfo": { + "created": "2024-11-15T13:33:59Z", + "creators": [ + "Tool: Sample-Tool-123" + ] + }, + "packages": [ + { + "SPDXID": "SPDXRef-packageA-1.2.3", + "name": "package", + "downloadLocation": "https://example.org/packageA-1.2.3.rpm" + }, + { + "SPDXID": "SPDXRef-packageB-3.2.1", + "name": "package", + "downloadLocation": "https://example.org/packageB-3.2.1.rpm" + }, + { + "SPDXID": "SPDXRef-package-1.2.3", + "name": "package", + "downloadLocation": "NONE", + "filesAnalyzed": False, + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "123456" + } + ], + "versionInfo": "1.2.3", + "homepage": "https://example.org/package", + "sourceInfo": "https://example.org/package-1.2.3.src.rpm", + "licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13", + "summary": "A sample package", + "description": "A sample package description", + "externalRefs": [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64" + } + ], + "builtDate": "2024-11-15T13:33:59Z" + } + ], + "relationships": [ + { + "spdxElementId": "SPDXRef-DOCUMENT", + 
"relationshipType": "DESCRIBES", + "relatedSpdxElement": "SPDXRef-packageA-1.2.3" + }, + { + "spdxElementId": "SPDXRef-DOCUMENT", + "relationshipType": "DESCRIBES", + "relatedSpdxElement": "SPDXRef-packageB-3.2.1" + }, + { + "spdxElementId": "SPDXRef-packageA-1.2.3", + "relationshipType": "DEPENDS_ON", + "relatedSpdxElement": "SPDXRef-packageB-3.2.1" + } + ], + "hasExtractedLicensingInfos": [ + { + 'extractedText': 'MIT and GPLv2', + 'licenseId': 'LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13', + } + ] + } } )) def test_document_to_dict(test_case):