sbom/spdx2/model: support ExtractedLicensingInfo

Extend the SPDX v2 model to support referencing extracted licensing
information, i.e. licensing information that is either not on the SPDX
License List or can't be expressed as an SPDX-compliant license
expression.

Cover the new functionality with unit tests.

Signed-off-by: Tomáš Hozza <thozza@redhat.com>
This commit is contained in:
Tomáš Hozza 2024-12-13 16:32:42 +01:00 committed by Tomáš Hozza
parent 39bfe3ce2d
commit aaa6d8ec84
3 changed files with 326 additions and 1 deletions

View file

@ -9,6 +9,7 @@ from .model import (
Document,
ExternalPackageRef,
ExternalPackageRefCategory,
ExtractedLicensingInfo,
NoAssertionValue,
NoneValue,
Package,
@ -24,6 +25,7 @@ __all__ = [
"CreatorType",
"Document",
"ExternalPackageRef",
"ExtractedLicensingInfo",
"ExternalPackageRefCategory",
"NoAssertionValue",
"NoneValue",

View file

@ -3,6 +3,7 @@ A base implementation of SPDX 2.3 model, as described on:
https://spdx.github.io/spdx-spec/v2.3/
"""
import hashlib
import re
from datetime import datetime, timezone
from enum import Enum, auto
@ -216,6 +217,60 @@ class Checksum():
}
def normalize_name_for_license_id(name: str) -> str:
    """
    Turn a free-form license name into a string usable inside an SPDX license ID.

    Two passes are applied, in this order:
      1. Every character other than an ASCII letter, a digit, "." or "-" is replaced with "-".
      2. Every run of one repeated "." or "-" character is collapsed into a single occurrence.
         Only identical neighbours are merged, so an alternating sequence such as ".-.-" is kept as is.

    See also:
    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/#1011-description
    """
    only_allowed_chars = re.sub(r"[^a-zA-Z0-9.-]", "-", name)
    return re.sub(r"([.-])\1+", r"\1", only_allowed_chars)
def generate_license_id(extracted_text: str, name: Optional[str] = None) -> str:
    """
    Generate a unique SPDX license ID by hashing the extracted text using SHA-256.

    The ID has the form "LicenseRef-<sha256 hex digest>". If a non-empty license name
    is provided, its normalized form is inserted before the digest:
    "LicenseRef-<normalized name>-<sha256 hex digest>".

    :param extracted_text: the license text; it is encoded with the default str.encode()
                           encoding before hashing.
    :param name: optional human readable license name. None and "" are both treated
                 as "no name".
    :return: the license reference ID.
    """
    extracted_text_hash = hashlib.sha256(extracted_text.encode()).hexdigest()
    # An empty name is handled like a missing one. Otherwise the ID would contain an empty
    # name component ("LicenseRef--<hash>"), while ExtractedLicensingInfo.to_dict() already
    # leaves an empty name out of the serialized document.
    if name:
        return f"LicenseRef-{normalize_name_for_license_id(name)}-{extracted_text_hash}"
    return f"LicenseRef-{extracted_text_hash}"
class ExtractedLicensingInfo():
    """
    Licensing information extracted for a license that is not on the SPDX License List.

    The license reference ID is derived once, at construction time, from the extracted
    text and the optional name. It is also what str() returns for the instance.

    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
    """

    def __init__(self, extracted_text: str, name: Optional[str] = None) -> None:
        self.extracted_text = extracted_text
        self.name = name
        self.license_ref_id = generate_license_id(extracted_text, name)

    def __str__(self):
        return self.license_ref_id

    def to_dict(self):
        """Serialize to a dict; the "name" key is emitted only for a non-empty name."""
        serialized = {
            "licenseId": self.license_ref_id,
            "extractedText": self.extracted_text,
        }
        if not self.name:
            return serialized
        serialized["name"] = self.name
        return serialized
# pylint: disable=too-many-instance-attributes
class Package(EntityWithSpdxId):
"""Represents an SPDX package."""
@ -230,7 +285,7 @@ class Package(EntityWithSpdxId):
checksums: Optional[List[Checksum]] = None,
homepage: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
source_info: Optional[str] = None,
license_declared: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
license_declared: Optional[Union[str, ExtractedLicensingInfo, NoAssertionValue, NoneValue]] = None,
summary: Optional[str] = None,
description: Optional[str] = None,
external_references: Optional[List[ExternalPackageRef]] = None,
@ -324,15 +379,19 @@ class Document():
creation_info: CreationInfo,
packages: Optional[List[Package]] = None,
relationships: Optional[List[Relationship]] = None,
extracted_licensing_infos: Optional[List[ExtractedLicensingInfo]] = None,
) -> None:
self.creation_info = creation_info
self.packages = packages or []
self.relationships = relationships or []
self.extracted_licensing_infos = extracted_licensing_infos or []
def to_dict(self):
    """
    Serialize the document to a dict.

    Starts from the dict returned by creation_info.to_dict() and then adds, in this order,
    the "packages", "hasExtractedLicensingInfos" and "relationships" lists. A list key is
    created only when there is at least one item to put into it.
    """
    d = self.creation_info.to_dict()
    sections = (
        ("packages", self.packages),
        ("hasExtractedLicensingInfos", self.extracted_licensing_infos),
        ("relationships", self.relationships),
    )
    for key, entities in sections:
        for entity in entities:
            d.setdefault(key, []).append(entity.to_dict())
    return d

View file

@ -1,3 +1,5 @@
# pylint: disable=protected-access
import hashlib
import json
from datetime import datetime
@ -15,12 +17,15 @@ from osbuild.util.sbom.spdx2.model import (
EntityWithSpdxId,
ExternalPackageRef,
ExternalPackageRefCategory,
ExtractedLicensingInfo,
NoAssertionValue,
NoneValue,
Package,
Relationship,
RelationshipType,
datetime_to_iso8601,
generate_license_id,
normalize_name_for_license_id,
)
zoneinfo = pytest.importorskip("zoneinfo")
@ -231,6 +236,72 @@ def test_checksum_to_dict():
}
@pytest.mark.parametrize(
    "name,expected_str",
    [
        # already valid names are returned unchanged
        ("MIT", "MIT"),
        ("MIT-2.0", "MIT-2.0"),
        # disallowed characters become "-"
        ("My License", "My-License"),
        ("%$MIT)) ((@\",))", "-MIT-"),
        ("MIT.and more-12345", "MIT.and-more-12345"),
        # runs of the same "." or "-" character are collapsed
        ("......-----------.......---------", ".-.-"),
        ("", ""),
    ],
)
def test_normalize_name_for_license_id(name, expected_str):
    """Check the normalized form of a license name against the expected string."""
    normalized = normalize_name_for_license_id(name)
    assert normalized == expected_str
@pytest.mark.parametrize(
    "extracted_text,name",
    [
        ("MIT", "test"),
        ("MIT", None),
        ("MIT and GPLv2", "MIT and GPLv2"),
    ],
)
def test_generate_license_id(extracted_text, name):
    """Check the generated license ID with and without a license name."""
    digest = hashlib.sha256(extracted_text.encode()).hexdigest()
    if name:
        expected = f"LicenseRef-{normalize_name_for_license_id(name)}-{digest}"
    else:
        expected = f"LicenseRef-{digest}"
    assert generate_license_id(extracted_text, name) == expected
# The __str__ just returns the license ID, which is generated by the generate_license_id() function
# tested above. So, the test case looks the same for now.
@pytest.mark.parametrize(
    "extracted_text,name",
    [
        ("MIT", "test"),
        ("MIT", None),
        ("MIT and GPLv2", "MIT and GPLv2"),
    ],
)
def test_extracted_licensing_info___str__(extracted_text, name):
    """Check that str() of an ExtractedLicensingInfo is its license reference ID."""
    digest = hashlib.sha256(extracted_text.encode()).hexdigest()
    if name:
        expected = f"LicenseRef-{normalize_name_for_license_id(name)}-{digest}"
    else:
        expected = f"LicenseRef-{digest}"
    info = ExtractedLicensingInfo(extracted_text, name)
    assert str(info) == expected
@pytest.mark.parametrize(
    "test_case",
    [
        # Only the extracted text is given
        {
            "instance_args": {
                "extracted_text": "MIT and GPLv2",
            },
            "expected": {
                "licenseId": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
                "extractedText": "MIT and GPLv2",
            },
        },
        # Both the extracted text and a license name are given
        {
            "instance_args": {
                "extracted_text": "MIT and GPLv2",
                "name": "MIT and GPLv2",
            },
            "expected": {
                "licenseId": "LicenseRef-MIT-and-GPLv2-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
                "extractedText": "MIT and GPLv2",
                "name": "MIT and GPLv2",
            },
        },
    ],
)
def test_extracted_licensing_info_to_dict(test_case):
    """Check the dict serialization of an ExtractedLicensingInfo instance."""
    info = ExtractedLicensingInfo(**test_case["instance_args"])
    serialized = info.to_dict()
    assert serialized == test_case["expected"]
@pytest.mark.parametrize("test_case", (
{
"instance_args": {
@ -308,6 +379,58 @@ def test_checksum_to_dict():
],
"builtDate": "2024-11-15T13:33:59Z"
}
},
# Test with ExtractedLicensingInfo
{
"instance_args": {
"spdx_id": "SPDXRef-package-1.2.3",
"name": "package",
"download_location": NoneValue(),
"files_analyzed": False,
"checksums": [
Checksum(ChecksumAlgorithm.SHA256, "123456")
],
"version": "1.2.3",
"homepage": "https://example.org/package",
"source_info": "https://example.org/package-1.2.3.src.rpm",
"license_declared": ExtractedLicensingInfo("MIT and GPLv2"),
"summary": "A sample package",
"description": "A sample package description",
"external_references": [
ExternalPackageRef(
ExternalPackageRefCategory.PACKAGE_MANAGER,
"purl",
"pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
)
],
"built_date": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague"))
},
"expected": {
"SPDXID": "SPDXRef-package-1.2.3",
"name": "package",
"downloadLocation": "NONE",
"filesAnalyzed": False,
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "123456"
}
],
"versionInfo": "1.2.3",
"homepage": "https://example.org/package",
"sourceInfo": "https://example.org/package-1.2.3.src.rpm",
"licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
"summary": "A sample package",
"description": "A sample package description",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
}
],
"builtDate": "2024-11-15T13:33:59Z"
}
}
))
def test_package_to_dict(test_case):
@ -464,6 +587,147 @@ def test_relationship_to_dict(test_case):
}
]
}
},
# Test with ExtractedLicensingInfo
{
"instance_args": {
"creation_info": CreationInfo(
"SPDX-2.3",
"SPDXRef-DOCUMENT",
"Sample-Document",
"https://example.com",
[Creator(CreatorType.TOOL, "Sample-Tool-123")],
datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
"Public Domain"
),
"packages": [
Package(
"SPDXRef-packageA-1.2.3",
"package",
"https://example.org/packageA-1.2.3.rpm"
),
Package(
"SPDXRef-packageB-3.2.1",
"package",
"https://example.org/packageB-3.2.1.rpm"
),
Package(
"SPDXRef-package-1.2.3",
"package",
NoneValue(),
"1.2.3",
False,
[Checksum(ChecksumAlgorithm.SHA256, "123456")],
"https://example.org/package",
"https://example.org/package-1.2.3.src.rpm",
ExtractedLicensingInfo("MIT and GPLv2"),
"A sample package",
"A sample package description",
[
ExternalPackageRef(
ExternalPackageRefCategory.PACKAGE_MANAGER,
"purl",
"pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
)
],
datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague"))
),
],
"relationships": [
Relationship(
"SPDXRef-DOCUMENT",
RelationshipType.DESCRIBES,
"SPDXRef-packageA-1.2.3"
),
Relationship(
"SPDXRef-DOCUMENT",
RelationshipType.DESCRIBES,
"SPDXRef-packageB-3.2.1"
),
Relationship(
"SPDXRef-packageA-1.2.3",
RelationshipType.DEPENDS_ON,
"SPDXRef-packageB-3.2.1"
)
],
"extracted_licensing_infos": [
ExtractedLicensingInfo("MIT and GPLv2")
]
},
"expected": {
"spdxVersion": "SPDX-2.3",
"SPDXID": "SPDXRef-DOCUMENT",
"name": "Sample-Document",
"dataLicense": "Public Domain",
"documentNamespace": "https://example.com",
"creationInfo": {
"created": "2024-11-15T13:33:59Z",
"creators": [
"Tool: Sample-Tool-123"
]
},
"packages": [
{
"SPDXID": "SPDXRef-packageA-1.2.3",
"name": "package",
"downloadLocation": "https://example.org/packageA-1.2.3.rpm"
},
{
"SPDXID": "SPDXRef-packageB-3.2.1",
"name": "package",
"downloadLocation": "https://example.org/packageB-3.2.1.rpm"
},
{
"SPDXID": "SPDXRef-package-1.2.3",
"name": "package",
"downloadLocation": "NONE",
"filesAnalyzed": False,
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "123456"
}
],
"versionInfo": "1.2.3",
"homepage": "https://example.org/package",
"sourceInfo": "https://example.org/package-1.2.3.src.rpm",
"licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
"summary": "A sample package",
"description": "A sample package description",
"externalRefs": [
{
"referenceCategory": "PACKAGE-MANAGER",
"referenceType": "purl",
"referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
}
],
"builtDate": "2024-11-15T13:33:59Z"
}
],
"relationships": [
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relationshipType": "DESCRIBES",
"relatedSpdxElement": "SPDXRef-packageA-1.2.3"
},
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relationshipType": "DESCRIBES",
"relatedSpdxElement": "SPDXRef-packageB-3.2.1"
},
{
"spdxElementId": "SPDXRef-packageA-1.2.3",
"relationshipType": "DEPENDS_ON",
"relatedSpdxElement": "SPDXRef-packageB-3.2.1"
}
],
"hasExtractedLicensingInfos": [
{
'extractedText': 'MIT and GPLv2',
'licenseId': 'LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13',
}
]
}
}
))
def test_document_to_dict(test_case):