sbom/spdx2/model: support ExtractedLicensingInfo
Extend the SPDX v2 model to support referencing extracted licensing information, which is either not in the SPDX license list or can't be expressed by the SPDX-compliant license expression. Cover the new functionality by unit tests. Signed-off-by: Tomáš Hozza <thozza@redhat.com>
This commit is contained in:
parent
39bfe3ce2d
commit
aaa6d8ec84
3 changed files with 326 additions and 1 deletions
|
|
@ -9,6 +9,7 @@ from .model import (
|
|||
Document,
|
||||
ExternalPackageRef,
|
||||
ExternalPackageRefCategory,
|
||||
ExtractedLicensingInfo,
|
||||
NoAssertionValue,
|
||||
NoneValue,
|
||||
Package,
|
||||
|
|
@ -24,6 +25,7 @@ __all__ = [
|
|||
"CreatorType",
|
||||
"Document",
|
||||
"ExternalPackageRef",
|
||||
"ExtractedLicensingInfo",
|
||||
"ExternalPackageRefCategory",
|
||||
"NoAssertionValue",
|
||||
"NoneValue",
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ A base implementation of SPDX 2.3 model, as described on:
|
|||
https://spdx.github.io/spdx-spec/v2.3/
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum, auto
|
||||
|
|
@ -216,6 +217,60 @@ class Checksum():
|
|||
}
|
||||
|
||||
|
||||
def normalize_name_for_license_id(name: str) -> str:
    """
    Turn an arbitrary license name into a string usable inside an SPDX license ID.

    Two passes are applied to the input:
      1. Every character that is not an ASCII letter, a digit, "." or "-" is
         substituted with "-".
      2. Any run of the same "." or "-" character is collapsed to a single
         occurrence (e.g. "---" becomes "-", while ".-.-" is left untouched).

    Leading and trailing "." / "-" characters are kept as they are.

    See also:
    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/#1011-description
    """
    # Pass 1: map every disallowed character to "-".
    sanitized = re.sub(r"[^a-zA-Z0-9.-]", "-", name)
    # Pass 2: squash repeated identical separators produced above or present in the input.
    return re.sub(r"([.-])\1+", r"\1", sanitized)
|
||||
|
||||
|
||||
def generate_license_id(extracted_text: str, name: Optional[str] = None) -> str:
    """
    Generate a unique SPDX license ID by hashing the extracted text using SHA-256.

    :param extracted_text: the license text; its UTF-8 encoding is hashed.
    :param name: optional human-readable license name. When non-empty, its
        normalized form (see normalize_name_for_license_id()) is embedded in the ID.
    :return: "LicenseRef-<normalized name>-<sha256 hex digest>" when a non-empty
        name is given, otherwise "LicenseRef-<sha256 hex digest>".
    """
    extracted_text_hash = hashlib.sha256(extracted_text.encode()).hexdigest()
    # An empty name is treated the same as a missing one. Otherwise the ID would
    # contain an empty name segment ("LicenseRef--<hash>"), which is inconsistent
    # with ExtractedLicensingInfo.to_dict() omitting an empty name from its output.
    if not name:
        return f"LicenseRef-{extracted_text_hash}"
    return f"LicenseRef-{normalize_name_for_license_id(name)}-{extracted_text_hash}"
|
||||
|
||||
|
||||
class ExtractedLicensingInfo():
    """
    Licensing information extracted for a license that is not on the SPDX License List.

    The license reference ID is computed once, at construction time, from the
    extracted text and the optional name.

    https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
    """

    def __init__(self, extracted_text: str, name: Optional[str] = None) -> None:
        self.extracted_text = extracted_text
        self.name = name
        # Derived from the two attributes stored above.
        self.license_ref_id = generate_license_id(self.extracted_text, self.name)

    def __str__(self):
        # The string form is the license reference ID itself.
        return self.license_ref_id

    def to_dict(self):
        """Return the dictionary form; "name" is included only when it is non-empty."""
        serialized = {
            "licenseId": self.license_ref_id,
            "extractedText": self.extracted_text,
        }
        if not self.name:
            return serialized
        serialized["name"] = self.name
        return serialized
|
||||
|
||||
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
class Package(EntityWithSpdxId):
|
||||
"""Represents an SPDX package."""
|
||||
|
|
@ -230,7 +285,7 @@ class Package(EntityWithSpdxId):
|
|||
checksums: Optional[List[Checksum]] = None,
|
||||
homepage: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
|
||||
source_info: Optional[str] = None,
|
||||
license_declared: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
|
||||
license_declared: Optional[Union[str, ExtractedLicensingInfo, NoAssertionValue, NoneValue]] = None,
|
||||
summary: Optional[str] = None,
|
||||
description: Optional[str] = None,
|
||||
external_references: Optional[List[ExternalPackageRef]] = None,
|
||||
|
|
@ -324,15 +379,19 @@ class Document():
|
|||
creation_info: CreationInfo,
|
||||
packages: Optional[List[Package]] = None,
|
||||
relationships: Optional[List[Relationship]] = None,
|
||||
extracted_licensing_infos: Optional[List[ExtractedLicensingInfo]] = None,
|
||||
) -> None:
|
||||
self.creation_info = creation_info
|
||||
self.packages = packages or []
|
||||
self.relationships = relationships or []
|
||||
self.extracted_licensing_infos = extracted_licensing_infos or []
|
||||
|
||||
def to_dict(self):
    """
    Serialize the document into a dictionary.

    The result starts from the dictionary returned by the creation info and
    gains the "packages", "hasExtractedLicensingInfos" and "relationships"
    lists, in that order. A list key is only created when there is at least
    one item to put into it.
    """
    document = self.creation_info.to_dict()
    sections = (
        ("packages", self.packages),
        ("hasExtractedLicensingInfos", self.extracted_licensing_infos),
        ("relationships", self.relationships),
    )
    for key, items in sections:
        for item in items:
            # setdefault() creates the list lazily, on the first item only.
            document.setdefault(key, []).append(item.to_dict())
    return document
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
# pylint: disable=protected-access
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
|
|
@ -15,12 +17,15 @@ from osbuild.util.sbom.spdx2.model import (
|
|||
EntityWithSpdxId,
|
||||
ExternalPackageRef,
|
||||
ExternalPackageRefCategory,
|
||||
ExtractedLicensingInfo,
|
||||
NoAssertionValue,
|
||||
NoneValue,
|
||||
Package,
|
||||
Relationship,
|
||||
RelationshipType,
|
||||
datetime_to_iso8601,
|
||||
generate_license_id,
|
||||
normalize_name_for_license_id,
|
||||
)
|
||||
|
||||
zoneinfo = pytest.importorskip("zoneinfo")
|
||||
|
|
@ -231,6 +236,72 @@ def test_checksum_to_dict():
|
|||
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("name", "expected_str"),
    [
        ("MIT", "MIT"),
        ("MIT-2.0", "MIT-2.0"),
        ("My License", "My-License"),
        ("%$MIT)) ((@\",))", "-MIT-"),
        ("MIT.and more-12345", "MIT.and-more-12345"),
        ("......-----------.......---------", ".-.-"),
        ("", ""),
    ],
)
def test_normalize_name_for_license_id(name, expected_str):
    """Check the normalized form of various license names, including the empty string."""
    normalized = normalize_name_for_license_id(name)
    assert normalized == expected_str
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("extracted_text", "name"),
    [
        ("MIT", "test"),
        ("MIT", None),
        ("MIT and GPLv2", "MIT and GPLv2"),
    ],
)
def test_generate_license_id(extracted_text, name):
    """The ID embeds the SHA-256 of the text, prefixed by the normalized name when one is given."""
    digest = hashlib.sha256(extracted_text.encode()).hexdigest()
    if name:
        expected_id = f"LicenseRef-{normalize_name_for_license_id(name)}-{digest}"
    else:
        expected_id = f"LicenseRef-{digest}"
    assert generate_license_id(extracted_text, name) == expected_id
|
||||
|
||||
|
||||
# The __str__ just returns the license ID, which is generated by the generate_license_id() function
|
||||
# tested above. So, the test case looks the same for now.
|
||||
@pytest.mark.parametrize(
    ("extracted_text", "name"),
    [
        ("MIT", "test"),
        ("MIT", None),
        ("MIT and GPLv2", "MIT and GPLv2"),
    ],
)
def test_extracted_licensing_info___str__(extracted_text, name):
    """str() of an ExtractedLicensingInfo must equal its license reference ID."""
    digest = hashlib.sha256(extracted_text.encode()).hexdigest()
    if name:
        expected_id = f"LicenseRef-{normalize_name_for_license_id(name)}-{digest}"
    else:
        expected_id = f"LicenseRef-{digest}"
    info = ExtractedLicensingInfo(extracted_text, name)
    assert str(info) == expected_id
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "test_case",
    [
        # Only the extracted text is provided.
        {
            "instance_args": {
                "extracted_text": "MIT and GPLv2",
            },
            "expected": {
                "licenseId": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
                "extractedText": "MIT and GPLv2",
            },
        },
        # Both the extracted text and a license name are provided.
        {
            "instance_args": {
                "extracted_text": "MIT and GPLv2",
                "name": "MIT and GPLv2",
            },
            "expected": {
                "licenseId": "LicenseRef-MIT-and-GPLv2-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
                "extractedText": "MIT and GPLv2",
                "name": "MIT and GPLv2",
            },
        },
    ],
)
def test_extracted_licensing_info_to_dict(test_case):
    """Compare the dictionary form of ExtractedLicensingInfo with the expected output."""
    constructor_kwargs = test_case["instance_args"]
    info = ExtractedLicensingInfo(**constructor_kwargs)
    assert info.to_dict() == test_case["expected"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("test_case", (
|
||||
{
|
||||
"instance_args": {
|
||||
|
|
@ -308,6 +379,58 @@ def test_checksum_to_dict():
|
|||
],
|
||||
"builtDate": "2024-11-15T13:33:59Z"
|
||||
}
|
||||
},
|
||||
# Test with ExtractedLicensingInfo
|
||||
{
|
||||
"instance_args": {
|
||||
"spdx_id": "SPDXRef-package-1.2.3",
|
||||
"name": "package",
|
||||
"download_location": NoneValue(),
|
||||
"files_analyzed": False,
|
||||
"checksums": [
|
||||
Checksum(ChecksumAlgorithm.SHA256, "123456")
|
||||
],
|
||||
"version": "1.2.3",
|
||||
"homepage": "https://example.org/package",
|
||||
"source_info": "https://example.org/package-1.2.3.src.rpm",
|
||||
"license_declared": ExtractedLicensingInfo("MIT and GPLv2"),
|
||||
"summary": "A sample package",
|
||||
"description": "A sample package description",
|
||||
"external_references": [
|
||||
ExternalPackageRef(
|
||||
ExternalPackageRefCategory.PACKAGE_MANAGER,
|
||||
"purl",
|
||||
"pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
|
||||
)
|
||||
],
|
||||
"built_date": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague"))
|
||||
},
|
||||
"expected": {
|
||||
"SPDXID": "SPDXRef-package-1.2.3",
|
||||
"name": "package",
|
||||
"downloadLocation": "NONE",
|
||||
"filesAnalyzed": False,
|
||||
"checksums": [
|
||||
{
|
||||
"algorithm": "SHA256",
|
||||
"checksumValue": "123456"
|
||||
}
|
||||
],
|
||||
"versionInfo": "1.2.3",
|
||||
"homepage": "https://example.org/package",
|
||||
"sourceInfo": "https://example.org/package-1.2.3.src.rpm",
|
||||
"licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
|
||||
"summary": "A sample package",
|
||||
"description": "A sample package description",
|
||||
"externalRefs": [
|
||||
{
|
||||
"referenceCategory": "PACKAGE-MANAGER",
|
||||
"referenceType": "purl",
|
||||
"referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
|
||||
}
|
||||
],
|
||||
"builtDate": "2024-11-15T13:33:59Z"
|
||||
}
|
||||
}
|
||||
))
|
||||
def test_package_to_dict(test_case):
|
||||
|
|
@ -464,6 +587,147 @@ def test_relationship_to_dict(test_case):
|
|||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
# Test with ExtractedLicensingInfo
|
||||
{
|
||||
"instance_args": {
|
||||
"creation_info": CreationInfo(
|
||||
"SPDX-2.3",
|
||||
"SPDXRef-DOCUMENT",
|
||||
"Sample-Document",
|
||||
"https://example.com",
|
||||
[Creator(CreatorType.TOOL, "Sample-Tool-123")],
|
||||
datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
|
||||
"Public Domain"
|
||||
),
|
||||
"packages": [
|
||||
Package(
|
||||
"SPDXRef-packageA-1.2.3",
|
||||
"package",
|
||||
"https://example.org/packageA-1.2.3.rpm"
|
||||
),
|
||||
Package(
|
||||
"SPDXRef-packageB-3.2.1",
|
||||
"package",
|
||||
"https://example.org/packageB-3.2.1.rpm"
|
||||
),
|
||||
Package(
|
||||
"SPDXRef-package-1.2.3",
|
||||
"package",
|
||||
NoneValue(),
|
||||
"1.2.3",
|
||||
False,
|
||||
[Checksum(ChecksumAlgorithm.SHA256, "123456")],
|
||||
"https://example.org/package",
|
||||
"https://example.org/package-1.2.3.src.rpm",
|
||||
ExtractedLicensingInfo("MIT and GPLv2"),
|
||||
"A sample package",
|
||||
"A sample package description",
|
||||
[
|
||||
ExternalPackageRef(
|
||||
ExternalPackageRefCategory.PACKAGE_MANAGER,
|
||||
"purl",
|
||||
"pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
|
||||
)
|
||||
],
|
||||
datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague"))
|
||||
),
|
||||
],
|
||||
"relationships": [
|
||||
Relationship(
|
||||
"SPDXRef-DOCUMENT",
|
||||
RelationshipType.DESCRIBES,
|
||||
"SPDXRef-packageA-1.2.3"
|
||||
),
|
||||
Relationship(
|
||||
"SPDXRef-DOCUMENT",
|
||||
RelationshipType.DESCRIBES,
|
||||
"SPDXRef-packageB-3.2.1"
|
||||
),
|
||||
Relationship(
|
||||
"SPDXRef-packageA-1.2.3",
|
||||
RelationshipType.DEPENDS_ON,
|
||||
"SPDXRef-packageB-3.2.1"
|
||||
)
|
||||
],
|
||||
"extracted_licensing_infos": [
|
||||
ExtractedLicensingInfo("MIT and GPLv2")
|
||||
]
|
||||
},
|
||||
"expected": {
|
||||
"spdxVersion": "SPDX-2.3",
|
||||
"SPDXID": "SPDXRef-DOCUMENT",
|
||||
"name": "Sample-Document",
|
||||
"dataLicense": "Public Domain",
|
||||
"documentNamespace": "https://example.com",
|
||||
"creationInfo": {
|
||||
"created": "2024-11-15T13:33:59Z",
|
||||
"creators": [
|
||||
"Tool: Sample-Tool-123"
|
||||
]
|
||||
},
|
||||
"packages": [
|
||||
{
|
||||
"SPDXID": "SPDXRef-packageA-1.2.3",
|
||||
"name": "package",
|
||||
"downloadLocation": "https://example.org/packageA-1.2.3.rpm"
|
||||
},
|
||||
{
|
||||
"SPDXID": "SPDXRef-packageB-3.2.1",
|
||||
"name": "package",
|
||||
"downloadLocation": "https://example.org/packageB-3.2.1.rpm"
|
||||
},
|
||||
{
|
||||
"SPDXID": "SPDXRef-package-1.2.3",
|
||||
"name": "package",
|
||||
"downloadLocation": "NONE",
|
||||
"filesAnalyzed": False,
|
||||
"checksums": [
|
||||
{
|
||||
"algorithm": "SHA256",
|
||||
"checksumValue": "123456"
|
||||
}
|
||||
],
|
||||
"versionInfo": "1.2.3",
|
||||
"homepage": "https://example.org/package",
|
||||
"sourceInfo": "https://example.org/package-1.2.3.src.rpm",
|
||||
"licenseDeclared": "LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13",
|
||||
"summary": "A sample package",
|
||||
"description": "A sample package description",
|
||||
"externalRefs": [
|
||||
{
|
||||
"referenceCategory": "PACKAGE-MANAGER",
|
||||
"referenceType": "purl",
|
||||
"referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64"
|
||||
}
|
||||
],
|
||||
"builtDate": "2024-11-15T13:33:59Z"
|
||||
}
|
||||
],
|
||||
"relationships": [
|
||||
{
|
||||
"spdxElementId": "SPDXRef-DOCUMENT",
|
||||
"relationshipType": "DESCRIBES",
|
||||
"relatedSpdxElement": "SPDXRef-packageA-1.2.3"
|
||||
},
|
||||
{
|
||||
"spdxElementId": "SPDXRef-DOCUMENT",
|
||||
"relationshipType": "DESCRIBES",
|
||||
"relatedSpdxElement": "SPDXRef-packageB-3.2.1"
|
||||
},
|
||||
{
|
||||
"spdxElementId": "SPDXRef-packageA-1.2.3",
|
||||
"relationshipType": "DEPENDS_ON",
|
||||
"relatedSpdxElement": "SPDXRef-packageB-3.2.1"
|
||||
}
|
||||
],
|
||||
"hasExtractedLicensingInfos": [
|
||||
{
|
||||
'extractedText': 'MIT and GPLv2',
|
||||
'licenseId': 'LicenseRef-7805d4303e817ddd5f86dcf6541af84daac5c5b4a8ad1fb4cd14def8a4ca3d13',
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
))
|
||||
def test_document_to_dict(test_case):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue