Add initial SBOM library implementation

Add implementation of standard-agnostic model for SBOM, and simple SPDX
v2.3 model. Also add convenience functions for converting DNF4 package
set to the standard-agnostic model and for converting it to SPDX model.

Cover the functionality with unit tests.

Signed-off-by: Tomáš Hozza <thozza@redhat.com>
This commit is contained in:
Tomáš Hozza 2024-06-26 13:22:00 +02:00 committed by Simon de Vlieger
parent 75b6fb4abe
commit 0b68f8123b
11 changed files with 1436 additions and 1 deletions

View file

@ -0,0 +1 @@
"""Module for working with Software Bill of Materials (SBOM) files."""

106
osbuild/util/sbom/dnf.py Normal file
View file

@ -0,0 +1,106 @@
from datetime import datetime
from typing import Dict, List
import dnf
import hawkey
import osbuild.util.sbom.model as sbom_model
def bom_chksum_algorithm_from_hawkey(chksum_type: int) -> sbom_model.ChecksumAlgorithm:
    """
    Translate a hawkey checksum type constant into the matching SBOM checksum algorithm.

    Raises ValueError when chksum_type matches none of the hawkey constants handled here.
    """
    # Each entry names both the hawkey constant (CHKSUM_<name>) and the SBOM enum member.
    # The attributes are looked up lazily, one comparison at a time, in this order.
    for algorithm_name in ("MD5", "SHA1", "SHA256", "SHA384", "SHA512"):
        if chksum_type == getattr(hawkey, f"CHKSUM_{algorithm_name}"):
            return sbom_model.ChecksumAlgorithm[algorithm_name]
    raise ValueError(f"Unknown Hawkey checksum type: {chksum_type}")
# pylint: disable=too-many-branches
def dnf_pkgset_to_sbom_pkgset(dnf_pkgset: List[dnf.package.Package]) -> List[sbom_model.BasePackage]:
    """
    Convert a dnf package set to a SBOM package set.

    Every dnf package is converted to exactly one sbom_model.RPMPackage, and the
    returned list keeps the order of dnf_pkgset. Packages that share a name (for
    example the same package for two architectures) are all kept in the result.

    After the conversion, the "depends_on" and "optional_depends_on" sets of each
    package are filled with the packages from the same set that provide the
    required, recommended or suggested capabilities or files.
    """
    # All converted packages, in input order. This is what gets returned.
    sbom_pkgs: List[sbom_model.RPMPackage] = []
    # Used only to check whether the package named in a conditional ("if") dependency is in the set.
    pkgs_by_name = {}
    pkgs_by_provides: Dict[str, List[sbom_model.BasePackage]] = {}

    for dnf_pkg in dnf_pkgset:
        pkg = sbom_model.RPMPackage(
            name=dnf_pkg.name,
            version=dnf_pkg.version,
            release=dnf_pkg.release,
            architecture=dnf_pkg.arch,
            epoch=dnf_pkg.epoch,
            license_declared=dnf_pkg.license,
            vendor=dnf_pkg.vendor,
            build_date=datetime.fromtimestamp(dnf_pkg.buildtime),
            summary=dnf_pkg.summary,
            description=dnf_pkg.description,
            source_rpm=dnf_pkg.sourcerpm,
            homepage=dnf_pkg.url,
        )

        if dnf_pkg.chksum:
            # dnf_pkg.chksum is a (hawkey checksum type, raw digest bytes) pair
            pkg.checksums = {
                bom_chksum_algorithm_from_hawkey(dnf_pkg.chksum[0]): dnf_pkg.chksum[1].hex()
            }

        remote_location = dnf_pkg.remote_location()
        if remote_location:
            pkg.download_url = remote_location

        # if dnf_pkg.from_repo is empty, the pkg is not installed. determine from remote_location
        # if dnf_pkg.from_repo is "@commandline", the pkg was installed from the command line, there is no repo URL
        # if dnf_pkg.reponame is "@System", the package is installed and there is no repo URL
        # if dnf_pkg.from_repo is a string with repo ID, determine the repo URL from the repo configuration
        if not dnf_pkg.from_repo and remote_location:
            # strip the "/<relative path>" suffix of the package to get the repository URL
            pkg.repository_url = remote_location[:-len("/" + dnf_pkg.relativepath)]
        elif dnf_pkg.from_repo != "@commandline" and dnf_pkg.reponame != "@System":
            repo_url = ""
            if dnf_pkg.repo.baseurl:
                repo_url = dnf_pkg.repo.baseurl
            elif dnf_pkg.repo.metalink:
                repo_url = dnf_pkg.repo.metalink
            elif dnf_pkg.repo.mirrorlist:
                repo_url = dnf_pkg.repo.mirrorlist
            pkg.repository_url = repo_url

        pkg.rpm_provides = [sbom_model.RPMDependency(r.name, r.relation, r.version) for r in dnf_pkg.provides]
        pkg.rpm_requires = [sbom_model.RPMDependency(r.name, r.relation, r.version) for r in dnf_pkg.requires]
        pkg.rpm_recommends = [sbom_model.RPMDependency(r.name, r.relation, r.version) for r in dnf_pkg.recommends]
        pkg.rpm_suggests = [sbom_model.RPMDependency(r.name, r.relation, r.version) for r in dnf_pkg.suggests]

        # The dnf_pkgset is not sorted by package dependencies. We need to determine relationships in two steps:
        # 1. Collect all packages that provide a certain capability
        # 2. Resolve dependencies for each package using previously constructed list of capabilities by package.
        # Doing this in two steps ensures that all soft dependencies satisfied by a package from the same set are
        # resolved.
        for provide in pkg.rpm_provides:
            pkgs_by_provides.setdefault(provide.name, []).append(pkg)
        # Packages can also depend directly on files provided by other packages. Collect these as well.
        for provided_file in dnf_pkg.files:
            pkgs_by_provides.setdefault(provided_file, []).append(pkg)

        pkgs_by_name[pkg.name] = pkg
        # Keep every package, even when another package with the same name was already seen.
        # Building the result from pkgs_by_name would drop such packages while they could
        # still be referenced from the dependency sets of other packages.
        sbom_pkgs.append(pkg)

    for pkg in sbom_pkgs:
        for require in pkg.rpm_requires:
            # skip conditional dependencies if the required package is not in the set
            # "relation" contains whitespace on both sides
            if require.relation.strip() == "if" and pkgs_by_name.get(require.version) is None:
                continue
            for provider_pkg in pkgs_by_provides.get(require.name, []):
                pkg.depends_on.add(provider_pkg)

        for soft_dep in pkg.rpm_recommends + pkg.rpm_suggests:
            for provider_pkg in pkgs_by_provides.get(soft_dep.name, []):
                pkg.optional_depends_on.add(provider_pkg)

    return list(sbom_pkgs)

185
osbuild/util/sbom/model.py Normal file
View file

@ -0,0 +1,185 @@
"""Defines standard-agnostic data model for an SBOM."""
import abc
import urllib.parse
import uuid
from datetime import datetime
from enum import Enum, auto
from typing import Dict, List, Optional, Set
class ChecksumAlgorithm(Enum):
    """Checksum algorithms that can be recorded for a package in the SBOM model."""

    # Secure Hash Algorithm family
    SHA1 = auto()
    SHA224 = auto()
    SHA256 = auto()
    SHA384 = auto()
    SHA512 = auto()

    # Message Digest family
    MD5 = auto()
class BasePackage(abc.ABC):
    """
    Standard-agnostic description of a single software package.

    Concrete package types must implement uuid(), source_info() and purl().
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        name: str,
        version: str,
        filename: str = "",
        license_declared: str = "",
        vendor: str = "",
        checksums: Optional[Dict[ChecksumAlgorithm, str]] = None,
        homepage: str = "",
        download_url: str = "",
        build_date: Optional[datetime] = None,
        summary: str = "",
        description: str = "",
        depends_on: Optional[Set["BasePackage"]] = None,
        optional_depends_on: Optional[Set["BasePackage"]] = None,
    ) -> None:
        # identification
        self.name = name
        self.version = version
        self.filename = filename

        # descriptive metadata
        self.license_declared = license_declared
        self.vendor = vendor
        self.homepage = homepage
        self.download_url = download_url
        self.build_date = build_date
        self.summary = summary
        self.description = description

        # Containers default to a fresh empty object, so instances never share them.
        self.checksums = checksums if checksums else {}
        self.depends_on = depends_on if depends_on else set()
        self.optional_depends_on = optional_depends_on if optional_depends_on else set()

    @abc.abstractmethod
    def uuid(self) -> str:
        """
        Return a stable UUID of the package.
        """

    @abc.abstractmethod
    def source_info(self) -> str:
        """
        Return a human-readable description of where the package comes from.
        """

    @abc.abstractmethod
    def purl(self) -> str:
        """
        Return the Package URL (PURL) of the package.

        A PURL has the following format:
            pkg:<type>/<namespace>/<name>@<version>?<qualifiers>#<subpath>

        The core PURL specification is available at:
        https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst
        """
class RPMDependency:
    """
    A single RPM dependency entry or a capability provided by an RPM package.

    It is made of a name, an optional relation and an optional version.
    """

    def __init__(self, name: str, relation: str = "", version: str = "") -> None:
        self.name, self.relation, self.version = name, relation, version

    def __str__(self) -> str:
        # All three parts are always emitted, separated by single spaces, even when empty.
        return "{} {} {}".format(self.name, self.relation, self.version)
class RPMPackage(BasePackage):
    """An RPM flavour of BasePackage, extended with RPM-specific metadata."""

    def __init__(
        self,
        name: str,
        version: str,
        release: str,
        architecture: str,
        epoch: int = 0,
        filename: str = "",
        license_declared: str = "",
        vendor: str = "",
        checksums: Optional[Dict[ChecksumAlgorithm, str]] = None,
        homepage: str = "",
        download_url: str = "",
        build_date: Optional[datetime] = None,
        summary: str = "",
        description: str = "",
        depends_on: Optional[Set["BasePackage"]] = None,
        optional_depends_on: Optional[Set["BasePackage"]] = None,
        repository_url: str = "",
        source_rpm: str = "",
        rpm_provides: Optional[List[RPMDependency]] = None,
        rpm_requires: Optional[List[RPMDependency]] = None,
        rpm_recommends: Optional[List[RPMDependency]] = None,
        rpm_suggests: Optional[List[RPMDependency]] = None,
    ) -> None:
        super().__init__(
            name=name,
            version=version,
            filename=filename,
            license_declared=license_declared,
            vendor=vendor,
            checksums=checksums,
            homepage=homepage,
            download_url=download_url,
            build_date=build_date,
            summary=summary,
            description=description,
            depends_on=depends_on,
            optional_depends_on=optional_depends_on,
        )

        # RPM specific identification
        self.release = release
        self.architecture = architecture
        self.epoch = epoch

        # origin of the package
        self.repository_url = repository_url
        self.source_rpm = source_rpm

        # Dependency lists default to a fresh empty list per instance.
        self.rpm_provides = rpm_provides if rpm_provides else []
        self.rpm_requires = rpm_requires if rpm_requires else []
        self.rpm_recommends = rpm_recommends if rpm_recommends else []
        self.rpm_suggests = rpm_suggests if rpm_suggests else []

    def source_info(self) -> str:
        """
        Describe the source of the RPM package.

        Returns an empty string when the source RPM is not known.
        """
        return f"Source RPM: {self.source_rpm}" if self.source_rpm else ""

    def uuid(self) -> str:
        """
        Return a stable UUID derived from the PURL of the package.

        The repository URL is left out of the PURL used here, so the same package
        gets the same UUID regardless of the repository it was found in.
        """
        repo_independent_purl = self._purl(with_repo_url=False)
        return str(uuid.uuid3(uuid.NAMESPACE_URL, repo_independent_purl))

    def _purl(self, with_repo_url=True) -> str:
        """
        Build the Package URL of the RPM package.

        When with_repo_url is False, the "repository_url" qualifier is never added.
        Such a PURL identifies the same package independently of the repository
        it was found in.

        The PURL definition for RPMs is available at:
        https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst#rpm
        """
        # The lower-cased and percent-encoded vendor (if any) is used as the namespace.
        namespace = f"{urllib.parse.quote(self.vendor.lower())}/" if self.vendor else ""

        qualifiers = [f"arch={self.architecture}"]
        if self.epoch:
            qualifiers.append(f"epoch={self.epoch}")
        if with_repo_url and self.repository_url:
            # https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#character-encoding
            qualifiers.append(f"repository_url={urllib.parse.quote(self.repository_url, safe='/:=')}")

        return f"pkg:rpm/{namespace}{self.name}@{self.version}-{self.release}?" + "&".join(qualifiers)

    def purl(self) -> str:
        """Return the full Package URL, including the repository URL if it is set."""
        return self._purl()

123
osbuild/util/sbom/spdx.py Normal file
View file

@ -0,0 +1,123 @@
from datetime import datetime
from typing import List, Union
from uuid import uuid4
import osbuild
import osbuild.util.sbom.model as sbom_model
import osbuild.util.sbom.spdx2 as spdx2
def spdx2_checksum_algorithm(algorithm: sbom_model.ChecksumAlgorithm) -> spdx2.ChecksumAlgorithm:
    """
    Map a checksum algorithm of the standard-agnostic model to its SPDX v2 counterpart.

    Raises ValueError when the algorithm has no mapping.
    """
    # Both enumerations use the same member names for the algorithms handled here.
    for member_name in ("SHA1", "SHA224", "SHA256", "SHA384", "SHA512", "MD5"):
        if algorithm == sbom_model.ChecksumAlgorithm[member_name]:
            return spdx2.ChecksumAlgorithm[member_name]
    raise ValueError(f"Unknown checksum algorithm: {algorithm}")
def create_spdx2_document():
    """
    Create an empty SPDX v2.3 document with osbuild recorded as the creator tool.

    Each call uses a new random UUID in the document namespace and the current
    time as the creation time.
    """
    tool_name = f"osbuild-{osbuild.__version__}"
    document_name = f"sbom-by-{tool_name}"
    namespace = f"https://osbuild.org/spdxdocs/{document_name}-{uuid4()}"

    creation_info = spdx2.CreationInfo(
        spdx_version="SPDX-2.3",
        spdx_id="SPDXRef-DOCUMENT",
        name=document_name,
        data_license="CC0-1.0",
        document_namespace=namespace,
        creators=[spdx2.Creator(spdx2.CreatorType.TOOL, tool_name)],
        created=datetime.now(),
    )
    return spdx2.Document(creation_info)
def bom_pkgset_to_spdx2_doc(pkgset: List[sbom_model.BasePackage]) -> spdx2.Document:
    """
    Convert a standard-agnostic package set to a new SPDX v2 document.

    For each package the document gets one SPDX package and these relationships:
    - the document DESCRIBES the package
    - the package DEPENDS_ON each package from its "depends_on" set
    - each package from its "optional_depends_on" set is an OPTIONAL_DEPENDENCY_OF the package

    Dependencies are emitted sorted by their UUID.
    """
    doc = create_spdx2_document()
    relationships = []

    def by_uuid(package):
        return package.uuid()

    for pkg in pkgset:
        download_location: Union[str, spdx2.NoAssertionValue]
        if pkg.download_url:
            download_location = pkg.download_url
        else:
            download_location = spdx2.NoAssertionValue()

        spdx_id = f"SPDXRef-{pkg.uuid()}"
        purl_reference = spdx2.ExternalPackageRef(
            category=spdx2.ExternalPackageRefCategory.PACKAGE_MANAGER,
            reference_type="purl",
            locator=pkg.purl(),
        )
        spdx_pkg = spdx2.Package(
            spdx_id=spdx_id,
            name=pkg.name,
            download_location=download_location,
            version=pkg.version,
            files_analyzed=False,
            license_declared=pkg.license_declared,
            external_references=[purl_reference],
        )

        # Optional fields are set only when the source package carries a value.
        if pkg.homepage:
            spdx_pkg.homepage = pkg.homepage
        if pkg.summary:
            spdx_pkg.summary = pkg.summary
        if pkg.description:
            spdx_pkg.description = pkg.description
        if pkg.source_info():
            spdx_pkg.source_info = pkg.source_info()

        spdx_pkg.checksums.extend(
            spdx2.Checksum(algorithm=spdx2_checksum_algorithm(hash_type), value=hash_value)
            for hash_type, hash_value in pkg.checksums.items()
        )

        if pkg.build_date:
            spdx_pkg.built_date = pkg.build_date

        doc.packages.append(spdx_pkg)

        relationships.append(
            spdx2.Relationship(
                spdx_element_id=doc.creation_info.spdx_id,
                relationship_type=spdx2.RelationshipType.DESCRIBES,
                related_spdx_element_id=spdx_pkg.spdx_id,
            )
        )
        relationships.extend(
            spdx2.Relationship(
                spdx_element_id=spdx_pkg.spdx_id,
                relationship_type=spdx2.RelationshipType.DEPENDS_ON,
                related_spdx_element_id=f"SPDXRef-{dep.uuid()}",
            )
            for dep in sorted(pkg.depends_on, key=by_uuid)
        )
        relationships.extend(
            spdx2.Relationship(
                spdx_element_id=f"SPDXRef-{optional_dep.uuid()}",
                relationship_type=spdx2.RelationshipType.OPTIONAL_DEPENDENCY_OF,
                related_spdx_element_id=spdx_pkg.spdx_id,
            )
            for optional_dep in sorted(pkg.optional_depends_on, key=by_uuid)
        )

    doc.relationships = relationships
    return doc

View file

@ -0,0 +1,33 @@
"""Module for creating SPDX spec v2 Software Bill of Materials (SBOM) files."""
from .model import (
Checksum,
ChecksumAlgorithm,
CreationInfo,
Creator,
CreatorType,
Document,
ExternalPackageRef,
ExternalPackageRefCategory,
NoAssertionValue,
NoneValue,
Package,
Relationship,
RelationshipType,
)
# Public API of the package: the names re-exported from the ".model" module.
__all__ = [
    "Checksum",
    "ChecksumAlgorithm",
    "CreationInfo",
    "Creator",
    "CreatorType",
    "Document",
    "ExternalPackageRef",
    "ExternalPackageRefCategory",
    "NoAssertionValue",
    "NoneValue",
    "Package",
    "Relationship",
    "RelationshipType"
]

View file

@ -0,0 +1,338 @@
"""
A base implementation of SPDX 2.3 model, as described on:
https://spdx.github.io/spdx-spec/v2.3/
"""
import re
from datetime import datetime, timezone
from enum import Enum, auto
from typing import Dict, List, Optional, Union
class CreatorType(Enum):
    """The kinds of actors that can be recorded as an SPDX creator."""

    PERSON = auto()
    ORGANIZATION = auto()
    TOOL = auto()

    def __str__(self) -> str:
        # The member name with only its first letter in upper case, e.g. "Tool".
        member_name = self.name
        return member_name.capitalize()
class Creator:
    """An SPDX creator: an actor type, a name and an optional e-mail address."""

    def __init__(self, creator_type: CreatorType, name: str, email: Optional[str] = None) -> None:
        self.creator_type = creator_type
        self.name = name
        self.email = email

    def __str__(self):
        # "<type>: <name>", followed by " (<email>)" only when an e-mail is set
        text = f"{self.creator_type}: {self.name}"
        if self.email:
            text += f" ({self.email})"
        return text
class EntityWithSpdxId():
    """
    Represents an SPDX entity with an SPDX ID.

    The ID must consist of the "SPDXRef-" prefix followed by at least one
    letter, digit, "." or "-" character. Anything else raises ValueError.

    https://spdx.github.io/spdx-spec/v2.3/package-information/#72-package-spdx-identifier-field
    """

    # Compiled once for the class instead of on every instantiation.
    _SPDX_ID_REGEX = re.compile(r"SPDXRef-[a-zA-Z0-9\.\-]+")

    def __init__(self, spdx_id: str) -> None:
        # fullmatch() is used on purpose: with match() and a "$" anchor, an ID with
        # a trailing newline (e.g. "SPDXRef-DOCUMENT\n") would be accepted as valid.
        if not self._SPDX_ID_REGEX.fullmatch(spdx_id):
            raise ValueError(f"Invalid SPDX ID '{spdx_id}'")
        self.spdx_id = spdx_id
def datetime_to_iso8601(dt: datetime) -> str:
    """
    Format a datetime object as an ISO8601 string accepted by SPDX.

    The value is converted to UTC, the microseconds are dropped (SPDX does not
    support them) and the result is suffixed with "Z" instead of a UTC offset.

    https://spdx.github.io/spdx-spec/v2.3/document-creation-information/#69-created-field
    """
    utc_time = dt.astimezone(timezone.utc)
    # Dropping tzinfo keeps isoformat() from appending "+00:00".
    plain_time = utc_time.replace(tzinfo=None, microsecond=0)
    return f"{plain_time.isoformat()}Z"
class CreationInfo(EntityWithSpdxId):
    """
    SPDX document creation information.

    The SPDX version must start with "SPDX-" and the SPDX ID must be
    "SPDXRef-DOCUMENT", otherwise ValueError is raised.

    https://spdx.github.io/spdx-spec/v2.3/document-creation-information/
    """

    def __init__(
        self,
        spdx_version: str,
        spdx_id: str,
        name: str,
        document_namespace: str,
        creators: List[Creator],
        created: datetime,
        data_license: str = "CC0-1.0",
    ) -> None:
        # The generic SPDX ID validation of the parent class runs first.
        super().__init__(spdx_id)

        version_is_valid = spdx_version.startswith("SPDX-")
        if not version_is_valid:
            raise ValueError(f"Invalid SPDX version '{spdx_version}'")

        # A document may use only this one fixed SPDX ID.
        if spdx_id != "SPDXRef-DOCUMENT":
            raise ValueError(f"Invalid SPDX ID '{spdx_id}'")

        self.spdx_version = spdx_version
        self.name = name
        self.data_license = data_license
        self.document_namespace = document_namespace
        self.creators = creators
        self.created = created

    def to_dict(self):
        """Return the creation information as a dictionary using the SPDX JSON field names."""
        creation_info = {
            "created": datetime_to_iso8601(self.created),
            "creators": [str(creator) for creator in self.creators],
        }
        return {
            "SPDXID": self.spdx_id,
            "creationInfo": creation_info,
            "dataLicense": self.data_license,
            "name": self.name,
            "spdxVersion": self.spdx_version,
            "documentNamespace": self.document_namespace,
        }
class NoAssertionValue:
    """Marker object for the SPDX "NOASSERTION" value."""

    # the exact string used in SPDX documents
    VALUE = "NOASSERTION"

    def __str__(self):
        return self.VALUE
class NoneValue:
    """Marker object for the SPDX "NONE" value."""

    # the exact string used in SPDX documents
    VALUE = "NONE"

    def __str__(self):
        return self.VALUE
class ExternalPackageRefCategory(Enum):
    """The categories an external package reference can belong to."""

    SECURITY = auto()
    PACKAGE_MANAGER = auto()
    PERSISTENT_ID = auto()
    OTHER = auto()

    def __str__(self) -> str:
        # Underscores in the member name become dashes, e.g. "PACKAGE-MANAGER".
        return "-".join(self.name.split("_"))
# Reference types accepted for each external package reference category.
# An empty list means that the reference type is not restricted for the category
# (see the check in ExternalPackageRef.__init__).
CATEGORY_TO_REPOSITORY_TYPE: Dict[ExternalPackageRefCategory, List[str]] = {
    ExternalPackageRefCategory.SECURITY: ["cpe22Type", "cpe23Type", "advisory", "fix", "url", "swid"],
    ExternalPackageRefCategory.PACKAGE_MANAGER: ["maven-central", "nuget", "bower", "purl"],
    ExternalPackageRefCategory.PERSISTENT_ID: ["swh", "gitoid"],
    ExternalPackageRefCategory.OTHER: [],
}
class ExternalPackageRef:
    """
    A reference from an SPDX package to an external source of information.

    ValueError is raised when the category restricts its reference types and
    reference_type is not one of them.

    https://spdx.github.io/spdx-spec/v2.3/package-information/#721-external-reference-field
    """

    def __init__(self, category: ExternalPackageRefCategory, reference_type: str, locator: str) -> None:
        allowed_types = CATEGORY_TO_REPOSITORY_TYPE[category]
        # An empty list of allowed types means that any reference type is accepted.
        if allowed_types and reference_type not in allowed_types:
            raise ValueError(f"Invalid repository type '{reference_type}' for category '{category}'")

        self.category = category
        self.reference_type = reference_type
        self.locator = locator

    def to_dict(self):
        """Return the reference as a dictionary using the SPDX JSON field names."""
        return dict(
            referenceCategory=str(self.category),
            referenceType=self.reference_type,
            referenceLocator=self.locator,
        )
class ChecksumAlgorithm(Enum):
    """The checksum algorithms known to this SPDX model."""

    # SHA-1 and SHA-2
    SHA1 = auto()
    SHA224 = auto()
    SHA256 = auto()
    SHA384 = auto()
    SHA512 = auto()

    # SHA-3
    SHA3_256 = auto()
    SHA3_384 = auto()
    SHA3_512 = auto()

    # BLAKE
    BLAKE2b_256 = auto()
    BLAKE2b_384 = auto()
    BLAKE2b_512 = auto()
    BLAKE3 = auto()

    # MD
    MD2 = auto()
    MD4 = auto()
    MD5 = auto()
    MD6 = auto()

    ADLER32 = auto()

    def __str__(self) -> str:
        # Underscores in the member name become dashes, e.g. "SHA3-256".
        return "-".join(self.name.split("_"))
class Checksum:
    """
    A checksum value together with the algorithm that produced it.

    https://spdx.github.io/spdx-spec/v2.3/package-information/#72-checksum-fields
    """

    def __init__(self, algorithm: ChecksumAlgorithm, value: str) -> None:
        self.algorithm, self.value = algorithm, value

    def to_dict(self):
        """Return the checksum as a dictionary using the SPDX JSON field names."""
        return dict(algorithm=str(self.algorithm), checksumValue=self.value)
# pylint: disable=too-many-instance-attributes
class Package(EntityWithSpdxId):
    """An SPDX package. Only the SPDX ID, name and download location are mandatory."""

    def __init__(
        self,
        spdx_id: str,
        name: str,
        download_location: Union[str, NoAssertionValue, NoneValue],
        version: Optional[str] = None,
        files_analyzed: Optional[bool] = None,
        checksums: Optional[List[Checksum]] = None,
        homepage: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
        source_info: Optional[str] = None,
        license_declared: Optional[Union[str, NoAssertionValue, NoneValue]] = None,
        summary: Optional[str] = None,
        description: Optional[str] = None,
        external_references: Optional[List[ExternalPackageRef]] = None,
        built_date: Optional[datetime] = None,
    ) -> None:
        super().__init__(spdx_id)

        # mandatory fields
        self.name = name
        self.download_location = download_location

        # optional fields
        self.version = version
        self.files_analyzed = files_analyzed
        self.homepage = homepage
        self.source_info = source_info
        self.license_declared = license_declared
        self.summary = summary
        self.description = description
        self.built_date = built_date

        # List fields default to a fresh empty list per instance.
        self.checksums = checksums if checksums else []
        self.external_references = external_references if external_references else []

    def to_dict(self):
        """
        Return the package as a dictionary using the SPDX JSON field names.

        Optional fields with a falsy value are left out. The only exception is
        "filesAnalyzed", which is left out only when it is None.
        """
        result = {
            "SPDXID": self.spdx_id,
            "name": self.name,
            "downloadLocation": str(self.download_location)
        }

        # False is a meaningful value here, so the check is explicitly against None.
        if self.files_analyzed is not None:
            result["filesAnalyzed"] = self.files_analyzed

        # (key, value, converter) triplets in the order of the resulting dictionary keys.
        # A converter of None means that the value is used as is.
        optional_fields = (
            ("versionInfo", self.version, None),
            ("checksums", self.checksums, lambda items: [checksum.to_dict() for checksum in items]),
            ("homepage", self.homepage, str),
            ("sourceInfo", self.source_info, None),
            ("licenseDeclared", self.license_declared, str),
            ("summary", self.summary, None),
            ("description", self.description, None),
            ("externalRefs", self.external_references, lambda items: [ref.to_dict() for ref in items]),
            ("builtDate", self.built_date, datetime_to_iso8601),
        )
        for key, value, convert in optional_fields:
            if value:
                result[key] = value if convert is None else convert(value)

        return result
class RelationshipType(Enum):
    """The SPDX relationship types supported by this model."""

    DESCRIBES = auto()
    DEPENDS_ON = auto()
    OPTIONAL_DEPENDENCY_OF = auto()

    def __str__(self) -> str:
        # The member name is used verbatim.
        return f"{self.name}"
class Relationship:
    """A typed relationship between two SPDX elements, with an optional comment."""

    def __init__(
        self,
        spdx_element_id: str,
        relationship_type: RelationshipType,
        related_spdx_element_id: Union[str, NoneValue, NoAssertionValue],
        comment: Optional[str] = None,
    ) -> None:
        self.spdx_element_id = spdx_element_id
        self.relationship_type = relationship_type
        self.related_spdx_element_id = related_spdx_element_id
        self.comment = comment

    def to_dict(self):
        """
        Return the relationship as a dictionary using the SPDX JSON field names.

        The "comment" key is present only when a non-empty comment is set.
        """
        result = dict(
            spdxElementId=self.spdx_element_id,
            relationshipType=str(self.relationship_type),
            relatedSpdxElement=str(self.related_spdx_element_id),
        )
        if self.comment:
            result["comment"] = self.comment
        return result
class Document:
    """An SPDX document: creation information, packages and relationships."""

    def __init__(
        self,
        creation_info: CreationInfo,
        packages: Optional[List[Package]] = None,
        relationships: Optional[List[Relationship]] = None,
    ) -> None:
        self.creation_info = creation_info
        self.packages = packages if packages else []
        self.relationships = relationships if relationships else []

    def to_dict(self):
        """
        Return the document as a dictionary using the SPDX JSON field names.

        The dictionary starts from the creation information. The "packages" and
        "relationships" keys are added only when there is at least one item.
        """
        result = self.creation_info.to_dict()

        package_dicts = [package.to_dict() for package in self.packages]
        if package_dicts:
            result.setdefault("packages", []).extend(package_dicts)

        relationship_dicts = [relationship.to_dict() for relationship in self.relationships]
        if relationship_dicts:
            result.setdefault("relationships", []).extend(relationship_dicts)

        return result

View file

@ -4,7 +4,14 @@ setuptools.setup(
name="osbuild",
version="129",
description="A build system for OS images",
packages=["osbuild", "osbuild.formats", "osbuild.util","osbuild.solver"],
packages=[
"osbuild",
"osbuild.formats",
"osbuild.solver",
"osbuild.util",
"osbuild.util.sbom",
"osbuild.util.sbom.spdx2",
],
license='Apache-2.0',
install_requires=[
"jsonschema",

View file

@ -0,0 +1,48 @@
import os
from datetime import datetime
import pytest
testutil_dnf4 = pytest.importorskip("osbuild.testutil.dnf4")
# The module lives in the "osbuild.util.sbom" package (osbuild/util/sbom/dnf.py).
# With a wrong module path, importorskip() would silently skip this whole test module.
bom_dnf = pytest.importorskip("osbuild.util.sbom.dnf")
def test_dnf_pkgset_to_sbom_pkgset():
    """Depsolve "bash" from the test repository and verify its conversion to the SBOM model."""
    repo_dir = os.path.abspath("./test/data/testrepos/baseos")
    dnf_pkgset = testutil_dnf4.depsolve_pkgset([repo_dir], ["bash"])
    bom_pkgset = bom_dnf.dnf_pkgset_to_sbom_pkgset(dnf_pkgset)
    assert len(bom_pkgset) == len(dnf_pkgset)

    # (SBOM package attribute, dnf package attribute) pairs that must hold equal values
    same_value_attrs = (
        ("name", "name"),
        ("version", "version"),
        ("release", "release"),
        ("architecture", "arch"),
        ("epoch", "epoch"),
        ("license_declared", "license"),
        ("vendor", "vendor"),
    )
    same_value_attrs_after_build_date = (
        ("summary", "summary"),
        ("description", "description"),
        ("source_rpm", "sourcerpm"),
        ("homepage", "url"),
    )
    # (SBOM package attribute, dnf package attribute) pairs of dependency lists compared by name
    dependency_attrs = (
        ("rpm_provides", "provides"),
        ("rpm_requires", "requires"),
        ("rpm_recommends", "recommends"),
        ("rpm_suggests", "suggests"),
    )

    for bom_pkg, dnf_pkg in zip(bom_pkgset, dnf_pkgset):
        for bom_attr, dnf_attr in same_value_attrs:
            assert getattr(bom_pkg, bom_attr) == getattr(dnf_pkg, dnf_attr)
        assert bom_pkg.build_date == datetime.fromtimestamp(dnf_pkg.buildtime)
        for bom_attr, dnf_attr in same_value_attrs_after_build_date:
            assert getattr(bom_pkg, bom_attr) == getattr(dnf_pkg, dnf_attr)

        expected_checksums = {
            bom_dnf.bom_chksum_algorithm_from_hawkey(dnf_pkg.chksum[0]): dnf_pkg.chksum[1].hex()
        }
        assert bom_pkg.checksums == expected_checksums

        assert bom_pkg.download_url == dnf_pkg.remote_location()
        assert bom_pkg.repository_url == dnf_pkg.remote_location()[:-len("/" + dnf_pkg.relativepath)]

        for bom_attr, dnf_attr in dependency_attrs:
            bom_dep_names = [dep.name for dep in getattr(bom_pkg, bom_attr)]
            dnf_dep_names = [dep.name for dep in getattr(dnf_pkg, dnf_attr)]
            assert bom_dep_names == dnf_dep_names

    # smoke test the inter-package relationships on bash
    def by_name(pkg):
        return pkg.name

    bash = [pkg for pkg in bom_pkgset if pkg.name == "bash"][0]
    assert len(bash.depends_on) == 3
    expected_deps = [pkg for pkg in bom_pkgset if pkg.name in ["filesystem", "glibc", "ncurses-libs"]]
    assert sorted(bash.depends_on, key=by_name) == sorted(expected_deps, key=by_name)
    assert len(bash.optional_depends_on) == 0

View file

@ -0,0 +1,49 @@
import pytest
from osbuild.util.sbom.model import RPMPackage
def test_rpmpackage_uuid():
    """The UUID must be equal for identical packages and differ when any identifying field differs."""
    reference = RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64")
    identical = RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64")
    assert reference.uuid() == identical.uuid()

    # each package differs from the reference one in exactly one field
    different_pkgs = [
        RPMPackage("PackageA", "2.0.0", "1.fc40", "x86_64"),   # version
        RPMPackage("PackageA", "1.0.0", "1.fc41", "x86_64"),   # release
        RPMPackage("PackageA", "1.0.0", "1.fc40", "aarch64"),  # architecture
        RPMPackage("PackageB", "1.0.0", "1.fc40", "x86_64"),   # name
    ]
    for other in different_pkgs:
        assert reference.uuid() != other.uuid()
@pytest.mark.parametrize("package,purl", [
    # only the mandatory fields
    (
        RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64"),
        "pkg:rpm/PackageA@1.0.0-1.fc40?arch=x86_64",
    ),
    # a non-zero epoch becomes a qualifier
    (
        RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64", epoch=123),
        "pkg:rpm/PackageA@1.0.0-1.fc40?arch=x86_64&epoch=123",
    ),
    # the vendor becomes a lower-cased, percent-encoded namespace
    (
        RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64", vendor="Fedora Project"),
        "pkg:rpm/fedora%20project/PackageA@1.0.0-1.fc40?arch=x86_64",
    ),
    (
        RPMPackage("PackageA", "1.0.0", "1.el9", "x86_64", vendor="CentOS"),
        "pkg:rpm/centos/PackageA@1.0.0-1.el9?arch=x86_64",
    ),
    (
        RPMPackage("PackageA", "1.0.0", "1.el9", "x86_64", vendor="Red Hat, Inc."),
        "pkg:rpm/red%20hat%2C%20inc./PackageA@1.0.0-1.el9?arch=x86_64",
    ),
    # the repository URL becomes a qualifier
    (
        RPMPackage("PackageA", "1.0.0", "1.fc40", "x86_64", vendor="Fedora Project",
                   repository_url="https://example.org/repo/"),
        "pkg:rpm/fedora%20project/PackageA@1.0.0-1.fc40?arch=x86_64&repository_url=https://example.org/repo/",
    ),
])
def test_rpmpackage_purl(package, purl):
    """The PURL of an RPM package must match the expected string."""
    actual_purl = package.purl()
    assert actual_purl == purl

View file

@ -0,0 +1,76 @@
import os
import pytest
import osbuild
from osbuild.util.sbom.spdx import bom_pkgset_to_spdx2_doc, create_spdx2_document, spdx2_checksum_algorithm
from osbuild.util.sbom.spdx2.model import CreatorType, ExternalPackageRefCategory, RelationshipType
testutil_dnf4 = pytest.importorskip("osbuild.testutil.dnf4")
# The module lives in the "osbuild.util.sbom" package (osbuild/util/sbom/dnf.py).
# With a wrong module path, importorskip() would silently skip this whole test module.
bom_dnf = pytest.importorskip("osbuild.util.sbom.dnf")
def test_create_spdx2_document():
    """A new document must carry the osbuild creation info and be unique per call."""
    tool_name = f"osbuild-{osbuild.__version__}"

    doc1 = create_spdx2_document()
    info1 = doc1.creation_info
    assert info1.spdx_version == "SPDX-2.3"
    assert info1.spdx_id == "SPDXRef-DOCUMENT"
    assert info1.name == f"sbom-by-{tool_name}"
    assert info1.data_license == "CC0-1.0"
    assert info1.document_namespace.startswith("https://osbuild.org/spdxdocs/sbom-by-osbuild-")
    assert len(info1.creators) == 1
    creator = info1.creators[0]
    assert creator.creator_type == CreatorType.TOOL
    assert creator.name == tool_name
    assert info1.created

    # a second document differs only in its namespace and creation time
    doc2 = create_spdx2_document()
    info2 = doc2.creation_info
    assert info1.document_namespace != info2.document_namespace
    assert info1.created != info2.created

    doc1_dict = doc1.to_dict()
    doc2_dict = doc2.to_dict()
    for doc_dict in (doc1_dict, doc2_dict):
        del doc_dict["creationInfo"]["created"]
        del doc_dict["documentNamespace"]
    assert doc1_dict == doc2_dict
def test_sbom_pkgset_to_spdx2_doc():
    """Convert the depsolved "bash" package set to an SPDX document and verify its content."""
    repo_dir = os.path.abspath("./test/data/testrepos/baseos")
    dnf_pkgset = testutil_dnf4.depsolve_pkgset([repo_dir], ["bash"])
    bom_pkgset = bom_dnf.dnf_pkgset_to_sbom_pkgset(dnf_pkgset)
    doc = bom_pkgset_to_spdx2_doc(bom_pkgset)

    assert len(doc.packages) == len(bom_pkgset)
    for spdx_pkg, bom_pkg in zip(doc.packages, bom_pkgset):
        assert spdx_pkg.spdx_id == f"SPDXRef-{bom_pkg.uuid()}"
        assert spdx_pkg.name == bom_pkg.name
        assert spdx_pkg.version == bom_pkg.version
        assert not spdx_pkg.files_analyzed
        assert spdx_pkg.license_declared == bom_pkg.license_declared
        assert spdx_pkg.download_location == bom_pkg.download_url
        assert spdx_pkg.homepage == bom_pkg.homepage
        assert spdx_pkg.summary == bom_pkg.summary
        assert spdx_pkg.description == bom_pkg.description
        assert spdx_pkg.source_info == bom_pkg.source_info()
        assert spdx_pkg.built_date == bom_pkg.build_date

        assert len(spdx_pkg.checksums) == 1
        spdx_checksum = spdx_pkg.checksums[0]
        assert spdx_checksum.algorithm == spdx2_checksum_algorithm(list(bom_pkg.checksums.keys())[0])
        assert spdx_checksum.value == list(bom_pkg.checksums.values())[0]

        assert len(spdx_pkg.external_references) == 1
        external_ref = spdx_pkg.external_references[0]
        assert external_ref.category == ExternalPackageRefCategory.PACKAGE_MANAGER
        assert external_ref.reference_type == "purl"
        assert external_ref.locator == bom_pkg.purl()

    def count_relationships(relationship_type):
        return len([rel for rel in doc.relationships if rel.relationship_type == relationship_type])

    assert count_relationships(RelationshipType.DESCRIBES) == len(bom_pkgset)

    deps_count = sum(len(bom_pkg.depends_on) for bom_pkg in bom_pkgset)
    assert count_relationships(RelationshipType.DEPENDS_ON) == deps_count

    optional_deps_count = sum(len(bom_pkg.optional_depends_on) for bom_pkg in bom_pkgset)
    assert count_relationships(RelationshipType.OPTIONAL_DEPENDENCY_OF) == optional_deps_count

View file

@ -0,0 +1,469 @@
from datetime import datetime
import pytest
from osbuild.util.sbom.spdx2.model import (
CATEGORY_TO_REPOSITORY_TYPE,
Checksum,
ChecksumAlgorithm,
CreationInfo,
Creator,
CreatorType,
Document,
EntityWithSpdxId,
ExternalPackageRef,
ExternalPackageRefCategory,
NoAssertionValue,
NoneValue,
Package,
Relationship,
RelationshipType,
datetime_to_iso8601,
)
zoneinfo = pytest.importorskip("zoneinfo")
def test_creator_type_str():
    """Each creator type must be rendered as its capitalized name."""
    expected_strings = (
        (CreatorType.PERSON, "Person"),
        (CreatorType.ORGANIZATION, "Organization"),
        (CreatorType.TOOL, "Tool"),
    )
    for creator_type, expected in expected_strings:
        assert str(creator_type) == expected
@pytest.mark.parametrize("test_object,expected_str", [
    # without an e-mail address
    (Creator(CreatorType.TOOL, "Sample-Tool-123"), "Tool: Sample-Tool-123"),
    (Creator(CreatorType.ORGANIZATION, "Sample Organization"), "Organization: Sample Organization"),
    # with an e-mail address
    (
        Creator(CreatorType.ORGANIZATION, "Sample Organization", "email@example.com"),
        "Organization: Sample Organization (email@example.com)",
    ),
    (Creator(CreatorType.PERSON, "John Foo"), "Person: John Foo"),
    (
        Creator(CreatorType.PERSON, "John Foo", "email@example.com"),
        "Person: John Foo (email@example.com)",
    ),
])
def test_creator_str(test_object, expected_str):
    """The string form of a creator must match the expected string."""
    actual_str = str(test_object)
    assert actual_str == expected_str
@pytest.mark.parametrize("test_spdx_id,error", [
    # valid IDs
    ("SPDXRef-DOCUMENT", False),
    ("SPDXRef-package-1.2.3", False),
    ("SPDXRef-package-1.2.3-0ec6114d-8d46-4553-a310-4df502c29082", False),
    # invalid IDs
    ("", True),
    ("SPDXRef-", True),
    ("SPDxRef-DOCUMENT", True),
    ("SPDXRef-createrepo_c-1.2.3-1", True),
])
def test_entity_with_spdx_id(test_spdx_id, error):
    """Valid SPDX IDs must be accepted and invalid ones rejected with ValueError."""
    if not error:
        _ = EntityWithSpdxId(test_spdx_id)
        return

    with pytest.raises(ValueError):
        _ = EntityWithSpdxId(test_spdx_id)
@pytest.mark.parametrize("test_date,expected_str", [
    # UTC input: with zero seconds, with seconds, and with microseconds that get dropped
    (datetime(2024, 11, 15, 14, 33, tzinfo=zoneinfo.ZoneInfo("UTC")), "2024-11-15T14:33:00Z"),
    (datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("UTC")), "2024-11-15T14:33:59Z"),
    (datetime(2024, 11, 15, 14, 33, 59, 123456, tzinfo=zoneinfo.ZoneInfo("UTC")), "2024-11-15T14:33:59Z"),
    # non-UTC input is converted to UTC
    (datetime(2024, 11, 15, 14, 33, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")), "2024-11-15T13:33:00Z"),
    (datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")), "2024-11-15T13:33:59Z"),
])
def test_datetime_to_iso8601(test_date, expected_str):
    """The datetime must be rendered as the expected SPDX-compliant string."""
    actual_str = datetime_to_iso8601(test_date)
    assert actual_str == expected_str
@pytest.mark.parametrize("test_case", (
    # Explicit data_license is carried over into "dataLicense"
    {
        "instance_args": {
            "spdx_version": "SPDX-2.3",
            "spdx_id": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "document_namespace": "https://example.com",
            "creators": [Creator(CreatorType.TOOL, "Sample-Tool-123")],
            "created": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
            "data_license": "Public Domain",
        },
        "expected": {
            "spdxVersion": "SPDX-2.3",
            "SPDXID": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "dataLicense": "Public Domain",
            "documentNamespace": "https://example.com",
            "creationInfo": {
                "created": "2024-11-15T13:33:59Z",
                "creators": [
                    "Tool: Sample-Tool-123",
                ],
            },
        },
    },
    # data_license omitted: "dataLicense" is expected to be "CC0-1.0"
    {
        "instance_args": {
            "spdx_version": "SPDX-2.3",
            "spdx_id": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "document_namespace": "https://example.com",
            "creators": [Creator(CreatorType.TOOL, "Sample-Tool-123")],
            "created": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
        },
        "expected": {
            "spdxVersion": "SPDX-2.3",
            "SPDXID": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "dataLicense": "CC0-1.0",
            "documentNamespace": "https://example.com",
            "creationInfo": {
                "created": "2024-11-15T13:33:59Z",
                "creators": [
                    "Tool: Sample-Tool-123",
                ],
            },
        },
    },
    # spdx_id without the "SPDXRef-" prefix must be rejected
    {
        "instance_args": {
            "spdx_version": "SPDX-2.3",
            "spdx_id": "DOCUMENT",
            "name": "Sample-Document",
            "document_namespace": "https://example.com",
            "creators": [Creator(CreatorType.TOOL, "Sample-Tool-123")],
            "created": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
        },
        "error": True,
    },
    # Prefixed spdx_id other than "SPDXRef-DOCUMENT" must be rejected as well
    {
        "instance_args": {
            "spdx_version": "SPDX-2.3",
            "spdx_id": "SPDXRef-YOLO",
            "name": "Sample-Document",
            "document_namespace": "https://example.com",
            "creators": [Creator(CreatorType.TOOL, "Sample-Tool-123")],
            "created": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
        },
        "error": True,
    },
))
def test_creation_info_to_dict(test_case):
    """
    Build a CreationInfo from the case's keyword arguments.

    Cases flagged with "error" must raise ValueError on construction; all
    other cases must serialize to the "expected" dictionary.
    """
    if test_case.get("error", False):
        with pytest.raises(ValueError):
            CreationInfo(**test_case["instance_args"])
        return

    creation_info = CreationInfo(**test_case["instance_args"])
    serialized = creation_info.to_dict()
    assert serialized == test_case["expected"]
def test_no_assertion_value_str():
    """A NoAssertionValue instance must stringify to 'NOASSERTION'."""
    value = NoAssertionValue()
    assert str(value) == "NOASSERTION"
def test_none_value_str():
    """A NoneValue instance must stringify to 'NONE'."""
    value = NoneValue()
    assert str(value) == "NONE"
def test_external_package_ref_category_str():
    """Check the string form of every ExternalPackageRefCategory member."""
    expected_pairs = [
        (ExternalPackageRefCategory.SECURITY, "SECURITY"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "PACKAGE-MANAGER"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "PERSISTENT-ID"),
        (ExternalPackageRefCategory.OTHER, "OTHER"),
    ]
    for ref_category, expected in expected_pairs:
        assert str(ref_category) == expected
def test_external_package_ref_cat_type_combinations():
    """
    Exercise ExternalPackageRef with every category from CATEGORY_TO_REPOSITORY_TYPE.

    The OTHER category must accept arbitrary type strings. Every remaining
    category must accept each of its listed types and raise ValueError for a
    type that is not listed.
    """
    locator = "https://example.com"

    for ref_category, allowed_types in CATEGORY_TO_REPOSITORY_TYPE.items():
        if ref_category == ExternalPackageRefCategory.OTHER:
            # Free-form types: construction alone must not raise.
            for free_form_type in ("made-up", "yolo-type"):
                _ = ExternalPackageRef(ref_category, free_form_type, locator)
            continue

        for allowed_type in allowed_types:
            _ = ExternalPackageRef(ref_category, allowed_type, locator)

        with pytest.raises(ValueError):
            _ = ExternalPackageRef(ref_category, "made-up", locator)
def test_external_package_ref_to_dict():
    """ExternalPackageRef.to_dict() must emit category, type and locator keys."""
    expected = {
        "referenceCategory": "PACKAGE-MANAGER",
        "referenceType": "purl",
        "referenceLocator": "https://example.com",
    }
    package_ref = ExternalPackageRef(
        ExternalPackageRefCategory.PACKAGE_MANAGER,
        "purl",
        "https://example.com",
    )
    assert package_ref.to_dict() == expected
def test_checksum_algorithm_str():
    """Check the string form of every ChecksumAlgorithm member listed below."""
    expected_pairs = [
        (ChecksumAlgorithm.SHA1, "SHA1"),
        (ChecksumAlgorithm.SHA224, "SHA224"),
        (ChecksumAlgorithm.SHA256, "SHA256"),
        (ChecksumAlgorithm.SHA384, "SHA384"),
        (ChecksumAlgorithm.SHA512, "SHA512"),
        # Member names use underscores where the string form uses dashes
        (ChecksumAlgorithm.SHA3_256, "SHA3-256"),
        (ChecksumAlgorithm.SHA3_384, "SHA3-384"),
        (ChecksumAlgorithm.SHA3_512, "SHA3-512"),
        (ChecksumAlgorithm.BLAKE2b_256, "BLAKE2b-256"),
        (ChecksumAlgorithm.BLAKE2b_384, "BLAKE2b-384"),
        (ChecksumAlgorithm.BLAKE2b_512, "BLAKE2b-512"),
        (ChecksumAlgorithm.BLAKE3, "BLAKE3"),
        (ChecksumAlgorithm.MD2, "MD2"),
        (ChecksumAlgorithm.MD4, "MD4"),
        (ChecksumAlgorithm.MD5, "MD5"),
        (ChecksumAlgorithm.MD6, "MD6"),
        (ChecksumAlgorithm.ADLER32, "ADLER32"),
    ]
    for algorithm, expected in expected_pairs:
        assert str(algorithm) == expected
def test_checksum_to_dict():
    """Checksum.to_dict() must emit the algorithm name and the checksum value."""
    checksum = Checksum(ChecksumAlgorithm.SHA1, "123456")
    expected = {
        "algorithm": "SHA1",
        "checksumValue": "123456",
    }
    assert checksum.to_dict() == expected
@pytest.mark.parametrize("test_case", (
    # Only the three arguments given here: no further keys expected in the output
    {
        "instance_args": {
            "spdx_id": "SPDXRef-package-1.2.3",
            "name": "package",
            "download_location": "https://example.org/package-1.2.3.rpm",
        },
        "expected": {
            "SPDXID": "SPDXRef-package-1.2.3",
            "name": "package",
            "downloadLocation": "https://example.org/package-1.2.3.rpm",
        },
    },
    # NoAssertionValue as download location, files_analyzed explicitly True
    {
        "instance_args": {
            "spdx_id": "SPDXRef-package-1.2.3",
            "name": "package",
            "download_location": NoAssertionValue(),
            "files_analyzed": True,
        },
        "expected": {
            "SPDXID": "SPDXRef-package-1.2.3",
            "name": "package",
            "downloadLocation": "NOASSERTION",
            "filesAnalyzed": True,
        },
    },
    # Fully populated package, NoneValue as download location
    {
        "instance_args": {
            "spdx_id": "SPDXRef-package-1.2.3",
            "name": "package",
            "download_location": NoneValue(),
            "files_analyzed": False,
            "checksums": [
                Checksum(ChecksumAlgorithm.SHA256, "123456"),
            ],
            "version": "1.2.3",
            "homepage": "https://example.org/package",
            "source_info": "https://example.org/package-1.2.3.src.rpm",
            "license_declared": "MIT",
            "summary": "A sample package",
            "description": "A sample package description",
            "external_references": [
                ExternalPackageRef(
                    ExternalPackageRefCategory.PACKAGE_MANAGER,
                    "purl",
                    "pkg:rpm:/example/package@1.2.3-1?arch=x86_64",
                ),
            ],
            "built_date": datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
        },
        "expected": {
            "SPDXID": "SPDXRef-package-1.2.3",
            "name": "package",
            "downloadLocation": "NONE",
            "filesAnalyzed": False,
            "checksums": [
                {
                    "algorithm": "SHA256",
                    "checksumValue": "123456",
                },
            ],
            "versionInfo": "1.2.3",
            "homepage": "https://example.org/package",
            "sourceInfo": "https://example.org/package-1.2.3.src.rpm",
            "licenseDeclared": "MIT",
            "summary": "A sample package",
            "description": "A sample package description",
            "externalRefs": [
                {
                    "referenceCategory": "PACKAGE-MANAGER",
                    "referenceType": "purl",
                    "referenceLocator": "pkg:rpm:/example/package@1.2.3-1?arch=x86_64",
                },
            ],
            "builtDate": "2024-11-15T13:33:59Z",
        },
    },
))
def test_package_to_dict(test_case):
    """Package.to_dict() must produce the case's expected dictionary."""
    package = Package(**test_case["instance_args"])
    serialized = package.to_dict()
    assert serialized == test_case["expected"]
def test_relationship_type_str():
    """Check the string form of the RelationshipType members listed below."""
    expected_pairs = [
        (RelationshipType.DESCRIBES, "DESCRIBES"),
        (RelationshipType.DEPENDS_ON, "DEPENDS_ON"),
        (RelationshipType.OPTIONAL_DEPENDENCY_OF, "OPTIONAL_DEPENDENCY_OF"),
    ]
    for relationship_type, expected in expected_pairs:
        assert str(relationship_type) == expected
@pytest.mark.parametrize("test_case", (
    # Package-to-package dependency, no comment
    {
        "instance_args": {
            "spdx_element_id": "SPDXRef-packageA-1.2.3",
            "relationship_type": RelationshipType.DEPENDS_ON,
            "related_spdx_element_id": "SPDXRef-packageB-3.2.1",
        },
        "expected": {
            "spdxElementId": "SPDXRef-packageA-1.2.3",
            "relationshipType": "DEPENDS_ON",
            "relatedSpdxElement": "SPDXRef-packageB-3.2.1",
        },
    },
    # Document-to-package relationship carrying a comment
    {
        "instance_args": {
            "spdx_element_id": "SPDXRef-DOCUMENT",
            "relationship_type": RelationshipType.DESCRIBES,
            "related_spdx_element_id": "SPDXRef-packageB-3.2.1",
            "comment": "This document describes package B",
        },
        "expected": {
            "spdxElementId": "SPDXRef-DOCUMENT",
            "relationshipType": "DESCRIBES",
            "relatedSpdxElement": "SPDXRef-packageB-3.2.1",
            "comment": "This document describes package B",
        },
    },
))
def test_relationship_to_dict(test_case):
    """Relationship.to_dict() must produce the case's expected dictionary."""
    relationship = Relationship(**test_case["instance_args"])
    serialized = relationship.to_dict()
    assert serialized == test_case["expected"]
@pytest.mark.parametrize("test_case", (
    # Document with creation info only: no "packages" / "relationships" keys expected
    {
        "instance_args": {
            "creation_info": CreationInfo(
                "SPDX-2.3",
                "SPDXRef-DOCUMENT",
                "Sample-Document",
                "https://example.com",
                [Creator(CreatorType.TOOL, "Sample-Tool-123")],
                datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
                "Public Domain",
            ),
        },
        "expected": {
            "spdxVersion": "SPDX-2.3",
            "SPDXID": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "dataLicense": "Public Domain",
            "documentNamespace": "https://example.com",
            "creationInfo": {
                "created": "2024-11-15T13:33:59Z",
                "creators": [
                    "Tool: Sample-Tool-123",
                ],
            },
        },
    },
    # Document with two packages and three relationships
    {
        "instance_args": {
            "creation_info": CreationInfo(
                "SPDX-2.3",
                "SPDXRef-DOCUMENT",
                "Sample-Document",
                "https://example.com",
                [Creator(CreatorType.TOOL, "Sample-Tool-123")],
                datetime(2024, 11, 15, 14, 33, 59, tzinfo=zoneinfo.ZoneInfo("Europe/Prague")),
                "Public Domain",
            ),
            "packages": [
                Package(
                    "SPDXRef-packageA-1.2.3",
                    "package",
                    "https://example.org/packageA-1.2.3.rpm",
                ),
                Package(
                    "SPDXRef-packageB-3.2.1",
                    "package",
                    "https://example.org/packageB-3.2.1.rpm",
                ),
            ],
            "relationships": [
                Relationship(
                    "SPDXRef-DOCUMENT",
                    RelationshipType.DESCRIBES,
                    "SPDXRef-packageA-1.2.3",
                ),
                Relationship(
                    "SPDXRef-DOCUMENT",
                    RelationshipType.DESCRIBES,
                    "SPDXRef-packageB-3.2.1",
                ),
                Relationship(
                    "SPDXRef-packageA-1.2.3",
                    RelationshipType.DEPENDS_ON,
                    "SPDXRef-packageB-3.2.1",
                ),
            ],
        },
        "expected": {
            "spdxVersion": "SPDX-2.3",
            "SPDXID": "SPDXRef-DOCUMENT",
            "name": "Sample-Document",
            "dataLicense": "Public Domain",
            "documentNamespace": "https://example.com",
            "creationInfo": {
                "created": "2024-11-15T13:33:59Z",
                "creators": [
                    "Tool: Sample-Tool-123",
                ],
            },
            "packages": [
                {
                    "SPDXID": "SPDXRef-packageA-1.2.3",
                    "name": "package",
                    "downloadLocation": "https://example.org/packageA-1.2.3.rpm",
                },
                {
                    "SPDXID": "SPDXRef-packageB-3.2.1",
                    "name": "package",
                    "downloadLocation": "https://example.org/packageB-3.2.1.rpm",
                },
            ],
            "relationships": [
                {
                    "spdxElementId": "SPDXRef-DOCUMENT",
                    "relationshipType": "DESCRIBES",
                    "relatedSpdxElement": "SPDXRef-packageA-1.2.3",
                },
                {
                    "spdxElementId": "SPDXRef-DOCUMENT",
                    "relationshipType": "DESCRIBES",
                    "relatedSpdxElement": "SPDXRef-packageB-3.2.1",
                },
                {
                    "spdxElementId": "SPDXRef-packageA-1.2.3",
                    "relationshipType": "DEPENDS_ON",
                    "relatedSpdxElement": "SPDXRef-packageB-3.2.1",
                },
            ],
        },
    },
))
def test_document_to_dict(test_case):
    """Document.to_dict() must produce the case's expected dictionary."""
    document = Document(**test_case["instance_args"])
    serialized = document.to_dict()
    assert serialized == test_case["expected"]