debian-forge/artifact_manager.py
robojerk 48c31fa24f
Some checks are pending
Checks / Spelling (push) Waiting to run
Checks / Python Linters (push) Waiting to run
Checks / Shell Linters (push) Waiting to run
Checks / 📦 Packit config lint (push) Waiting to run
Checks / 🔍 Check for valid snapshot urls (push) Waiting to run
Checks / 🔍 Check JSON files for formatting consistency (push) Waiting to run
Generate / Documentation (push) Waiting to run
Generate / Test Data (push) Waiting to run
Tests / Unittest (push) Waiting to run
Tests / Assembler test (legacy) (push) Waiting to run
Tests / Smoke run: unittest as normal user on default runner (push) Waiting to run
Implement enhanced build orchestration and artifact management
- Add build status tracking with state machine
- Implement build logging and monitoring system
- Add build progress tracking and cancellation support
- Create artifact management system with SQLite database
- Fix stage file extensions for proper Python imports
- Enhance resource allocation with actual resource tracking
- Add comprehensive testing for all components
2025-08-22 18:45:17 -07:00

395 lines
14 KiB
Python

#!/usr/bin/python3
"""
Debian Forge Artifact Manager
Manages build artifacts, storage, and provides artifact discovery for Debian atomic builds.
"""
import os
import json
import shutil
import hashlib
import sqlite3
from contextlib import closing
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
@dataclass
class Artifact:
    """Record describing one stored build artifact.

    Instances are plain value objects: they carry the identifying fields,
    the on-disk location, and integrity information for a single artifact.
    """

    id: str  # unique identifier of the artifact record
    build_id: str  # identifier of the build that produced the artifact
    name: str  # file name of the artifact
    path: str  # location of the stored artifact file
    size: int  # file size in bytes
    checksum: str  # hex digest of the file contents
    artifact_type: str  # category, e.g. "images" or "logs"
    created_at: datetime  # moment the record was created
    metadata: Optional[Dict[str, Any]] = None  # free-form extra information

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict copy of the record with ``created_at`` as an ISO-8601 string."""
        serialized = asdict(self)
        # datetime objects are not JSON-friendly, so swap in the ISO text form.
        serialized.update({'created_at': self.created_at.isoformat()})
        return serialized
class ArtifactStorage:
    """Manages artifact storage and organization on the local filesystem.

    Artifacts are stored as ``<base_dir>/<artifact_type>/<filename>``.
    """

    # Subdirectories created eagerly when the storage is initialised.
    DEFAULT_SUBDIRS = ("debian-packages", "ostree-commits", "images", "logs", "metadata")

    # Read size used while hashing; large enough to avoid per-call overhead
    # without holding a whole artifact in memory.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, base_dir: str = "artifacts"):
        """Create the storage root and its standard subdirectories.

        Args:
            base_dir: Root directory for all artifacts. Missing parent
                directories are created as well.
        """
        self.base_dir = Path(base_dir)
        # parents=True so a nested base_dir does not fail when its parents
        # do not exist yet.
        self.base_dir.mkdir(parents=True, exist_ok=True)
        for subdir in self.DEFAULT_SUBDIRS:
            (self.base_dir / subdir).mkdir(exist_ok=True)

    def get_artifact_path(self, artifact_type: str, filename: str) -> Path:
        """Return the path an artifact of the given type and name maps to."""
        return self.base_dir / artifact_type / filename

    def store_artifact(self, source_path: str, artifact_type: str, filename: str) -> str:
        """Copy ``source_path`` into the storage tree and return the stored path.

        Args:
            source_path: Existing file to copy.
            artifact_type: Subdirectory (category) to store the file under.
            filename: Name of the stored file.

        Returns:
            The destination path as a string.

        Raises:
            OSError: If the source cannot be read or the destination written.
        """
        dest_path = self.get_artifact_path(artifact_type, filename)
        # The type may be one that was not pre-created in __init__; make sure
        # the target directory exists instead of failing inside the copy.
        dest_path.parent.mkdir(parents=True, exist_ok=True)
        # copy2 also carries over file metadata such as timestamps.
        shutil.copy2(source_path, dest_path)
        return str(dest_path)

    def remove_artifact(self, artifact_type: str, filename: str) -> bool:
        """Delete a stored artifact file.

        Returns:
            True if the file was deleted, False if it did not exist.
        """
        artifact_path = self.get_artifact_path(artifact_type, filename)
        # EAFP: avoids the window between an exists() check and the unlink.
        try:
            artifact_path.unlink()
        except FileNotFoundError:
            return False
        return True

    def get_artifact_info(self, artifact_path: str) -> Tuple[int, str]:
        """Return ``(size_in_bytes, sha256_hex_digest)`` for a file.

        Raises:
            FileNotFoundError: If ``artifact_path`` does not exist.
        """
        path = Path(artifact_path)
        if not path.exists():
            raise FileNotFoundError(f"Artifact not found: {artifact_path}")

        size = path.stat().st_size

        # Hash in chunks so large images are never loaded into memory at once.
        sha256_hash = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(self._CHUNK_SIZE), b""):
                sha256_hash.update(chunk)

        return size, sha256_hash.hexdigest()
class ArtifactDatabase:
    """SQLite database for artifact metadata.

    Each operation opens its own short-lived connection to ``db_path`` and
    closes it again before returning.
    """

    # Column list shared by every SELECT; the order must match the unpacking
    # in _row_to_artifact.
    _ARTIFACT_COLUMNS = (
        "id, build_id, name, path, size, checksum, artifact_type, created_at, metadata"
    )

    def __init__(self, db_path: str = "artifacts.db"):
        """Remember the database location and make sure the schema exists."""
        self.db_path = db_path
        self.init_database()

    def _connect(self):
        """Return a context manager that yields a connection and closes it on exit.

        ``with sqlite3.connect(...)`` on its own only commits or rolls back
        the transaction; it does not close the connection, so it is wrapped
        in ``closing`` here.
        """
        return closing(sqlite3.connect(self.db_path))

    def _select(self, clause: str, params: Tuple) -> List[Artifact]:
        """Run a SELECT over the artifacts table and convert the rows.

        ``clause`` is the trailing WHERE/ORDER BY text. It must only ever be
        one of the constant strings used in this class; caller-supplied
        values go through ``params``.
        """
        sql = f"SELECT {self._ARTIFACT_COLUMNS} FROM artifacts {clause}"
        with self._connect() as conn:
            rows = conn.execute(sql, params).fetchall()
        return [self._row_to_artifact(row) for row in rows]

    def init_database(self):
        """Initialize the database schema (tables and indexes) if missing."""
        # Outer context closes the connection; inner one commits on success
        # and rolls back on error.
        with self._connect() as conn, conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS artifacts (
                    id TEXT PRIMARY KEY,
                    build_id TEXT NOT NULL,
                    name TEXT NOT NULL,
                    path TEXT NOT NULL,
                    size INTEGER NOT NULL,
                    checksum TEXT NOT NULL,
                    artifact_type TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    metadata TEXT
                )
            """)
            # Builds table kept for reference.
            conn.execute("""
                CREATE TABLE IF NOT EXISTS builds (
                    build_id TEXT PRIMARY KEY,
                    manifest_path TEXT NOT NULL,
                    status TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    completed_at TEXT
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_build_id ON artifacts(build_id)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_type ON artifacts(artifact_type)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_artifacts_created ON artifacts(created_at)")

    def add_artifact(self, artifact: Artifact):
        """Insert an artifact record, replacing any existing row with the same id."""
        # Compare against None explicitly so an empty dict is stored as "{}"
        # and round-trips as {} rather than collapsing to NULL/None.
        metadata_json = None if artifact.metadata is None else json.dumps(artifact.metadata)
        with self._connect() as conn, conn:
            conn.execute("""
                INSERT OR REPLACE INTO artifacts
                (id, build_id, name, path, size, checksum, artifact_type, created_at, metadata)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                artifact.id,
                artifact.build_id,
                artifact.name,
                artifact.path,
                artifact.size,
                artifact.checksum,
                artifact.artifact_type,
                artifact.created_at.isoformat(),
                metadata_json,
            ))

    def get_artifact(self, artifact_id: str) -> Optional[Artifact]:
        """Return the artifact with the given id, or None if there is none."""
        matches = self._select("WHERE id = ?", (artifact_id,))
        return matches[0] if matches else None

    def get_artifacts_by_build(self, build_id: str) -> List[Artifact]:
        """Return all artifacts of one build, newest first."""
        return self._select("WHERE build_id = ? ORDER BY created_at DESC", (build_id,))

    def get_artifacts_by_type(self, artifact_type: str) -> List[Artifact]:
        """Return all artifacts of one type, newest first."""
        return self._select(
            "WHERE artifact_type = ? ORDER BY created_at DESC", (artifact_type,)
        )

    def search_artifacts(self, query: str) -> List[Artifact]:
        """Return artifacts whose name or stored metadata text contains ``query``.

        The match uses SQL ``LIKE``, so ``%`` and ``_`` inside ``query`` act
        as wildcards. An empty query matches every artifact. Results are
        ordered newest first.
        """
        pattern = f"%{query}%"
        return self._select(
            "WHERE name LIKE ? OR metadata LIKE ? ORDER BY created_at DESC",
            (pattern, pattern),
        )

    def remove_artifact(self, artifact_id: str) -> bool:
        """Delete an artifact record.

        Returns:
            True if a row was deleted, False if the id was unknown.
        """
        with self._connect() as conn, conn:
            cursor = conn.execute("DELETE FROM artifacts WHERE id = ?", (artifact_id,))
            return cursor.rowcount > 0

    def _row_to_artifact(self, row: Tuple) -> Artifact:
        """Convert a row selected with ``_ARTIFACT_COLUMNS`` into an Artifact."""
        (artifact_id, build_id, name, path, size,
         checksum, artifact_type, created_at, raw_metadata) = row
        # NULL (or empty text) means no metadata was recorded.
        metadata = json.loads(raw_metadata) if raw_metadata else None
        return Artifact(
            id=artifact_id,
            build_id=build_id,
            name=name,
            path=path,
            size=size,
            checksum=checksum,
            artifact_type=artifact_type,
            created_at=datetime.fromisoformat(created_at),
            metadata=metadata,
        )
class ArtifactManager:
    """Main artifact management system.

    Combines file storage (``ArtifactStorage``) with the metadata database
    (``ArtifactDatabase``) behind one interface.
    """

    # Artifact types reported by get_storage_stats.
    _STAT_TYPES = ("debian-packages", "ostree-commits", "images", "logs", "metadata")

    def __init__(self, base_dir: str = "artifacts", db_path: Optional[str] = None):
        """Set up storage and database.

        Args:
            base_dir: Root directory for stored artifact files.
            db_path: Location of the SQLite database. When None, the
                database class's own default location is used.
        """
        self.storage = ArtifactStorage(base_dir)
        self.database = ArtifactDatabase() if db_path is None else ArtifactDatabase(db_path)

    def _new_artifact_id(self, artifact_type: str, build_id: str, now: datetime) -> str:
        """Build an id that is not yet present in the database.

        The base id has one-second resolution, so two registrations of the
        same type and build within a second would otherwise share an id and
        the later record would replace the earlier one. A numeric suffix is
        appended only when such a collision is found.
        """
        base_id = f"{artifact_type}-{build_id}-{now.strftime('%Y%m%d-%H%M%S')}"
        candidate = base_id
        suffix = 1
        while self.database.get_artifact(candidate) is not None:
            candidate = f"{base_id}-{suffix}"
            suffix += 1
        return candidate

    def register_artifact(self, build_id: str, source_path: str, artifact_type: str,
                          name: Optional[str] = None,
                          metadata: Optional[Dict[str, Any]] = None) -> str:
        """Copy a file into storage and record it in the database.

        Args:
            build_id: Build the artifact belongs to.
            source_path: File to copy into storage.
            artifact_type: Category the artifact is stored under.
            name: Stored file name; defaults to the base name of ``source_path``.
            metadata: Optional extra information saved with the record.

        Returns:
            The id of the newly registered artifact.
        """
        # One timestamp for both the id and created_at keeps them consistent.
        now = datetime.now()
        artifact_id = self._new_artifact_id(artifact_type, build_id, now)

        if name is None:
            name = os.path.basename(source_path)

        # NOTE(review): the stored path depends only on (artifact_type, name),
        # so registering the same name twice overwrites the earlier file.
        artifact_path = self.storage.store_artifact(source_path, artifact_type, name)
        size, checksum = self.storage.get_artifact_info(artifact_path)

        artifact = Artifact(
            id=artifact_id,
            build_id=build_id,
            name=name,
            path=artifact_path,
            size=size,
            checksum=checksum,
            artifact_type=artifact_type,
            created_at=now,
            metadata=metadata,
        )
        self.database.add_artifact(artifact)
        return artifact_id

    def get_artifact(self, artifact_id: str) -> Optional[Artifact]:
        """Get an artifact by ID, or None if it is unknown."""
        return self.database.get_artifact(artifact_id)

    def get_build_artifacts(self, build_id: str) -> List[Artifact]:
        """Get all artifacts for a build."""
        return self.database.get_artifacts_by_build(build_id)

    def get_artifacts_by_type(self, artifact_type: str) -> List[Artifact]:
        """Get all artifacts of a specific type."""
        return self.database.get_artifacts_by_type(artifact_type)

    def search_artifacts(self, query: str) -> List[Artifact]:
        """Search artifacts via the database's search."""
        return self.database.search_artifacts(query)

    def remove_artifact(self, artifact_id: str) -> bool:
        """Remove an artifact's file and its database record.

        Returns:
            True if a database record was removed, False if the id was
            unknown.
        """
        artifact = self.database.get_artifact(artifact_id)
        if artifact is None:
            return False
        # The file is removed first; a file that is already missing does not
        # stop the record from being removed.
        self.storage.remove_artifact(artifact.artifact_type, artifact.name)
        return self.database.remove_artifact(artifact_id)

    def get_storage_stats(self) -> Dict[str, Any]:
        """Return artifact counts and sizes, in total and per known type."""
        stats = {
            "total_artifacts": 0,
            "total_size": 0,
            "by_type": {},
            "storage_path": str(self.storage.base_dir)
        }
        for artifact_type in self._STAT_TYPES:
            artifacts = self.database.get_artifacts_by_type(artifact_type)
            count = len(artifacts)
            size = sum(a.size for a in artifacts)  # summed once, used twice
            stats["by_type"][artifact_type] = {"count": count, "size": size}
            stats["total_artifacts"] += count
            stats["total_size"] += size
        return stats

    def cleanup_old_artifacts(self, days_old: int = 30) -> int:
        """Remove artifacts created more than ``days_old`` days ago.

        Returns:
            The number of artifacts that were removed.
        """
        cutoff_date = datetime.now().timestamp() - (days_old * 24 * 60 * 60)
        removed_count = 0
        # An empty search string matches every artifact.
        for artifact in self.database.search_artifacts(""):
            if artifact.created_at.timestamp() < cutoff_date and self.remove_artifact(artifact.id):
                removed_count += 1
        return removed_count
def main():
    """Demonstrate the artifact manager by registering a sample manifest file."""
    print("Debian Forge Artifact Manager")
    print("=" * 40)

    manager = ArtifactManager()

    build_id = "build-000001"
    test_file = "test-debian-manifest.json"

    # Nothing to demonstrate without the sample input file.
    if not os.path.exists(test_file):
        print(f"Test file {test_file} not found")
        return

    print(f"Registering artifact from {test_file}")
    description = {"description": "Debian atomic manifest", "version": "1.0"}
    artifact_id = manager.register_artifact(
        build_id=build_id,
        source_path=test_file,
        artifact_type="metadata",
        name="debian-manifest.json",
        metadata=description,
    )
    print(f"Registered artifact: {artifact_id}")

    # Show the record that was just stored.
    artifact = manager.get_artifact(artifact_id)
    if artifact:
        print(f"Artifact: {artifact.name}")
        print(f"Size: {artifact.size} bytes")
        print(f"Checksum: {artifact.checksum}")
        print(f"Type: {artifact.artifact_type}")

    # Summarise everything recorded for this build.
    build_artifacts = manager.get_build_artifacts(build_id)
    print(f"Build {build_id} has {len(build_artifacts)} artifacts")

    # Overall storage usage.
    stats = manager.get_storage_stats()
    print(f"Storage stats: {stats['total_artifacts']} artifacts, {stats['total_size']} bytes")


if __name__ == "__main__":
    main()