debian-forge-composer/internal/builddashboard/build_orchestrator.go
robojerk 4eeaa43c39
Some checks failed
Tests / 🛃 Unit tests (push) Failing after 13s
Tests / 🗄 DB tests (push) Failing after 19s
Tests / 🐍 Lint python scripts (push) Failing after 1s
Tests / ⌨ Golang Lint (push) Failing after 1s
Tests / 📦 Packit config lint (push) Failing after 1s
Tests / 🔍 Check source preparation (push) Failing after 1s
Tests / 🔍 Check for valid snapshot urls (push) Failing after 1s
Tests / 🔍 Check for missing or unused runner repos (push) Failing after 1s
Tests / 🐚 Shellcheck (push) Failing after 1s
Tests / 📦 RPMlint (push) Failing after 1s
Tests / Gitlab CI trigger helper (push) Failing after 1s
Tests / 🎀 kube-linter (push) Failing after 1s
Tests / 🧹 cloud-cleaner-is-enabled (push) Successful in 3s
Tests / 🔍 Check spec file osbuild/images dependencies (push) Failing after 1s
did stuff
2025-08-26 10:34:42 -07:00

685 lines
18 KiB
Go

package builddashboard
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync"
"time"
"github.com/labstack/echo/v4"
"github.com/sirupsen/logrus"
)
// BuildOrchestrator groups the build store, logger, in-memory build queue,
// worker registry and metrics used by the HTTP handlers and background loops
// defined on it.
type BuildOrchestrator struct {
store BuildStore
logger *logrus.Logger
buildQueue *BuildQueue
workers *WorkerManager
metrics *BuildMetrics
// mu is taken by GetMetrics (read) and updateMetricsData (write) around
// accesses to metrics.
mu sync.RWMutex
}
// BuildStore is the persistence interface the orchestrator uses to save,
// fetch, list, update and delete Build records.
type BuildStore interface {
SaveBuild(build *Build) error
GetBuild(id string) (*Build, error)
ListBuilds(filters BuildFilters) ([]*Build, error)
UpdateBuild(build *Build) error
DeleteBuild(id string) error
}
// Build is the JSON-serializable record of a single build request and its
// current state.
type Build struct {
// ID is assigned by the server via generateBuildID.
ID string `json:"id"`
// BlueprintID is required by validateBuild.
BlueprintID string `json:"blueprint_id"`
BlueprintName string `json:"blueprint_name"`
Status BuildStatus `json:"status"`
Priority int `json:"priority"`
// WorkerID is set when processNextBuild assigns the build to a worker.
WorkerID string `json:"worker_id,omitempty"`
// Architecture, Variant and ImageType are required by validateBuild;
// Architecture is also used to pick a matching worker.
Architecture string `json:"architecture"`
Variant string `json:"variant"`
ImageType string `json:"image_type"`
Formats []string `json:"formats"`
// StartedAt is set when the build is handed to a worker.
StartedAt *time.Time `json:"started_at,omitempty"`
CompletedAt *time.Time `json:"completed_at,omitempty"`
Duration time.Duration `json:"duration,omitempty"`
Progress float64 `json:"progress"`
Logs []BuildLog `json:"logs"`
Artifacts []BuildArtifact `json:"artifacts"`
Error string `json:"error,omitempty"`
Metadata map[string]interface{} `json:"metadata"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// BuildStatus is the string-valued lifecycle state stored in Build.Status.
type BuildStatus string
// Known build states. In this file new and retried builds start as pending,
// processNextBuild moves a build to running, and CancelBuild moves it to
// cancelled unless it is already completed or failed.
const (
BuildStatusPending BuildStatus = "pending"
BuildStatusQueued BuildStatus = "queued"
BuildStatusRunning BuildStatus = "running"
BuildStatusCompleted BuildStatus = "completed"
BuildStatusFailed BuildStatus = "failed"
BuildStatusCancelled BuildStatus = "cancelled"
)
// BuildLog is one log entry attached to a Build: a timestamp, a level, the
// message text and the source that produced it.
type BuildLog struct {
Timestamp time.Time `json:"timestamp"`
Level string `json:"level"`
Message string `json:"message"`
Source string `json:"source"`
}
// BuildArtifact describes one output attached to a Build.
type BuildArtifact struct {
Type string `json:"type"`
Path string `json:"path"`
// Size is presumably in bytes; the unit is not established in this file.
Size int64 `json:"size"`
Checksum string `json:"checksum"`
CreatedAt time.Time `json:"created_at"`
// DownloadURL is omitted from JSON when empty.
DownloadURL string `json:"download_url,omitempty"`
}
// BuildFilters carries the optional criteria passed to BuildStore.ListBuilds.
//
// Each field has a `query` tag in addition to its `json` tag so that Echo's
// binder can populate it from URL query parameters on GET requests (for
// example /api/v1/builds?status=failed&limit=20); with only `json` tags the
// filters could be supplied solely through a JSON request body.
type BuildFilters struct {
	// Status restricts results to the listed states; repeat the query
	// parameter to pass several values.
	Status       []BuildStatus `json:"status" query:"status"`
	BlueprintID  string        `json:"blueprint_id" query:"blueprint_id"`
	Architecture string        `json:"architecture" query:"architecture"`
	Variant      string        `json:"variant" query:"variant"`
	// DateFrom and DateTo bound the time range; which Build timestamp they
	// apply to is decided by the store implementation.
	DateFrom *time.Time `json:"date_from" query:"date_from"`
	DateTo   *time.Time `json:"date_to" query:"date_to"`
	// Limit and Offset page the result set; ListBuilds defaults Limit to 100.
	Limit  int `json:"limit" query:"limit"`
	Offset int `json:"offset" query:"offset"`
}
// BuildQueue is an in-memory list of builds waiting to be processed.
type BuildQueue struct {
// builds holds the waiting builds; new entries are appended at the end and
// GetNextBuild takes from the front.
builds []*Build
// mu guards builds.
mu sync.RWMutex
}
// WorkerManager is an in-memory registry of workers keyed by worker ID.
type WorkerManager struct {
workers map[string]*Worker
// mu guards workers and the Worker values reached through it.
mu sync.RWMutex
}
// Worker is the JSON-serializable description of a build worker.
type Worker struct {
ID string `json:"id"`
Name string `json:"name"`
Status WorkerStatus `json:"status"`
// Architecture is compared against Build.Architecture when a worker is
// selected for a build.
Architecture string `json:"architecture"`
Capabilities []string `json:"capabilities"`
// CurrentJob is set to the build ID by AssignJob.
CurrentJob string `json:"current_job,omitempty"`
Load float64 `json:"load"`
Memory WorkerMemory `json:"memory"`
// LastSeen is refreshed whenever UpdateWorkerStatus changes the status.
LastSeen time.Time `json:"last_seen"`
Metadata map[string]string `json:"metadata"`
}
// WorkerStatus is the string-valued state stored in Worker.Status.
type WorkerStatus string
// Known worker states. Only idle workers are returned by GetAvailableWorker,
// AssignJob switches a worker to working, and ActiveWorkerCount counts idle
// and working workers.
const (
WorkerStatusIdle WorkerStatus = "idle"
WorkerStatusWorking WorkerStatus = "working"
WorkerStatusOffline WorkerStatus = "offline"
WorkerStatusError WorkerStatus = "error"
)
// WorkerMemory reports a worker's memory figures.
// NOTE(review): the unit (presumably bytes) is not established in this file.
type WorkerMemory struct {
Total int64 `json:"total"`
Available int64 `json:"available"`
Used int64 `json:"used"`
}
// BuildMetrics is the JSON payload served by GetMetrics.
// NOTE(review): within this file only QueueLength, ActiveWorkers,
// TotalWorkers and LastUpdated are refreshed (by updateMetricsData); the
// remaining fields are never written here.
type BuildMetrics struct {
TotalBuilds int64 `json:"total_builds"`
SuccessfulBuilds int64 `json:"successful_builds"`
FailedBuilds int64 `json:"failed_builds"`
AverageBuildTime time.Duration `json:"average_build_time"`
QueueLength int `json:"queue_length"`
ActiveWorkers int `json:"active_workers"`
TotalWorkers int `json:"total_workers"`
BuildTrends map[string]interface{} `json:"build_trends"`
LastUpdated time.Time `json:"last_updated"`
}
// NewBuildOrchestrator builds an orchestrator around the given store and
// logger, giving it a fresh queue, worker manager and metrics value, and
// starts its two background loops (metrics refresh and queue processing)
// before returning it.
func NewBuildOrchestrator(store BuildStore, logger *logrus.Logger) *BuildOrchestrator {
	bo := new(BuildOrchestrator)
	bo.store = store
	bo.logger = logger
	bo.buildQueue = NewBuildQueue()
	bo.workers = NewWorkerManager()
	bo.metrics = NewBuildMetrics()

	// Both loops run until the process exits; nothing here stops them.
	go bo.updateMetrics()
	go bo.processQueue()

	return bo
}
// NewBuildQueue returns an empty queue whose backing slice is non-nil.
func NewBuildQueue() *BuildQueue {
	queue := new(BuildQueue)
	queue.builds = []*Build{}
	return queue
}
// NewWorkerManager returns a manager with an empty, ready-to-use worker map.
func NewWorkerManager() *WorkerManager {
	manager := new(WorkerManager)
	manager.workers = map[string]*Worker{}
	return manager
}
// NewBuildMetrics returns zeroed metrics with an empty trends map and
// LastUpdated set to the current time.
func NewBuildMetrics() *BuildMetrics {
	metrics := new(BuildMetrics)
	metrics.BuildTrends = map[string]interface{}{}
	metrics.LastUpdated = time.Now()
	return metrics
}
// RegisterRoutes attaches every dashboard endpoint to e, in the same order
// and with the same method/path pairs as listed in the table below.
func (bo *BuildOrchestrator) RegisterRoutes(e *echo.Echo) {
	routes := []struct {
		method  string
		path    string
		handler echo.HandlerFunc
	}{
		// Build management
		{http.MethodGet, "/api/v1/builds", bo.ListBuilds},
		{http.MethodPost, "/api/v1/builds", bo.CreateBuild},
		{http.MethodGet, "/api/v1/builds/:id", bo.GetBuild},
		{http.MethodPut, "/api/v1/builds/:id", bo.UpdateBuild},
		{http.MethodDelete, "/api/v1/builds/:id", bo.DeleteBuild},
		{http.MethodPost, "/api/v1/builds/:id/cancel", bo.CancelBuild},
		{http.MethodPost, "/api/v1/builds/:id/retry", bo.RetryBuild},

		// Build queue management
		{http.MethodGet, "/api/v1/builds/queue", bo.GetQueueStatus},
		{http.MethodPost, "/api/v1/builds/queue/clear", bo.ClearQueue},
		{http.MethodPost, "/api/v1/builds/queue/prioritize", bo.PrioritizeBuild},

		// Worker management
		{http.MethodGet, "/api/v1/workers", bo.ListWorkers},
		{http.MethodGet, "/api/v1/workers/:id", bo.GetWorker},
		{http.MethodPost, "/api/v1/workers/:id/status", bo.UpdateWorkerStatus},

		// Build metrics and analytics
		{http.MethodGet, "/api/v1/metrics", bo.GetMetrics},
		{http.MethodGet, "/api/v1/metrics/trends", bo.GetBuildTrends},
		{http.MethodGet, "/api/v1/metrics/performance", bo.GetPerformanceMetrics},

		// Real-time updates (the handler currently answers 501)
		{http.MethodGet, "/api/v1/events", bo.GetEventStream},
	}

	for _, route := range routes {
		e.Add(route.method, route.path, route.handler)
	}
}
// CreateBuild handles POST /api/v1/builds.
//
// It binds the request into a Build, overwrites every server-managed field,
// validates the required fields, persists the build and appends it to the
// in-memory queue. It responds 201 with the stored build, 400 on bind or
// validation errors and 500 when the store rejects the save.
func (bo *BuildOrchestrator) CreateBuild(c echo.Context) error {
	var build Build
	if err := c.Bind(&build); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("invalid build data: %v", err))
	}

	// Set initial values. Everything that describes execution state is owned
	// by the server, so anything the client sent for these fields is
	// discarded (the same set of fields RetryBuild resets).
	now := time.Now()
	build.ID = generateBuildID()
	build.Status = BuildStatusPending
	build.WorkerID = ""
	build.StartedAt = nil
	build.CompletedAt = nil
	build.Duration = 0
	build.Progress = 0.0
	build.Error = ""
	build.Logs = []BuildLog{}
	build.Artifacts = []BuildArtifact{}
	build.CreatedAt = now
	build.UpdatedAt = now

	// Validate build
	if err := bo.validateBuild(&build); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("build validation failed: %v", err))
	}

	// Save build
	if err := bo.store.SaveBuild(&build); err != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to save build: %v", err))
	}

	// Add to queue
	bo.buildQueue.AddBuild(&build)

	bo.logger.Infof("Created build: %s", build.ID)
	return c.JSON(http.StatusCreated, build)
}
// GetBuild handles GET /api/v1/builds/:id. It answers 200 with the stored
// build, or 404 carrying the store's error text for any lookup failure.
func (bo *BuildOrchestrator) GetBuild(c echo.Context) error {
	found, lookupErr := bo.store.GetBuild(c.Param("id"))
	if lookupErr == nil {
		return c.JSON(http.StatusOK, found)
	}
	return echo.NewHTTPError(http.StatusNotFound, fmt.Sprintf("build not found: %v", lookupErr))
}
// ListBuilds handles GET /api/v1/builds.
//
// Filters are bound from the request on a best-effort basis: if binding
// fails, the request is served with empty filters instead of being rejected.
// Limit defaults to 100 when missing or non-positive and a negative Offset is
// treated as 0, so the store never receives out-of-range paging values. The
// response is always a JSON array (an empty one when nothing matches); store
// failures produce a 500.
func (bo *BuildOrchestrator) ListBuilds(c echo.Context) error {
	var filters BuildFilters
	if err := c.Bind(&filters); err != nil {
		// Deliberate fallback: drop whatever was partially bound.
		filters = BuildFilters{}
	}

	// Normalize paging.
	if filters.Limit <= 0 {
		filters.Limit = 100
	}
	if filters.Offset < 0 {
		filters.Offset = 0
	}

	builds, err := bo.store.ListBuilds(filters)
	if err != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to list builds: %v", err))
	}

	// A nil slice would be encoded as JSON null; clients expect a list.
	if builds == nil {
		builds = []*Build{}
	}
	return c.JSON(http.StatusOK, builds)
}
// UpdateBuild handles PUT /api/v1/builds/:id.
//
// The stored build is looked up first (404 on failure), then the request is
// bound into a fresh Build (400 on failure). The bound value replaces the
// stored record wholesale; only ID and CreatedAt are carried over from the
// existing build and UpdatedAt is stamped with the current time. Store
// failures produce a 500; success answers 200 with the new record.
func (bo *BuildOrchestrator) UpdateBuild(c echo.Context) error {
	buildID := c.Param("id")

	current, lookupErr := bo.store.GetBuild(buildID)
	if lookupErr != nil {
		return echo.NewHTTPError(http.StatusNotFound, fmt.Sprintf("build not found: %v", lookupErr))
	}

	replacement := Build{}
	if bindErr := c.Bind(&replacement); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("invalid update data: %v", bindErr))
	}

	// Identity and creation time are never taken from the client.
	replacement.ID, replacement.CreatedAt = current.ID, current.CreatedAt
	replacement.UpdatedAt = time.Now()

	if saveErr := bo.store.UpdateBuild(&replacement); saveErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to update build: %v", saveErr))
	}

	bo.logger.Infof("Updated build: %s", buildID)
	return c.JSON(http.StatusOK, replacement)
}
// CancelBuild handles POST /api/v1/builds/:id/cancel.
//
// Builds that are already completed or failed are rejected with 400. Any
// other build is marked cancelled, persisted, and removed from the in-memory
// queue. Lookup failures give 404 and store failures give 500.
func (bo *BuildOrchestrator) CancelBuild(c echo.Context) error {
	buildID := c.Param("id")

	target, lookupErr := bo.store.GetBuild(buildID)
	if lookupErr != nil {
		return echo.NewHTTPError(http.StatusNotFound, fmt.Sprintf("build not found: %v", lookupErr))
	}

	switch target.Status {
	case BuildStatusCompleted, BuildStatusFailed:
		return echo.NewHTTPError(http.StatusBadRequest, "cannot cancel completed or failed build")
	}

	target.Status = BuildStatusCancelled
	target.UpdatedAt = time.Now()
	if saveErr := bo.store.UpdateBuild(target); saveErr != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to cancel build: %v", saveErr))
	}

	// No-op when the build is not waiting in the queue.
	bo.buildQueue.RemoveBuild(buildID)

	bo.logger.Infof("Cancelled build: %s", buildID)
	return c.JSON(http.StatusOK, target)
}
// RetryBuild handles POST /api/v1/builds/:id/retry.
//
// Only failed builds may be retried (400 otherwise; 404 when the build
// cannot be loaded). A new build is created from the failed one with a new
// ID, pending status and all execution state cleared, including the worker
// assignment. Formats and Metadata are copied so that the new build does not
// share mutable state with the failed record. The new build is saved (500 on
// failure), queued, and returned with 201.
func (bo *BuildOrchestrator) RetryBuild(c echo.Context) error {
	id := c.Param("id")

	build, err := bo.store.GetBuild(id)
	if err != nil {
		return echo.NewHTTPError(http.StatusNotFound, fmt.Sprintf("build not found: %v", err))
	}

	if build.Status != BuildStatusFailed {
		return echo.NewHTTPError(http.StatusBadRequest, "can only retry failed builds")
	}

	// Create new build based on failed one
	now := time.Now()
	newBuild := *build
	newBuild.ID = generateBuildID()
	newBuild.Status = BuildStatusPending
	newBuild.WorkerID = "" // the retry has not been assigned to any worker yet
	newBuild.StartedAt = nil
	newBuild.CompletedAt = nil
	newBuild.Duration = 0
	newBuild.Progress = 0.0
	newBuild.Error = ""
	newBuild.Logs = []BuildLog{}
	newBuild.Artifacts = []BuildArtifact{}
	newBuild.CreatedAt = now
	newBuild.UpdatedAt = now

	// The struct copy above only copies the slice and map headers; detach
	// them so later changes to one build cannot leak into the other.
	if build.Formats != nil {
		newBuild.Formats = make([]string, len(build.Formats))
		copy(newBuild.Formats, build.Formats)
	}
	if build.Metadata != nil {
		newBuild.Metadata = make(map[string]interface{}, len(build.Metadata))
		for key, value := range build.Metadata {
			newBuild.Metadata[key] = value
		}
	}

	if err := bo.store.SaveBuild(&newBuild); err != nil {
		return echo.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("failed to create retry build: %v", err))
	}

	// Add to queue
	bo.buildQueue.AddBuild(&newBuild)

	bo.logger.Infof("Retrying build %s as %s", id, newBuild.ID)
	return c.JSON(http.StatusCreated, newBuild)
}
// GetQueueStatus handles GET /api/v1/builds/queue and returns the queue's
// status map (its length and the queued builds) as JSON.
func (bo *BuildOrchestrator) GetQueueStatus(c echo.Context) error {
	return c.JSON(http.StatusOK, bo.buildQueue.GetStatus())
}
// ClearQueue handles POST /api/v1/builds/queue/clear. It empties the
// in-memory queue and reports how many builds were dropped; the stored build
// records are not touched.
func (bo *BuildOrchestrator) ClearQueue(c echo.Context) error {
	removed := bo.buildQueue.Clear()
	bo.logger.Infof("Cleared build queue, removed %d builds", removed)

	body := map[string]int{"cleared": removed}
	return c.JSON(http.StatusOK, body)
}
// PrioritizeBuild handles POST /api/v1/builds/queue/prioritize. The body
// names a queued build and the priority to store on it. Bind failures and
// builds that are not in the queue both answer 400.
func (bo *BuildOrchestrator) PrioritizeBuild(c echo.Context) error {
	type prioritizeRequest struct {
		BuildID  string `json:"build_id"`
		Priority int    `json:"priority"`
	}

	var payload prioritizeRequest
	if bindErr := c.Bind(&payload); bindErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("invalid request: %v", bindErr))
	}

	setErr := bo.buildQueue.SetPriority(payload.BuildID, payload.Priority)
	if setErr != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("failed to set priority: %v", setErr))
	}

	bo.logger.Infof("Set priority %d for build %s", payload.Priority, payload.BuildID)
	return c.JSON(http.StatusOK, map[string]string{"status": "priority updated"})
}
// ListWorkers handles GET /api/v1/workers and returns every registered
// worker as JSON.
func (bo *BuildOrchestrator) ListWorkers(c echo.Context) error {
	return c.JSON(http.StatusOK, bo.workers.ListWorkers())
}
// GetWorker handles GET /api/v1/workers/:id, answering 200 with the worker
// or 404 when the ID is not registered.
func (bo *BuildOrchestrator) GetWorker(c echo.Context) error {
	if w, found := bo.workers.GetWorker(c.Param("id")); found {
		return c.JSON(http.StatusOK, w)
	}
	return echo.NewHTTPError(http.StatusNotFound, "worker not found")
}
// UpdateWorkerStatus handles POST /api/v1/workers/:id/status.
//
// The request body is bound directly into a WorkerStatus value. Only the
// statuses declared in this package (idle, working, offline, error) are
// accepted; anything else is rejected with 400 so that a worker cannot be
// left in a state the scheduler does not recognize. An unknown worker ID
// also yields 400, as reported by the worker manager.
func (bo *BuildOrchestrator) UpdateWorkerStatus(c echo.Context) error {
	id := c.Param("id")

	var status WorkerStatus
	if err := c.Bind(&status); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("invalid status: %v", err))
	}

	switch status {
	case WorkerStatusIdle, WorkerStatusWorking, WorkerStatusOffline, WorkerStatusError:
		// recognized value
	default:
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("invalid status: %q", string(status)))
	}

	if err := bo.workers.UpdateWorkerStatus(id, status); err != nil {
		return echo.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("failed to update status: %v", err))
	}

	return c.JSON(http.StatusOK, map[string]string{"status": "updated"})
}
// GetMetrics handles GET /api/v1/metrics.
//
// The metrics are copied while holding the read lock and encoded after the
// lock has been released, so a slow client cannot keep the lock held and
// stall the periodic metrics refresh (and every reader queued behind it).
func (bo *BuildOrchestrator) GetMetrics(c echo.Context) error {
	return c.JSON(http.StatusOK, bo.metricsSnapshot())
}

// metricsSnapshot returns a copy of the current metrics taken under the read
// lock, with its own BuildTrends map; it returns nil if no metrics are set.
func (bo *BuildOrchestrator) metricsSnapshot() *BuildMetrics {
	bo.mu.RLock()
	defer bo.mu.RUnlock()

	if bo.metrics == nil {
		return nil
	}

	snapshot := *bo.metrics
	if bo.metrics.BuildTrends != nil {
		// Copy the map so the encoder never iterates the live one.
		snapshot.BuildTrends = make(map[string]interface{}, len(bo.metrics.BuildTrends))
		for key, value := range bo.metrics.BuildTrends {
			snapshot.BuildTrends[key] = value
		}
	}
	return &snapshot
}
// GetBuildTrends handles GET /api/v1/metrics/trends and returns the map
// produced by calculateBuildTrends.
func (bo *BuildOrchestrator) GetBuildTrends(c echo.Context) error {
	return c.JSON(http.StatusOK, bo.calculateBuildTrends())
}
// GetPerformanceMetrics handles GET /api/v1/metrics/performance and returns
// the map produced by calculatePerformanceMetrics.
func (bo *BuildOrchestrator) GetPerformanceMetrics(c echo.Context) error {
	return c.JSON(http.StatusOK, bo.calculatePerformanceMetrics())
}
// GetEventStream handles GET /api/v1/events. Real-time updates (Server-Sent
// Events or WebSocket) are not implemented yet, so it always answers 501.
func (bo *BuildOrchestrator) GetEventStream(c echo.Context) error {
	notImplemented := echo.NewHTTPError(http.StatusNotImplemented, "event stream not yet implemented")
	return notImplemented
}
// Background tasks
// updateMetrics refreshes the metrics every 30 seconds. It never returns and
// is meant to run in its own goroutine.
func (bo *BuildOrchestrator) updateMetrics() {
	const refreshInterval = 30 * time.Second

	tick := time.NewTicker(refreshInterval)
	defer tick.Stop()

	for {
		<-tick.C
		bo.updateMetricsData()
	}
}
// processQueue attempts to dispatch one queued build every 10 seconds. It
// never returns and is meant to run in its own goroutine.
func (bo *BuildOrchestrator) processQueue() {
	const dispatchInterval = 10 * time.Second

	tick := time.NewTicker(dispatchInterval)
	defer tick.Stop()

	for {
		<-tick.C
		bo.processNextBuild()
	}
}
// updateMetricsData copies the current queue length and worker counts into
// the metrics and stamps LastUpdated, all under the orchestrator's write
// lock.
func (bo *BuildOrchestrator) updateMetricsData() {
	bo.mu.Lock()
	defer bo.mu.Unlock()

	m := bo.metrics
	m.QueueLength = bo.buildQueue.Length()
	m.ActiveWorkers = bo.workers.ActiveWorkerCount()
	m.TotalWorkers = bo.workers.TotalWorkerCount()
	m.LastUpdated = time.Now()
}
// processNextBuild takes one build from the queue and tries to hand it to an
// idle worker of the matching architecture.
//
// When no worker is available the build is appended back to the queue. When
// a worker is found the build is marked running and persisted; the worker is
// only assigned once the store has accepted the update. If persisting fails,
// the build's previous state is restored, the build is requeued for a later
// attempt and the error is logged, so that a worker is never tied to a build
// the store does not know is running.
func (bo *BuildOrchestrator) processNextBuild() {
	build := bo.buildQueue.GetNextBuild()
	if build == nil {
		return
	}

	// Find available worker
	worker := bo.workers.GetAvailableWorker(build.Architecture)
	if worker == nil {
		// No available worker, put back in queue
		bo.buildQueue.AddBuild(build)
		return
	}

	// Remember the fields about to change so they can be rolled back.
	prevStatus := build.Status
	prevWorkerID := build.WorkerID
	prevStartedAt := build.StartedAt
	prevUpdatedAt := build.UpdatedAt

	// Assign build to worker
	now := time.Now()
	build.Status = BuildStatusRunning
	build.WorkerID = worker.ID
	build.StartedAt = &now
	build.UpdatedAt = now

	if err := bo.store.UpdateBuild(build); err != nil {
		build.Status = prevStatus
		build.WorkerID = prevWorkerID
		build.StartedAt = prevStartedAt
		build.UpdatedAt = prevUpdatedAt
		bo.buildQueue.AddBuild(build)
		bo.logger.Errorf("Failed to mark build %s as running: %v", build.ID, err)
		return
	}

	bo.workers.AssignJob(worker.ID, build.ID)
}
// validateBuild checks that the required fields of build are non-empty and
// returns an error naming the first missing one, checked in this order:
// blueprint_id, architecture, variant, image_type. It returns nil when all
// four are present.
func (bo *BuildOrchestrator) validateBuild(build *Build) error {
	switch {
	case build.BlueprintID == "":
		return fmt.Errorf("blueprint_id is required")
	case build.Architecture == "":
		return fmt.Errorf("architecture is required")
	case build.Variant == "":
		return fmt.Errorf("variant is required")
	case build.ImageType == "":
		return fmt.Errorf("image_type is required")
	default:
		return nil
	}
}
// calculateBuildTrends returns the build-trend figures served by
// GetBuildTrends. The values are hard-coded placeholders; nothing is
// computed from stored builds.
func (bo *BuildOrchestrator) calculateBuildTrends() map[string]interface{} {
	trends := make(map[string]interface{}, 3)
	trends["daily_success_rate"] = 0.85
	trends["weekly_trend"] = "increasing"
	trends["peak_hours"] = []string{"09:00", "14:00", "18:00"}
	return trends
}
// calculatePerformanceMetrics returns the performance figures served by
// GetPerformanceMetrics. The values are hard-coded placeholders; nothing is
// measured.
func (bo *BuildOrchestrator) calculatePerformanceMetrics() map[string]interface{} {
	perf := make(map[string]interface{}, 4)
	perf["average_build_time"] = "15m30s"
	perf["queue_wait_time"] = "2m15s"
	perf["worker_utilization"] = 0.75
	perf["throughput"] = 12.5 // builds per hour
	return perf
}
// Helper functions
// buildIDMu guards lastBuildID.
var (
	buildIDMu   sync.Mutex
	lastBuildID int64
)

// generateBuildID returns a process-unique identifier of the form
// "build-<n>", where n is the current Unix time in nanoseconds.
//
// The clock alone cannot guarantee uniqueness: concurrent callers, or
// platforms with a coarse clock, can observe the same timestamp. The last
// issued value is therefore remembered and the next one is bumped past it
// whenever the clock has not advanced, which keeps IDs strictly increasing
// within the process.
func generateBuildID() string {
	buildIDMu.Lock()
	defer buildIDMu.Unlock()

	next := time.Now().UnixNano()
	if next <= lastBuildID {
		next = lastBuildID + 1
	}
	lastBuildID = next

	return fmt.Sprintf("build-%d", next)
}
// BuildQueue methods
// AddBuild appends build to the end of the queue.
func (bq *BuildQueue) AddBuild(build *Build) {
	bq.mu.Lock()
	bq.builds = append(bq.builds, build)
	bq.mu.Unlock()
}
// RemoveBuild deletes the first queued build whose ID equals id, keeping the
// order of the remaining builds. It does nothing when no build matches.
func (bq *BuildQueue) RemoveBuild(id string) {
	bq.mu.Lock()
	defer bq.mu.Unlock()

	for idx := 0; idx < len(bq.builds); idx++ {
		if bq.builds[idx].ID != id {
			continue
		}
		// Shift the tail one slot to the left over the removed entry.
		copy(bq.builds[idx:], bq.builds[idx+1:])
		bq.builds = bq.builds[:len(bq.builds)-1]
		return
	}
}
// GetNextBuild removes and returns the build at the front of the queue, or
// returns nil when the queue is empty.
//
// NOTE(review): the queue is served strictly in insertion order; the
// Priority field is not consulted here, so SetPriority currently has no
// effect on dispatch order.
func (bq *BuildQueue) GetNextBuild() *Build {
	bq.mu.Lock()
	defer bq.mu.Unlock()

	if len(bq.builds) < 1 {
		return nil
	}

	head, rest := bq.builds[0], bq.builds[1:]
	bq.builds = rest
	return head
}
// GetStatus returns a map with the queue length ("length") and the queued
// builds in order ("builds").
//
// The builds are returned in a freshly allocated slice. The caller encodes
// the result after the queue lock has been released, and the queue's own
// backing array is shifted in place by other methods, so handing out the
// internal slice would let the encoder read it while it is being modified.
// The slice is never nil, so it encodes as a JSON array even when empty.
func (bq *BuildQueue) GetStatus() map[string]interface{} {
	bq.mu.RLock()
	defer bq.mu.RUnlock()

	snapshot := make([]*Build, len(bq.builds))
	copy(snapshot, bq.builds)

	return map[string]interface{}{
		"length": len(snapshot),
		"builds": snapshot,
	}
}
// Clear drops every queued build and returns how many were removed. The
// queue is left with a new, empty, non-nil slice.
func (bq *BuildQueue) Clear() int {
	bq.mu.Lock()
	defer bq.mu.Unlock()

	dropped := len(bq.builds)
	bq.builds = []*Build{}
	return dropped
}
// SetPriority stores priority on the first queued build whose ID equals id.
// It returns an error when no such build is in the queue. Only the in-memory
// build is changed; the queue order is left as it is.
func (bq *BuildQueue) SetPriority(id string, priority int) error {
	bq.mu.Lock()
	defer bq.mu.Unlock()

	for idx := range bq.builds {
		queued := bq.builds[idx]
		if queued.ID != id {
			continue
		}
		queued.Priority = priority
		return nil
	}

	return fmt.Errorf("build not found in queue")
}
// Length reports how many builds are currently queued.
func (bq *BuildQueue) Length() int {
	bq.mu.RLock()
	size := len(bq.builds)
	bq.mu.RUnlock()
	return size
}
// WorkerManager methods
// GetWorker looks up a worker by ID. The boolean is false (and the worker
// nil) when the ID is not registered. The returned pointer is the registry's
// own entry, not a copy.
func (wm *WorkerManager) GetWorker(id string) (*Worker, bool) {
	wm.mu.RLock()
	defer wm.mu.RUnlock()

	w, ok := wm.workers[id]
	return w, ok
}
// ListWorkers returns all registered workers in a new, non-nil slice. The
// order follows map iteration and is therefore unspecified.
func (wm *WorkerManager) ListWorkers() []*Worker {
	wm.mu.RLock()
	defer wm.mu.RUnlock()

	all := make([]*Worker, 0, len(wm.workers))
	for id := range wm.workers {
		all = append(all, wm.workers[id])
	}
	return all
}
// UpdateWorkerStatus sets the status of the worker with the given ID and
// refreshes its LastSeen timestamp. It returns an error when the ID is not
// registered.
func (wm *WorkerManager) UpdateWorkerStatus(id string, status WorkerStatus) error {
	wm.mu.Lock()
	defer wm.mu.Unlock()

	if w, ok := wm.workers[id]; ok {
		w.Status = status
		w.LastSeen = time.Now()
		return nil
	}
	return fmt.Errorf("worker not found")
}
// GetAvailableWorker returns an idle worker whose architecture equals the
// requested one, or nil when there is none. When several workers qualify,
// which one is returned depends on map iteration order.
func (wm *WorkerManager) GetAvailableWorker(architecture string) *Worker {
	wm.mu.RLock()
	defer wm.mu.RUnlock()

	for _, candidate := range wm.workers {
		if candidate.Status != WorkerStatusIdle || candidate.Architecture != architecture {
			continue
		}
		return candidate
	}
	return nil
}
// AssignJob records jobID as the current job of the given worker and marks
// the worker as working. Unknown worker IDs are ignored.
func (wm *WorkerManager) AssignJob(workerID, jobID string) {
	wm.mu.Lock()
	defer wm.mu.Unlock()

	w, ok := wm.workers[workerID]
	if !ok {
		return
	}
	w.CurrentJob = jobID
	w.Status = WorkerStatusWorking
}
// ActiveWorkerCount returns the number of workers whose status is either
// working or idle.
func (wm *WorkerManager) ActiveWorkerCount() int {
	wm.mu.RLock()
	defer wm.mu.RUnlock()

	var active int
	for _, w := range wm.workers {
		switch w.Status {
		case WorkerStatusWorking, WorkerStatusIdle:
			active++
		}
	}
	return active
}
// TotalWorkerCount returns the number of registered workers, whatever their
// status.
func (wm *WorkerManager) TotalWorkerCount() int {
	wm.mu.RLock()
	total := len(wm.workers)
	wm.mu.RUnlock()
	return total
}