debian-forge-composer/internal/monitoring/build_analytics.go
robojerk 4eeaa43c39
Some checks failed
Tests / 🛃 Unit tests (push) Failing after 13s
Tests / 🗄 DB tests (push) Failing after 19s
Tests / 🐍 Lint python scripts (push) Failing after 1s
Tests / ⌨ Golang Lint (push) Failing after 1s
Tests / 📦 Packit config lint (push) Failing after 1s
Tests / 🔍 Check source preparation (push) Failing after 1s
Tests / 🔍 Check for valid snapshot urls (push) Failing after 1s
Tests / 🔍 Check for missing or unused runner repos (push) Failing after 1s
Tests / 🐚 Shellcheck (push) Failing after 1s
Tests / 📦 RPMlint (push) Failing after 1s
Tests / Gitlab CI trigger helper (push) Failing after 1s
Tests / 🎀 kube-linter (push) Failing after 1s
Tests / 🧹 cloud-cleaner-is-enabled (push) Successful in 3s
Tests / 🔍 Check spec file osbuild/images dependencies (push) Failing after 1s
did stuff
2025-08-26 10:34:42 -07:00

703 lines
20 KiB
Go

package monitoring
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/sirupsen/logrus"
)
// BuildAnalytics is the top-level facade that ties together build tracking,
// performance analysis, capacity planning, dashboard generation and
// persistent storage.
type BuildAnalytics struct {
logger *logrus.Logger
config *AnalyticsConfig
buildTracker *BuildTracker
performance *PerformanceAnalyzer
capacity *CapacityPlanner
dashboard *AnalyticsDashboard
storage *AnalyticsStorage
mu sync.RWMutex // NOTE(review): never locked by the methods below — they lock sub-component mutexes instead; confirm this field is needed
}
// AnalyticsConfig configures whether analytics runs, where data is written,
// and how long it is retained.
type AnalyticsConfig struct {
Enabled bool `json:"enabled"`
DataPath string `json:"data_path"` // root directory for persisted records
RetentionDays int `json:"retention_days"` // converted to a time.Duration in NewBuildAnalytics
MetricsPath string `json:"metrics_path"`
DashboardPath string `json:"dashboard_path"`
Metadata map[string]string `json:"metadata"`
}
// BuildTracker holds in-memory state for builds, workers and queues,
// guarded by its own RWMutex.
type BuildTracker struct {
builds map[string]BuildRecord
workers map[string]WorkerStats
queues map[string]QueueStats
mu sync.RWMutex
}
// BuildRecord captures one build's identity, lifecycle timestamps, outcome
// and resource consumption.
type BuildRecord struct {
ID string `json:"id"`
Blueprint string `json:"blueprint"`
Variant string `json:"variant"`
Status string `json:"status"` // values observed in this file: "success", "failed", "running", "queued"
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
Duration time.Duration `json:"duration"` // EndTime - StartTime, computed by UpdateBuildStatus
WorkerID string `json:"worker_id"`
Priority int `json:"priority"`
QueueTime time.Duration `json:"queue_time"`
ResourceUsage ResourceUsage `json:"resource_usage"`
Error string `json:"error,omitempty"`
Metadata map[string]interface{} `json:"metadata"`
}
// WorkerStats accumulates per-worker build counters and liveness info.
type WorkerStats struct {
ID string `json:"id"`
Status string `json:"status"`
CurrentBuild string `json:"current_build"`
TotalBuilds int `json:"total_builds"`
SuccessfulBuilds int `json:"successful_builds"`
FailedBuilds int `json:"failed_builds"`
Uptime time.Duration `json:"uptime"`
LastSeen time.Time `json:"last_seen"` // refreshed by TrackBuild
ResourceUsage ResourceUsage `json:"resource_usage"`
Metadata map[string]interface{} `json:"metadata"`
}
// QueueStats describes a named build queue's length and throughput.
// NOTE(review): nothing in this file populates queues yet.
type QueueStats struct {
Name string `json:"name"`
Length int `json:"length"`
Priority int `json:"priority"`
AverageWaitTime time.Duration `json:"average_wait_time"`
TotalProcessed int `json:"total_processed"`
Metadata map[string]interface{} `json:"metadata"`
}
// ResourceUsage is a point-in-time resource snapshot.
// NOTE(review): units are not established anywhere in this file — the
// 80/60 thresholds in updateForecasts suggest percentages; confirm with producers.
type ResourceUsage struct {
CPUUsage float64 `json:"cpu_usage"`
MemoryUsage float64 `json:"memory_usage"`
DiskUsage float64 `json:"disk_usage"`
NetworkIO float64 `json:"network_io"`
}
// PerformanceAnalyzer maintains metric trends and benchmarks, guarded by mu.
type PerformanceAnalyzer struct {
trends map[string]PerformanceTrend
benchmarks map[string]Benchmark
mu sync.RWMutex
}
// PerformanceTrend is a time series for one metric plus a simple
// linear-regression summary (slope, direction, confidence).
type PerformanceTrend struct {
Metric string `json:"metric"`
TimeRange string `json:"time_range"`
DataPoints []DataPoint `json:"data_points"`
Trend string `json:"trend"` // "increasing", "decreasing", "stable" or "insufficient_data"
Slope float64 `json:"slope"`
Confidence float64 `json:"confidence"`
Metadata map[string]interface{} `json:"metadata"`
}
// DataPoint is a single timestamped sample.
type DataPoint struct {
Timestamp time.Time `json:"timestamp"`
Value float64 `json:"value"`
}
// Benchmark compares a current measurement against a recorded baseline.
// NOTE(review): nothing in this file populates benchmarks yet.
type Benchmark struct {
Name string `json:"name"`
Description string `json:"description"`
Category string `json:"category"`
Baseline float64 `json:"baseline"`
Current float64 `json:"current"`
Improvement float64 `json:"improvement"`
Unit string `json:"unit"`
Metadata map[string]interface{} `json:"metadata"`
}
// CapacityPlanner derives scaling recommendations from resource forecasts.
type CapacityPlanner struct {
recommendations []CapacityRecommendation
forecasts map[string]CapacityForecast
mu sync.RWMutex
}
// CapacityRecommendation is an actionable scaling suggestion.
type CapacityRecommendation struct {
ID string `json:"id"`
Type string `json:"type"` // e.g. "scale_up"
Priority string `json:"priority"` // "critical", "high", "medium" or "low"
Description string `json:"description"`
Impact string `json:"impact"`
Effort string `json:"effort"`
Timeline string `json:"timeline"`
Metadata map[string]interface{} `json:"metadata"`
}
// CapacityForecast projects usage for one resource over a time range.
type CapacityForecast struct {
Resource string `json:"resource"`
TimeRange string `json:"time_range"`
CurrentUsage float64 `json:"current_usage"`
ProjectedUsage float64 `json:"projected_usage"`
PeakUsage float64 `json:"peak_usage"`
RiskLevel string `json:"risk_level"` // "high", "medium" or "low" (see updateForecasts)
Metadata map[string]interface{} `json:"metadata"`
}
// AnalyticsDashboard holds dashboard configuration and render templates.
type AnalyticsDashboard struct {
config *DashboardConfig
templates map[string]DashboardTemplate
mu sync.RWMutex
}
// DashboardConfig controls dashboard refresh cadence, appearance and layout.
type DashboardConfig struct {
RefreshInterval time.Duration `json:"refresh_interval"`
Theme string `json:"theme"`
Layout string `json:"layout"`
Widgets []DashboardWidget `json:"widgets"`
Metadata map[string]string `json:"metadata"`
}
// DashboardWidget describes one widget's type, placement and settings.
type DashboardWidget struct {
ID string `json:"id"`
Type string `json:"type"`
Title string `json:"title"`
Position WidgetPosition `json:"position"`
Size WidgetSize `json:"size"`
Config map[string]interface{} `json:"config"`
Enabled bool `json:"enabled"`
Metadata map[string]interface{} `json:"metadata"`
}
// WidgetPosition is a widget's grid coordinate.
type WidgetPosition struct {
X int `json:"x"`
Y int `json:"y"`
}
// WidgetSize is a widget's extent in grid cells.
type WidgetSize struct {
Width int `json:"width"`
Height int `json:"height"`
}
// AnalyticsStorage persists analytics artifacts as JSON files under path.
// NOTE(review): retention is stored but no pruning logic based on it is
// visible in this file — confirm cleanup happens elsewhere.
type AnalyticsStorage struct {
path string
retention time.Duration
mu sync.RWMutex
}
// NewBuildAnalytics wires together all analytics sub-components using the
// supplied configuration and logger. Retention is derived from
// config.RetentionDays (whole days expressed as a time.Duration).
func NewBuildAnalytics(config *AnalyticsConfig, logger *logrus.Logger) *BuildAnalytics {
    retention := time.Duration(config.RetentionDays) * 24 * time.Hour
    return &BuildAnalytics{
        logger:       logger,
        config:       config,
        buildTracker: NewBuildTracker(),
        performance:  NewPerformanceAnalyzer(),
        capacity:     NewCapacityPlanner(),
        dashboard:    NewAnalyticsDashboard(),
        storage:      NewAnalyticsStorage(config.DataPath, retention),
    }
}
// NewBuildTracker returns an empty tracker with all maps initialized so
// callers can write into them immediately.
func NewBuildTracker() *BuildTracker {
    tracker := new(BuildTracker)
    tracker.builds = make(map[string]BuildRecord)
    tracker.workers = make(map[string]WorkerStats)
    tracker.queues = make(map[string]QueueStats)
    return tracker
}
// NewPerformanceAnalyzer returns an analyzer with empty trend and
// benchmark maps ready for use.
func NewPerformanceAnalyzer() *PerformanceAnalyzer {
    analyzer := new(PerformanceAnalyzer)
    analyzer.trends = make(map[string]PerformanceTrend)
    analyzer.benchmarks = make(map[string]Benchmark)
    return analyzer
}
// NewCapacityPlanner returns a planner with an empty (non-nil)
// recommendation list and an initialized forecast map.
func NewCapacityPlanner() *CapacityPlanner {
    planner := new(CapacityPlanner)
    planner.recommendations = make([]CapacityRecommendation, 0)
    planner.forecasts = make(map[string]CapacityForecast)
    return planner
}
// NewAnalyticsDashboard returns a dashboard with a zero-value config and an
// initialized template map.
func NewAnalyticsDashboard() *AnalyticsDashboard {
    dash := new(AnalyticsDashboard)
    dash.config = new(DashboardConfig)
    dash.templates = make(map[string]DashboardTemplate)
    return dash
}
// NewAnalyticsStorage returns storage rooted at path with the given
// retention window.
func NewAnalyticsStorage(path string, retention time.Duration) *AnalyticsStorage {
    storage := AnalyticsStorage{path: path, retention: retention}
    return &storage
}
// TrackBuild registers a build record and, when the build's worker is already
// known, rolls its outcome into that worker's counters before persisting the
// record to disk.
// NOTE(review): a record with an unknown WorkerID is stored but does not
// create a WorkerStats entry — confirm that is intended.
func (ba *BuildAnalytics) TrackBuild(build BuildRecord) error {
    ba.logger.Infof("Tracking build: %s (blueprint: %s, variant: %s)", build.ID, build.Blueprint, build.Variant)
    ba.buildTracker.mu.Lock()
    defer ba.buildTracker.mu.Unlock()
    ba.buildTracker.builds[build.ID] = build
    if worker, known := ba.buildTracker.workers[build.WorkerID]; known {
        worker.TotalBuilds++
        switch build.Status {
        case "success":
            worker.SuccessfulBuilds++
        case "failed":
            worker.FailedBuilds++
        }
        worker.LastSeen = time.Now()
        ba.buildTracker.workers[build.WorkerID] = worker
    }
    // Persist last; note this file write happens while the tracker lock is held.
    return ba.storage.storeBuildRecord(build)
}
// UpdateBuildStatus records the updated status of a tracked build, computes
// its duration from StartTime, kicks off asynchronous trend/forecast updates,
// and persists the change. It returns an error when buildID is unknown.
//
// Fix: the final parameter was previously named `error`, shadowing the
// predeclared error type inside this function; renamed to errMsg (Go callers
// are positional, so the signature stays compatible).
func (ba *BuildAnalytics) UpdateBuildStatus(buildID string, status string, endTime time.Time, errMsg string) error {
    ba.buildTracker.mu.Lock()
    defer ba.buildTracker.mu.Unlock()
    build, exists := ba.buildTracker.builds[buildID]
    if !exists {
        return fmt.Errorf("build not found: %s", buildID)
    }
    build.Status = status
    build.EndTime = endTime
    build.Duration = endTime.Sub(build.StartTime)
    if errMsg != "" {
        build.Error = errMsg
    }
    ba.buildTracker.builds[buildID] = build
    // Fire-and-forget refreshes. NOTE(review): these goroutines are unbounded
    // and unsynchronized with shutdown — consider a bounded worker instead.
    go ba.performance.updateTrends(build)
    go ba.capacity.updateForecasts(build)
    return ba.storage.updateBuildRecord(build)
}
// GetBuildStats aggregates build counts, durations and the success rate for
// builds that started within the given range ("1h", "24h", "7d", "30d";
// anything else falls back to 24h).
func (ba *BuildAnalytics) GetBuildStats(timeRange string) *BuildStats {
    ba.buildTracker.mu.RLock()
    defer ba.buildTracker.mu.RUnlock()
    now := time.Now()
    // Resolve the window's start; unknown ranges behave like "24h".
    var since time.Time
    switch timeRange {
    case "1h":
        since = now.Add(-time.Hour)
    case "7d":
        since = now.AddDate(0, 0, -7)
    case "30d":
        since = now.AddDate(0, 0, -30)
    default: // covers "24h" and unrecognized values
        since = now.Add(-24 * time.Hour)
    }
    stats := &BuildStats{
        TimeRange: timeRange,
        Timestamp: now,
        Metadata:  make(map[string]interface{}),
    }
    for _, build := range ba.buildTracker.builds {
        if !build.StartTime.After(since) {
            continue
        }
        stats.TotalBuilds++
        stats.TotalDuration += build.Duration
        switch build.Status {
        case "success":
            stats.SuccessfulBuilds++
        case "failed":
            stats.FailedBuilds++
        case "running":
            stats.RunningBuilds++
        case "queued":
            stats.QueuedBuilds++
        }
        // AverageBuildTime first accumulates total completed duration, then
        // is divided by the completed count below.
        if build.Status == "success" || build.Status == "failed" {
            stats.CompletedBuilds++
            stats.AverageBuildTime += build.Duration
        }
    }
    if stats.CompletedBuilds > 0 {
        stats.AverageBuildTime /= time.Duration(stats.CompletedBuilds)
    }
    if stats.TotalBuilds > 0 {
        stats.SuccessRate = 100.0 * float64(stats.SuccessfulBuilds) / float64(stats.TotalBuilds)
    }
    return stats
}
// GetPerformanceTrends returns the cached trend for metric/timeRange, or a
// freshly generated placeholder when none has been recorded yet. The returned
// pointer refers to a copy, so callers cannot mutate cached state.
func (ba *BuildAnalytics) GetPerformanceTrends(metric string, timeRange string) *PerformanceTrend {
    ba.performance.mu.RLock()
    defer ba.performance.mu.RUnlock()
    key := metric + "_" + timeRange
    cached, ok := ba.performance.trends[key]
    if !ok {
        return ba.performance.generateTrend(metric, timeRange)
    }
    return &cached
}
// GetCapacityRecommendations returns a copy of the current recommendations
// sorted by priority (critical first).
//
// Fixes: the priority map was rebuilt on every comparison; unknown/empty
// priorities mapped to the zero value 0 and therefore sorted as if they were
// "critical" — they now sort last. SliceStable keeps a deterministic order
// among equal priorities.
func (ba *BuildAnalytics) GetCapacityRecommendations() []CapacityRecommendation {
    ba.capacity.mu.RLock()
    defer ba.capacity.mu.RUnlock()
    rank := func(priority string) int {
        switch priority {
        case "critical":
            return 0
        case "high":
            return 1
        case "medium":
            return 2
        case "low":
            return 3
        default:
            return 4 // unknown priorities sort after everything else
        }
    }
    recommendations := make([]CapacityRecommendation, len(ba.capacity.recommendations))
    copy(recommendations, ba.capacity.recommendations)
    sort.SliceStable(recommendations, func(i, j int) bool {
        return rank(recommendations[i].Priority) < rank(recommendations[j].Priority)
    })
    return recommendations
}
// GetCapacityForecasts returns a snapshot copy of all capacity forecasts so
// callers can read them without holding the planner lock.
func (ba *BuildAnalytics) GetCapacityForecasts() map[string]CapacityForecast {
    ba.capacity.mu.RLock()
    defer ba.capacity.mu.RUnlock()
    snapshot := make(map[string]CapacityForecast, len(ba.capacity.forecasts))
    for name, forecast := range ba.capacity.forecasts {
        snapshot[name] = forecast
    }
    return snapshot
}
// GenerateDashboard assembles the build-stats, performance-trend, capacity
// and worker widgets into a DashboardData snapshot and persists it.
// Persistence is best effort: a storage failure is logged but does not fail
// generation. The error result is currently always nil.
func (ba *BuildAnalytics) GenerateDashboard() (*DashboardData, error) {
    ba.logger.Info("Generating analytics dashboard")
    data := &DashboardData{
        Timestamp: time.Now(),
        Widgets:   make(map[string]WidgetData),
        Metadata:  make(map[string]interface{}),
    }
    if stats := ba.GetBuildStats("24h"); stats != nil {
        data.Widgets["build_stats"] = WidgetData{Type: "build_statistics", Data: stats}
    }
    if trend := ba.GetPerformanceTrends("build_duration", "7d"); trend != nil {
        data.Widgets["performance_trends"] = WidgetData{Type: "performance_trends", Data: trend}
    }
    if recs := ba.GetCapacityRecommendations(); len(recs) > 0 {
        data.Widgets["capacity_recommendations"] = WidgetData{Type: "capacity_recommendations", Data: recs}
    }
    if workers := ba.GetWorkerStats(); len(workers) > 0 {
        data.Widgets["worker_status"] = WidgetData{Type: "worker_status", Data: workers}
    }
    if err := ba.storage.storeDashboardData(data); err != nil {
        ba.logger.Warnf("Failed to store dashboard data: %v", err)
    }
    return data, nil
}
// GetWorkerStats returns a snapshot copy of per-worker statistics so callers
// can read them without holding the tracker lock.
func (ba *BuildAnalytics) GetWorkerStats() map[string]WorkerStats {
    ba.buildTracker.mu.RLock()
    defer ba.buildTracker.mu.RUnlock()
    snapshot := make(map[string]WorkerStats, len(ba.buildTracker.workers))
    for id, worker := range ba.buildTracker.workers {
        snapshot[id] = worker
    }
    return snapshot
}
// PerformanceAnalyzer methods
// updateTrends folds a finished build's duration into the 7-day
// build-duration trend, prunes samples older than 7 days, and recomputes the
// regression summary.
//
// Fix: the original only appended when the trend entry already existed, and
// nothing else ever created it — so every data point was silently dropped.
// The entry is now seeded on first use.
func (pa *PerformanceAnalyzer) updateTrends(build BuildRecord) {
    pa.mu.Lock()
    defer pa.mu.Unlock()
    const trendKey = "build_duration_7d"
    trend, exists := pa.trends[trendKey]
    if !exists {
        trend = PerformanceTrend{
            Metric:    "build_duration",
            TimeRange: "7d",
            Trend:     "insufficient_data",
            Metadata:  make(map[string]interface{}),
        }
    }
    trend.DataPoints = append(trend.DataPoints, DataPoint{
        Timestamp: build.EndTime,
        Value:     float64(build.Duration.Milliseconds()),
    })
    // Retain only the trailing 7 days of samples (in-place filter reuses the
    // backing array).
    cutoff := time.Now().AddDate(0, 0, -7)
    kept := trend.DataPoints[:0]
    for _, point := range trend.DataPoints {
        if point.Timestamp.After(cutoff) {
            kept = append(kept, point)
        }
    }
    trend.DataPoints = kept
    pa.trends[trendKey] = pa.calculateTrend(trend)
}
// generateTrend builds an empty placeholder trend for the given metric and
// time range. Real trend synthesis from historical data is still TODO.
func (pa *PerformanceAnalyzer) generateTrend(metric string, timeRange string) *PerformanceTrend {
    placeholder := PerformanceTrend{
        Metric:     metric,
        TimeRange:  timeRange,
        DataPoints: []DataPoint{},
        Trend:      "stable",
        Slope:      0.0,
        Confidence: 0.0,
        Metadata:   make(map[string]interface{}),
    }
    return &placeholder
}
// calculateTrend fits a simple least-squares line through the trend's data
// points (x = sample index, y = value), records the slope, and classifies the
// direction with a +/-0.1 dead band. Fewer than two points yields
// "insufficient_data".
//
// Fix: the slope division is now guarded against a zero denominator. With
// sequential indices it cannot be zero for n >= 2, but the guard makes the
// function safe for any future caller.
func (pa *PerformanceAnalyzer) calculateTrend(trend PerformanceTrend) PerformanceTrend {
    if len(trend.DataPoints) < 2 {
        trend.Trend = "insufficient_data"
        return trend
    }
    var sumX, sumY, sumXY, sumX2 float64
    n := float64(len(trend.DataPoints))
    for i, point := range trend.DataPoints {
        x := float64(i)
        sumX += x
        sumY += point.Value
        sumXY += x * point.Value
        sumX2 += x * x
    }
    denom := n*sumX2 - sumX*sumX
    if denom == 0 {
        trend.Slope = 0
        trend.Trend = "stable"
        trend.Confidence = 0.8 // Placeholder; see below.
        return trend
    }
    trend.Slope = (n*sumXY - sumX*sumY) / denom
    switch {
    case trend.Slope > 0.1:
        trend.Trend = "increasing"
    case trend.Slope < -0.1:
        trend.Trend = "decreasing"
    default:
        trend.Trend = "stable"
    }
    trend.Confidence = 0.8 // Placeholder until a real goodness-of-fit metric exists.
    return trend
}
// CapacityPlanner methods
// updateForecasts refreshes the 7-day CPU forecast from a build's observed
// CPU usage, applies a naive +10% projection, classifies the risk level, and
// triggers recommendation generation.
//
// Fix: the original only updated an existing forecast entry and nothing ever
// created one, making the whole function a no-op; the entry is now seeded on
// first observation.
func (cp *CapacityPlanner) updateForecasts(build BuildRecord) {
    cp.mu.Lock()
    defer cp.mu.Unlock()
    const forecastKey = "cpu_usage_7d"
    forecast, exists := cp.forecasts[forecastKey]
    if !exists {
        forecast = CapacityForecast{
            Resource:  "cpu",
            TimeRange: "7d",
            Metadata:  make(map[string]interface{}),
        }
    }
    forecast.CurrentUsage = build.ResourceUsage.CPUUsage
    // Naive projection; in production use real forecasting.
    forecast.ProjectedUsage = forecast.CurrentUsage * 1.1
    switch {
    case forecast.ProjectedUsage > 80.0:
        forecast.RiskLevel = "high"
    case forecast.ProjectedUsage > 60.0:
        forecast.RiskLevel = "medium"
    default:
        forecast.RiskLevel = "low"
    }
    cp.forecasts[forecastKey] = forecast
    // Called with cp.mu held; generateRecommendations must not re-lock.
    cp.generateRecommendations()
}
// generateRecommendations appends a scale-up recommendation when the 7-day
// CPU forecast is high risk. Caller must hold cp.mu.
//
// Fix: the original appended an identical recommendation on every build
// update while risk stayed high, growing the list without bound; an existing
// high-priority scale_up recommendation now suppresses duplicates.
func (cp *CapacityPlanner) generateRecommendations() {
    forecast, exists := cp.forecasts["cpu_usage_7d"]
    if !exists || forecast.RiskLevel != "high" {
        return
    }
    for _, existing := range cp.recommendations {
        if existing.Type == "scale_up" && existing.Priority == "high" {
            return // already recommended
        }
    }
    cp.recommendations = append(cp.recommendations, CapacityRecommendation{
        ID:          generateRecommendationID(),
        Type:        "scale_up",
        Priority:    "high",
        Description: "CPU usage is projected to exceed 80% within 7 days",
        Impact:      "high",
        Effort:      "medium",
        Timeline:    "1-2 weeks",
        Metadata:    make(map[string]interface{}),
    })
}
// AnalyticsStorage methods
// storeBuildRecord serializes a build record to
// <path>/build_<id>_<start-timestamp>.json, creating the data directory on
// first use.
func (as *AnalyticsStorage) storeBuildRecord(build BuildRecord) error {
    as.mu.Lock()
    defer as.mu.Unlock()
    if err := os.MkdirAll(as.path, 0755); err != nil {
        return fmt.Errorf("failed to create data directory: %w", err)
    }
    payload, err := json.MarshalIndent(build, "", " ")
    if err != nil {
        return fmt.Errorf("failed to marshal build record: %w", err)
    }
    stamp := build.StartTime.Format("2006-01-02_15-04-05")
    target := filepath.Join(as.path, fmt.Sprintf("build_%s_%s.json", build.ID, stamp))
    if err := os.WriteFile(target, payload, 0644); err != nil {
        return fmt.Errorf("failed to write build record: %w", err)
    }
    return nil
}
// updateBuildRecord locates the JSON file previously written for build.ID and
// overwrites it with the updated record. Returns an error when no matching
// file exists.
//
// Fix: unlike its siblings storeBuildRecord/storeDashboardData, this method
// did not take as.mu, so concurrent updates could interleave the directory
// scan and the write; the lock is now taken.
func (as *AnalyticsStorage) updateBuildRecord(build BuildRecord) error {
    as.mu.Lock()
    defer as.mu.Unlock()
    files, err := os.ReadDir(as.path)
    if err != nil {
        return fmt.Errorf("failed to read data directory: %w", err)
    }
    marker := fmt.Sprintf("build_%s_", build.ID)
    for _, file := range files {
        if !strings.Contains(file.Name(), marker) {
            continue
        }
        data, err := json.MarshalIndent(build, "", " ")
        if err != nil {
            return fmt.Errorf("failed to marshal updated build record: %w", err)
        }
        filePath := filepath.Join(as.path, file.Name())
        if err := os.WriteFile(filePath, data, 0644); err != nil {
            return fmt.Errorf("failed to update build record: %w", err)
        }
        return nil
    }
    return fmt.Errorf("build record file not found for ID: %s", build.ID)
}
// storeDashboardData serializes a dashboard snapshot to
// <path>/dashboard/dashboard_<timestamp>.json, creating the directory on
// first use.
func (as *AnalyticsStorage) storeDashboardData(dashboard *DashboardData) error {
    as.mu.Lock()
    defer as.mu.Unlock()
    dir := filepath.Join(as.path, "dashboard")
    if err := os.MkdirAll(dir, 0755); err != nil {
        return fmt.Errorf("failed to create dashboard directory: %w", err)
    }
    payload, err := json.MarshalIndent(dashboard, "", " ")
    if err != nil {
        return fmt.Errorf("failed to marshal dashboard data: %w", err)
    }
    stamp := dashboard.Timestamp.Format("2006-01-02_15-04-05")
    target := filepath.Join(dir, fmt.Sprintf("dashboard_%s.json", stamp))
    if err := os.WriteFile(target, payload, 0644); err != nil {
        return fmt.Errorf("failed to write dashboard data: %w", err)
    }
    return nil
}
// Dashboard types
// DashboardData is one generated dashboard snapshot, keyed by widget name.
type DashboardData struct {
Timestamp time.Time `json:"timestamp"`
Widgets map[string]WidgetData `json:"widgets"`
Metadata map[string]interface{} `json:"metadata"`
}
// WidgetData pairs a widget type tag with its payload.
type WidgetData struct {
Type string `json:"type"`
Data interface{} `json:"data"`
}
// DashboardTemplate is a named render template for dashboards.
// NOTE(review): nothing in this file populates or renders templates yet.
type DashboardTemplate struct {
ID string `json:"id"`
Name string `json:"name"`
Template string `json:"template"`
Metadata map[string]interface{} `json:"metadata"`
}
// BuildStats is the aggregate produced by GetBuildStats for one time range.
type BuildStats struct {
TimeRange string `json:"time_range"`
Timestamp time.Time `json:"timestamp"`
TotalBuilds int `json:"total_builds"`
SuccessfulBuilds int `json:"successful_builds"`
FailedBuilds int `json:"failed_builds"`
RunningBuilds int `json:"running_builds"`
QueuedBuilds int `json:"queued_builds"`
CompletedBuilds int `json:"completed_builds"` // successful + failed within the range
TotalDuration time.Duration `json:"total_duration"`
AverageBuildTime time.Duration `json:"average_build_time"` // mean over completed builds
SuccessRate float64 `json:"success_rate"` // percentage of total builds
Metadata map[string]interface{} `json:"metadata"`
}
// Helper functions
// generateRecommendationID derives a unique-enough recommendation ID from the
// current wall-clock time in nanoseconds ("rec-<unix-nanos>").
func generateRecommendationID() string {
    nanos := time.Now().UnixNano()
    return fmt.Sprintf("rec-%d", nanos)
}