debian-forge-composer/internal/monitoring/operations_cli.go
robojerk 4eeaa43c39
Some checks failed
Tests / 🛃 Unit tests (push) Failing after 13s
Tests / 🗄 DB tests (push) Failing after 19s
Tests / 🐍 Lint python scripts (push) Failing after 1s
Tests / ⌨ Golang Lint (push) Failing after 1s
Tests / 📦 Packit config lint (push) Failing after 1s
Tests / 🔍 Check source preparation (push) Failing after 1s
Tests / 🔍 Check for valid snapshot urls (push) Failing after 1s
Tests / 🔍 Check for missing or unused runner repos (push) Failing after 1s
Tests / 🐚 Shellcheck (push) Failing after 1s
Tests / 📦 RPMlint (push) Failing after 1s
Tests / Gitlab CI trigger helper (push) Failing after 1s
Tests / 🎀 kube-linter (push) Failing after 1s
Tests / 🧹 cloud-cleaner-is-enabled (push) Successful in 3s
Tests / 🔍 Check spec file osbuild/images dependencies (push) Failing after 1s
did stuff
2025-08-26 10:34:42 -07:00

559 lines
16 KiB
Go

package monitoring
import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
// OperationsCLI provides command-line interface for operations management
type OperationsCLI struct {
manager *OperationsManager
configPath string
logger *logrus.Logger
}
// NewOperationsCLI creates a new operations CLI
func NewOperationsCLI(configPath string, logger *logrus.Logger) *OperationsCLI {
return &OperationsCLI{
configPath: configPath,
logger: logger,
}
}
// CreateRootCommand creates the root operations command
func (cli *OperationsCLI) CreateRootCommand() *cobra.Command {
rootCmd := &cobra.Command{
Use: "operations",
Short: "Debian Forge Operations Management",
Long: "Manage backup, recovery, and testing operations for Debian Forge",
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
return cli.initializeManager()
},
}
// Add subcommands
rootCmd.AddCommand(cli.createBackupCommand())
rootCmd.AddCommand(cli.createRecoveryCommand())
rootCmd.AddCommand(cli.createTestingCommand())
rootCmd.AddCommand(cli.createConfigCommand())
rootCmd.AddCommand(cli.createStatusCommand())
return rootCmd
}
// initializeManager initializes the operations manager
func (cli *OperationsCLI) initializeManager() error {
// Load configuration
config, err := LoadOperationsConfig(cli.configPath)
if err != nil {
return fmt.Errorf("failed to load configuration: %w", err)
}
// Validate configuration
configManager := &OperationsConfigManager{configPath: cli.configPath, config: config}
if err := configManager.ValidateConfig(); err != nil {
return fmt.Errorf("configuration validation failed: %w", err)
}
// Create operations manager
cli.manager = NewOperationsManager(config, cli.logger)
return nil
}
// createBackupCommand creates the backup command
func (cli *OperationsCLI) createBackupCommand() *cobra.Command {
backupCmd := &cobra.Command{
Use: "backup",
Short: "Manage backup operations",
Long: "Create, list, and manage backup operations",
}
// Create backup subcommand
createCmd := &cobra.Command{
Use: "create [strategy]",
Short: "Create a new backup",
Long: "Create a new backup using the specified strategy",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.createBackup(args[0])
},
}
// List backups subcommand
listCmd := &cobra.Command{
Use: "list",
Short: "List available backups",
Long: "List all available backup strategies and recent backups",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.listBackups()
},
}
// Schedule backup subcommand
scheduleCmd := &cobra.Command{
Use: "schedule [schedule]",
Short: "Schedule a backup",
Long: "Schedule a backup using the specified schedule",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.scheduleBackup(args[0])
},
}
backupCmd.AddCommand(createCmd, listCmd, scheduleCmd)
return backupCmd
}
// createRecoveryCommand creates the recovery command
func (cli *OperationsCLI) createRecoveryCommand() *cobra.Command {
recoveryCmd := &cobra.Command{
Use: "recovery",
Short: "Manage recovery operations",
Long: "Execute recovery plans and manage recovery procedures",
}
// Execute recovery subcommand
executeCmd := &cobra.Command{
Use: "execute [plan] [backup]",
Short: "Execute a recovery plan",
Long: "Execute a recovery plan using the specified backup",
Args: cobra.ExactArgs(2),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.executeRecovery(args[0], args[1])
},
}
// List recovery plans subcommand
listCmd := &cobra.Command{
Use: "list",
Short: "List recovery plans",
Long: "List all available recovery plans",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.listRecoveryPlans()
},
}
// Show recovery procedure subcommand
showCmd := &cobra.Command{
Use: "show [procedure]",
Short: "Show recovery procedure details",
Long: "Show detailed information about a recovery procedure",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.showRecoveryProcedure(args[0])
},
}
recoveryCmd.AddCommand(executeCmd, listCmd, showCmd)
return recoveryCmd
}
// createTestingCommand creates the testing command
func (cli *OperationsCLI) createTestingCommand() *cobra.Command {
testingCmd := &cobra.Command{
Use: "testing",
Short: "Manage recovery testing",
Long: "Run and manage recovery testing scenarios",
}
// Run test subcommand
runCmd := &cobra.Command{
Use: "run [scenario]",
Short: "Run a test scenario",
Long: "Run a recovery test scenario",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.runTest(args[0])
},
}
// List test scenarios subcommand
listCmd := &cobra.Command{
Use: "list",
Short: "List test scenarios",
Long: "List all available test scenarios",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.listTestScenarios()
},
}
// Show test results subcommand
resultsCmd := &cobra.Command{
Use: "results [test-id]",
Short: "Show test results",
Long: "Show results for a specific test",
Args: cobra.ExactArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.showTestResults(args[0])
},
}
testingCmd.AddCommand(runCmd, listCmd, resultsCmd)
return testingCmd
}
// createConfigCommand creates the configuration command
func (cli *OperationsCLI) createConfigCommand() *cobra.Command {
configCmd := &cobra.Command{
Use: "config",
Short: "Manage operations configuration",
Long: "View and modify operations configuration",
}
// Show configuration subcommand
showCmd := &cobra.Command{
Use: "show",
Short: "Show current configuration",
Long: "Show current operations configuration",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.showConfig()
},
}
// Update configuration subcommand
updateCmd := &cobra.Command{
Use: "update [key] [value]",
Short: "Update configuration",
Long: "Update a configuration value",
Args: cobra.ExactArgs(2),
RunE: func(cmd *cobra.Command, args []string) error {
return cli.updateConfig(args[0], args[1])
},
}
// Validate configuration subcommand
validateCmd := &cobra.Command{
Use: "validate",
Short: "Validate configuration",
Long: "Validate current configuration",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.validateConfig()
},
}
configCmd.AddCommand(showCmd, updateCmd, validateCmd)
return configCmd
}
// createStatusCommand creates the status command
func (cli *OperationsCLI) createStatusCommand() *cobra.Command {
statusCmd := &cobra.Command{
Use: "status",
Short: "Show operations status",
Long: "Show current status of operations systems",
RunE: func(cmd *cobra.Command, args []string) error {
return cli.showStatus()
},
}
return statusCmd
}
// Backup operations
func (cli *OperationsCLI) createBackup(strategyID string) error {
cli.logger.Infof("Creating backup using strategy: %s", strategyID)
job, err := cli.manager.backup.CreateBackup(strategyID)
if err != nil {
return fmt.Errorf("backup creation failed: %w", err)
}
fmt.Printf("Backup created successfully:\n")
fmt.Printf(" ID: %s\n", job.ID)
fmt.Printf(" Strategy: %s\n", job.StrategyID)
fmt.Printf(" Status: %s\n", job.Status)
fmt.Printf(" Size: %d bytes\n", job.Size)
fmt.Printf(" Duration: %v\n", job.Duration)
fmt.Printf(" Path: %s\n", job.Path)
if job.Checksum != "" {
fmt.Printf(" Checksum: %s\n", job.Checksum)
}
return nil
}
func (cli *OperationsCLI) listBackups() error {
fmt.Printf("Available Backup Strategies:\n")
fmt.Printf("============================\n")
for id, strategy := range cli.manager.backup.strategies {
fmt.Printf(" %s:\n", id)
fmt.Printf(" Name: %s\n", strategy.Name)
fmt.Printf(" Description: %s\n", strategy.Description)
fmt.Printf(" Type: %s\n", strategy.Type)
fmt.Printf(" Enabled: %t\n", strategy.Enabled)
fmt.Printf(" Compression: %t\n", strategy.Compression)
fmt.Printf(" Encryption: %t\n", strategy.Encryption)
fmt.Printf(" Paths: %v\n", strategy.Paths)
fmt.Printf(" Exclude: %v\n", strategy.Exclude)
fmt.Printf("\n")
}
fmt.Printf("Backup Schedules:\n")
fmt.Printf("=================\n")
for id, schedule := range cli.manager.backup.schedules {
fmt.Printf(" %s:\n", id)
fmt.Printf(" Name: %s\n", schedule.Name)
fmt.Printf(" Description: %s\n", schedule.Description)
fmt.Printf(" Type: %s\n", schedule.Type)
fmt.Printf(" Interval: %v\n", schedule.Interval)
fmt.Printf(" Enabled: %t\n", schedule.Enabled)
fmt.Printf(" Next Run: %v\n", schedule.NextRun)
fmt.Printf("\n")
}
return nil
}
func (cli *OperationsCLI) scheduleBackup(scheduleID string) error {
schedule, exists := cli.manager.backup.schedules[scheduleID]
if !exists {
return fmt.Errorf("backup schedule not found: %s", scheduleID)
}
if !schedule.Enabled {
return fmt.Errorf("backup schedule is disabled: %s", scheduleID)
}
fmt.Printf("Scheduling backup for: %s\n", schedule.Name)
fmt.Printf(" Type: %s\n", schedule.Type)
fmt.Printf(" Interval: %v\n", schedule.Interval)
fmt.Printf(" Next Run: %v\n", schedule.NextRun)
// In production, this would actually schedule the backup
cli.logger.Infof("Backup scheduled for: %s", scheduleID)
return nil
}
// Recovery operations
func (cli *OperationsCLI) executeRecovery(planID string, backupID string) error {
cli.logger.Infof("Executing recovery plan: %s with backup: %s", planID, backupID)
if err := cli.manager.recovery.ExecuteRecovery(planID, backupID); err != nil {
return fmt.Errorf("recovery execution failed: %w", err)
}
fmt.Printf("Recovery plan executed successfully: %s\n", planID)
return nil
}
func (cli *OperationsCLI) listRecoveryPlans() error {
fmt.Printf("Available Recovery Plans:\n")
fmt.Printf("=========================\n")
for id, plan := range cli.manager.recovery.plans {
fmt.Printf(" %s:\n", id)
fmt.Printf(" Name: %s\n", plan.Name)
fmt.Printf(" Description: %s\n", plan.Description)
fmt.Printf(" Priority: %s\n", plan.Priority)
fmt.Printf(" RTO: %v\n", plan.RTO)
fmt.Printf(" RPO: %v\n", plan.RPO)
fmt.Printf(" Enabled: %t\n", plan.Enabled)
fmt.Printf(" Procedures: %v\n", plan.Procedures)
fmt.Printf("\n")
}
return nil
}
func (cli *OperationsCLI) showRecoveryProcedure(procedureID string) error {
procedure, exists := cli.manager.recovery.procedures[procedureID]
if !exists {
return fmt.Errorf("recovery procedure not found: %s", procedureID)
}
fmt.Printf("Recovery Procedure: %s\n", procedure.Name)
fmt.Printf("=====================\n")
fmt.Printf(" ID: %s\n", procedure.ID)
fmt.Printf(" Description: %s\n", procedure.Description)
fmt.Printf(" Type: %s\n", procedure.Type)
fmt.Printf(" Risk Level: %s\n", procedure.RiskLevel)
fmt.Printf(" Estimated Time: %v\n", procedure.EstimatedTime)
fmt.Printf(" Enabled: %t\n", procedure.Enabled)
fmt.Printf(" Prerequisites: %v\n", procedure.Prerequisites)
fmt.Printf("\n Steps:\n")
for i, step := range procedure.Steps {
fmt.Printf(" %d. %s\n", i+1, step.Name)
fmt.Printf(" Description: %s\n", step.Description)
fmt.Printf(" Command: %s %v\n", step.Command, step.Args)
fmt.Printf(" Timeout: %v\n", step.Timeout)
if step.Rollback != "" {
fmt.Printf(" Rollback: %s\n", step.Rollback)
}
fmt.Printf("\n")
}
return nil
}
// Testing operations
func (cli *OperationsCLI) runTest(scenarioID string) error {
cli.logger.Infof("Running test scenario: %s", scenarioID)
result, err := cli.manager.testing.RunTest(scenarioID)
if err != nil {
return fmt.Errorf("test execution failed: %w", err)
}
fmt.Printf("Test scenario completed successfully:\n")
fmt.Printf(" ID: %s\n", result.ID)
fmt.Printf(" Scenario: %s\n", result.ScenarioID)
fmt.Printf(" Status: %s\n", result.Status)
fmt.Printf(" Duration: %v\n", result.Duration)
fmt.Printf(" Results: %v\n", result.Results)
return nil
}
func (cli *OperationsCLI) listTestScenarios() error {
fmt.Printf("Available Test Scenarios:\n")
fmt.Printf("=========================\n")
for id, scenario := range cli.manager.testing.scenarios {
fmt.Printf(" %s:\n", id)
fmt.Printf(" Name: %s\n", scenario.Name)
fmt.Printf(" Description: %s\n", scenario.Description)
fmt.Printf(" Type: %s\n", scenario.Type)
fmt.Printf(" Enabled: %t\n", scenario.Enabled)
fmt.Printf(" Steps: %d\n", len(scenario.Steps))
fmt.Printf(" Expected: %v\n", scenario.Expected)
fmt.Printf("\n")
}
return nil
}
func (cli *OperationsCLI) showTestResults(testID string) error {
result, exists := cli.manager.testing.results[testID]
if !exists {
return fmt.Errorf("test result not found: %s", testID)
}
fmt.Printf("Test Result: %s\n", testID)
fmt.Printf("============\n")
fmt.Printf(" Scenario: %s\n", result.ScenarioID)
fmt.Printf(" Status: %s\n", result.Status)
fmt.Printf(" Start Time: %v\n", result.StartTime)
fmt.Printf(" End Time: %v\n", result.EndTime)
fmt.Printf(" Duration: %v\n", result.Duration)
if result.Error != "" {
fmt.Printf(" Error: %s\n", result.Error)
}
fmt.Printf(" Results: %v\n", result.Results)
fmt.Printf(" Metadata: %v\n", result.Metadata)
return nil
}
// Configuration operations
func (cli *OperationsCLI) showConfig() error {
if cli.manager.config == nil {
return fmt.Errorf("no configuration loaded")
}
fmt.Printf("Operations Configuration:\n")
fmt.Printf("========================\n")
fmt.Printf(" Enabled: %t\n", cli.manager.config.Enabled)
fmt.Printf(" Backup Path: %s\n", cli.manager.config.BackupPath)
fmt.Printf(" Recovery Path: %s\n", cli.manager.config.RecoveryPath)
fmt.Printf(" Retention Days: %d\n", cli.manager.config.RetentionDays)
fmt.Printf(" Compression: %t\n", cli.manager.config.Compression)
fmt.Printf(" Encryption: %t\n", cli.manager.config.Encryption)
if len(cli.manager.config.Metadata) > 0 {
fmt.Printf(" Metadata:\n")
for key, value := range cli.manager.config.Metadata {
fmt.Printf(" %s: %s\n", key, value)
}
}
return nil
}
func (cli *OperationsCLI) updateConfig(key string, value string) error {
configManager := &OperationsConfigManager{configPath: cli.configPath, config: cli.manager.config}
updates := make(map[string]interface{})
// Parse value based on key type
switch key {
case "enabled", "compression", "encryption":
if boolVal, err := strconv.ParseBool(value); err == nil {
updates[key] = boolVal
} else {
return fmt.Errorf("invalid boolean value for %s: %s", key, value)
}
case "retention_days":
if intVal, err := strconv.Atoi(value); err == nil {
updates[key] = intVal
} else {
return fmt.Errorf("invalid integer value for %s: %s", key, value)
}
case "backup_path", "recovery_path":
updates[key] = value
default:
return fmt.Errorf("unknown configuration key: %s", key)
}
if err := configManager.UpdateConfig(updates); err != nil {
return fmt.Errorf("failed to update configuration: %w", err)
}
fmt.Printf("Configuration updated: %s = %s\n", key, value)
return nil
}
func (cli *OperationsCLI) validateConfig() error {
configManager := &OperationsConfigManager{configPath: cli.configPath, config: cli.manager.config}
if err := configManager.ValidateConfig(); err != nil {
return fmt.Errorf("configuration validation failed: %w", err)
}
fmt.Printf("Configuration validation passed\n")
return nil
}
// Status operations
func (cli *OperationsCLI) showStatus() error {
fmt.Printf("Operations System Status:\n")
fmt.Printf("=========================\n")
// Backup system status
fmt.Printf("Backup System:\n")
fmt.Printf(" Status: Active\n")
fmt.Printf(" Strategies: %d\n", len(cli.manager.backup.strategies))
fmt.Printf(" Schedules: %d\n", len(cli.manager.backup.schedules))
fmt.Printf(" Storage Path: %s\n", cli.manager.backup.storage.path)
// Recovery system status
fmt.Printf("\nRecovery System:\n")
fmt.Printf(" Status: Active\n")
fmt.Printf(" Procedures: %d\n", len(cli.manager.recovery.procedures))
fmt.Printf(" Plans: %d\n", len(cli.manager.recovery.plans))
// Testing system status
fmt.Printf("\nTesting System:\n")
fmt.Printf(" Status: Active\n")
fmt.Printf(" Scenarios: %d\n", len(cli.manager.testing.scenarios))
fmt.Printf(" Results: %d\n", len(cli.manager.testing.results))
// Data persistence status
fmt.Printf("\nData Persistence:\n")
fmt.Printf(" Status: Active\n")
fmt.Printf(" Replication: %t\n", cli.manager.persistence.config.Replication)
fmt.Printf(" Replica Count: %d\n", cli.manager.persistence.config.ReplicaCount)
return nil
}