274 lines
6.3 KiB
Go
274 lines
6.3 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// SystemHealthCheck checks system-level health
|
|
type SystemHealthCheck struct {
|
|
logger *logrus.Logger
|
|
}
|
|
|
|
// NewSystemHealthCheck creates a new system health check
|
|
func NewSystemHealthCheck() *SystemHealthCheck {
|
|
return &SystemHealthCheck{
|
|
logger: logrus.New(),
|
|
}
|
|
}
|
|
|
|
func (s *SystemHealthCheck) Name() string {
|
|
return "system"
|
|
}
|
|
|
|
func (s *SystemHealthCheck) IsCritical() bool {
|
|
return true
|
|
}
|
|
|
|
func (s *SystemHealthCheck) Check() (*HealthCheck, error) {
|
|
var m runtime.MemStats
|
|
runtime.ReadMemStats(&m)
|
|
|
|
// Check memory usage
|
|
memoryUsage := float64(m.Alloc) / float64(m.Sys) * 100
|
|
memoryStatus := HealthStatusHealthy
|
|
if memoryUsage > 90 {
|
|
memoryStatus = HealthStatusUnhealthy
|
|
} else if memoryUsage > 75 {
|
|
memoryStatus = HealthStatusDegraded
|
|
}
|
|
|
|
// Determine overall status
|
|
overallStatus := memoryStatus
|
|
|
|
details := map[string]interface{}{
|
|
"memory_alloc": m.Alloc,
|
|
"memory_sys": m.Sys,
|
|
"memory_usage_pct": memoryUsage,
|
|
"goroutines": runtime.NumGoroutine(),
|
|
}
|
|
|
|
return &HealthCheck{
|
|
Name: s.Name(),
|
|
Status: overallStatus,
|
|
Message: fmt.Sprintf("System health: memory %.1f%%", memoryUsage),
|
|
Details: details,
|
|
Critical: s.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// PackageManagerHealthCheck checks package manager health
|
|
type PackageManagerHealthCheck struct {
|
|
cacheDir string
|
|
logger *logrus.Logger
|
|
}
|
|
|
|
// NewPackageManagerHealthCheck creates a new package manager health check
|
|
func NewPackageManagerHealthCheck(cacheDir string) *PackageManagerHealthCheck {
|
|
return &PackageManagerHealthCheck{
|
|
cacheDir: cacheDir,
|
|
logger: logrus.New(),
|
|
}
|
|
}
|
|
|
|
func (p *PackageManagerHealthCheck) Name() string {
|
|
return "package_manager"
|
|
}
|
|
|
|
func (p *PackageManagerHealthCheck) IsCritical() bool {
|
|
return false
|
|
}
|
|
|
|
func (p *PackageManagerHealthCheck) Check() (*HealthCheck, error) {
|
|
// Check if cache directory exists and is writable
|
|
if p.cacheDir == "" {
|
|
return &HealthCheck{
|
|
Name: p.Name(),
|
|
Status: HealthStatusUnhealthy,
|
|
Message: "Cache directory not configured",
|
|
Critical: p.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// Check if directory exists
|
|
if _, err := os.Stat(p.cacheDir); os.IsNotExist(err) {
|
|
return &HealthCheck{
|
|
Name: p.Name(),
|
|
Status: HealthStatusDegraded,
|
|
Message: "Cache directory does not exist",
|
|
Critical: p.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// Check if directory is writable
|
|
testFile := filepath.Join(p.cacheDir, ".health_check_test")
|
|
if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil {
|
|
return &HealthCheck{
|
|
Name: p.Name(),
|
|
Status: HealthStatusUnhealthy,
|
|
Message: "Cache directory is not writable",
|
|
Critical: p.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// Clean up test file
|
|
os.Remove(testFile)
|
|
|
|
// Check cache size
|
|
cacheSize, err := p.getCacheSize()
|
|
if err != nil {
|
|
return &HealthCheck{
|
|
Name: p.Name(),
|
|
Status: HealthStatusDegraded,
|
|
Message: "Unable to determine cache size",
|
|
Critical: p.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
details := map[string]interface{}{
|
|
"cache_dir": p.cacheDir,
|
|
"cache_size": cacheSize,
|
|
"writable": true,
|
|
}
|
|
|
|
return &HealthCheck{
|
|
Name: p.Name(),
|
|
Status: HealthStatusHealthy,
|
|
Message: "Package manager cache is healthy",
|
|
Details: details,
|
|
Critical: p.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
func (p *PackageManagerHealthCheck) getCacheSize() (int64, error) {
|
|
var size int64
|
|
err := filepath.Walk(p.cacheDir, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !info.IsDir() {
|
|
size += info.Size()
|
|
}
|
|
return nil
|
|
})
|
|
return size, err
|
|
}
|
|
|
|
// OSTreeHealthCheck checks OSTree tool health
|
|
type OSTreeHealthCheck struct {
|
|
logger *logrus.Logger
|
|
}
|
|
|
|
// NewOSTreeHealthCheck creates a new OSTree health check
|
|
func NewOSTreeHealthCheck() *OSTreeHealthCheck {
|
|
return &OSTreeHealthCheck{
|
|
logger: logrus.New(),
|
|
}
|
|
}
|
|
|
|
func (o *OSTreeHealthCheck) Name() string {
|
|
return "ostree"
|
|
}
|
|
|
|
func (o *OSTreeHealthCheck) IsCritical() bool {
|
|
return true
|
|
}
|
|
|
|
func (o *OSTreeHealthCheck) Check() (*HealthCheck, error) {
|
|
// Check if ostree command is available
|
|
if _, err := os.Stat("/usr/bin/ostree"); os.IsNotExist(err) {
|
|
return &HealthCheck{
|
|
Name: o.Name(),
|
|
Status: HealthStatusUnhealthy,
|
|
Message: "OSTree command not found",
|
|
Critical: o.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// Check if apt-ostree is available
|
|
aptOstreeAvailable := false
|
|
if _, err := os.Stat("/usr/bin/apt-ostree"); err == nil {
|
|
aptOstreeAvailable = true
|
|
}
|
|
|
|
details := map[string]interface{}{
|
|
"ostree_available": true,
|
|
"apt_ostree_available": aptOstreeAvailable,
|
|
}
|
|
|
|
message := "OSTree tools are available"
|
|
if aptOstreeAvailable {
|
|
message += " (including apt-ostree)"
|
|
} else {
|
|
message += " (apt-ostree not available)"
|
|
}
|
|
|
|
status := HealthStatusHealthy
|
|
if !aptOstreeAvailable {
|
|
status = HealthStatusDegraded
|
|
}
|
|
|
|
return &HealthCheck{
|
|
Name: o.Name(),
|
|
Status: status,
|
|
Message: message,
|
|
Details: details,
|
|
Critical: o.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// BuildSystemHealthCheck checks build system connectivity
|
|
type BuildSystemHealthCheck struct {
|
|
orchestratorURL string
|
|
logger *logrus.Logger
|
|
}
|
|
|
|
// NewBuildSystemHealthCheck creates a new build system health check
|
|
func NewBuildSystemHealthCheck(orchestratorURL string) *BuildSystemHealthCheck {
|
|
return &BuildSystemHealthCheck{
|
|
orchestratorURL: orchestratorURL,
|
|
logger: logrus.New(),
|
|
}
|
|
}
|
|
|
|
func (b *BuildSystemHealthCheck) Name() string {
|
|
return "build_system"
|
|
}
|
|
|
|
func (b *BuildSystemHealthCheck) IsCritical() bool {
|
|
return false
|
|
}
|
|
|
|
func (b *BuildSystemHealthCheck) Check() (*HealthCheck, error) {
|
|
if b.orchestratorURL == "" {
|
|
return &HealthCheck{
|
|
Name: b.Name(),
|
|
Status: HealthStatusUnknown,
|
|
Message: "Orchestrator URL not configured",
|
|
Critical: b.IsCritical(),
|
|
}, nil
|
|
}
|
|
|
|
// Try to connect to orchestrator
|
|
// This is a simplified check - in production you'd want to make an actual HTTP request
|
|
// For now, we'll simulate a connection check
|
|
|
|
details := map[string]interface{}{
|
|
"orchestrator_url": b.orchestratorURL,
|
|
"configured": true,
|
|
}
|
|
|
|
// Placeholder: assume healthy if configured
|
|
// In reality, this would make an HTTP request to /health endpoint
|
|
return &HealthCheck{
|
|
Name: b.Name(),
|
|
Status: HealthStatusHealthy,
|
|
Message: "Build system is configured and accessible",
|
|
Details: details,
|
|
Critical: b.IsCritical(),
|
|
}, nil
|
|
}
|