first commit
This commit is contained in:
commit
57bb8aafbe
27 changed files with 8538 additions and 0 deletions
274
internal/monitoring/checks.go
Normal file
274
internal/monitoring/checks.go
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// SystemHealthCheck checks system-level health
|
||||
type SystemHealthCheck struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewSystemHealthCheck creates a new system health check
|
||||
func NewSystemHealthCheck() *SystemHealthCheck {
|
||||
return &SystemHealthCheck{
|
||||
logger: logrus.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SystemHealthCheck) Name() string {
|
||||
return "system"
|
||||
}
|
||||
|
||||
func (s *SystemHealthCheck) IsCritical() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (s *SystemHealthCheck) Check() (*HealthCheck, error) {
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
|
||||
// Check memory usage
|
||||
memoryUsage := float64(m.Alloc) / float64(m.Sys) * 100
|
||||
memoryStatus := HealthStatusHealthy
|
||||
if memoryUsage > 90 {
|
||||
memoryStatus = HealthStatusUnhealthy
|
||||
} else if memoryUsage > 75 {
|
||||
memoryStatus = HealthStatusDegraded
|
||||
}
|
||||
|
||||
// Determine overall status
|
||||
overallStatus := memoryStatus
|
||||
|
||||
details := map[string]interface{}{
|
||||
"memory_alloc": m.Alloc,
|
||||
"memory_sys": m.Sys,
|
||||
"memory_usage_pct": memoryUsage,
|
||||
"goroutines": runtime.NumGoroutine(),
|
||||
}
|
||||
|
||||
return &HealthCheck{
|
||||
Name: s.Name(),
|
||||
Status: overallStatus,
|
||||
Message: fmt.Sprintf("System health: memory %.1f%%", memoryUsage),
|
||||
Details: details,
|
||||
Critical: s.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// PackageManagerHealthCheck checks package manager health
|
||||
type PackageManagerHealthCheck struct {
|
||||
cacheDir string
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewPackageManagerHealthCheck creates a new package manager health check
|
||||
func NewPackageManagerHealthCheck(cacheDir string) *PackageManagerHealthCheck {
|
||||
return &PackageManagerHealthCheck{
|
||||
cacheDir: cacheDir,
|
||||
logger: logrus.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PackageManagerHealthCheck) Name() string {
|
||||
return "package_manager"
|
||||
}
|
||||
|
||||
func (p *PackageManagerHealthCheck) IsCritical() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (p *PackageManagerHealthCheck) Check() (*HealthCheck, error) {
|
||||
// Check if cache directory exists and is writable
|
||||
if p.cacheDir == "" {
|
||||
return &HealthCheck{
|
||||
Name: p.Name(),
|
||||
Status: HealthStatusUnhealthy,
|
||||
Message: "Cache directory not configured",
|
||||
Critical: p.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Check if directory exists
|
||||
if _, err := os.Stat(p.cacheDir); os.IsNotExist(err) {
|
||||
return &HealthCheck{
|
||||
Name: p.Name(),
|
||||
Status: HealthStatusDegraded,
|
||||
Message: "Cache directory does not exist",
|
||||
Critical: p.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Check if directory is writable
|
||||
testFile := filepath.Join(p.cacheDir, ".health_check_test")
|
||||
if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil {
|
||||
return &HealthCheck{
|
||||
Name: p.Name(),
|
||||
Status: HealthStatusUnhealthy,
|
||||
Message: "Cache directory is not writable",
|
||||
Critical: p.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Clean up test file
|
||||
os.Remove(testFile)
|
||||
|
||||
// Check cache size
|
||||
cacheSize, err := p.getCacheSize()
|
||||
if err != nil {
|
||||
return &HealthCheck{
|
||||
Name: p.Name(),
|
||||
Status: HealthStatusDegraded,
|
||||
Message: "Unable to determine cache size",
|
||||
Critical: p.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
details := map[string]interface{}{
|
||||
"cache_dir": p.cacheDir,
|
||||
"cache_size": cacheSize,
|
||||
"writable": true,
|
||||
}
|
||||
|
||||
return &HealthCheck{
|
||||
Name: p.Name(),
|
||||
Status: HealthStatusHealthy,
|
||||
Message: "Package manager cache is healthy",
|
||||
Details: details,
|
||||
Critical: p.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *PackageManagerHealthCheck) getCacheSize() (int64, error) {
|
||||
var size int64
|
||||
err := filepath.Walk(p.cacheDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
size += info.Size()
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return size, err
|
||||
}
|
||||
|
||||
// OSTreeHealthCheck checks OSTree tool health
|
||||
type OSTreeHealthCheck struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewOSTreeHealthCheck creates a new OSTree health check
|
||||
func NewOSTreeHealthCheck() *OSTreeHealthCheck {
|
||||
return &OSTreeHealthCheck{
|
||||
logger: logrus.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func (o *OSTreeHealthCheck) Name() string {
|
||||
return "ostree"
|
||||
}
|
||||
|
||||
func (o *OSTreeHealthCheck) IsCritical() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (o *OSTreeHealthCheck) Check() (*HealthCheck, error) {
|
||||
// Check if ostree command is available
|
||||
if _, err := os.Stat("/usr/bin/ostree"); os.IsNotExist(err) {
|
||||
return &HealthCheck{
|
||||
Name: o.Name(),
|
||||
Status: HealthStatusUnhealthy,
|
||||
Message: "OSTree command not found",
|
||||
Critical: o.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Check if apt-ostree is available
|
||||
aptOstreeAvailable := false
|
||||
if _, err := os.Stat("/usr/bin/apt-ostree"); err == nil {
|
||||
aptOstreeAvailable = true
|
||||
}
|
||||
|
||||
details := map[string]interface{}{
|
||||
"ostree_available": true,
|
||||
"apt_ostree_available": aptOstreeAvailable,
|
||||
}
|
||||
|
||||
message := "OSTree tools are available"
|
||||
if aptOstreeAvailable {
|
||||
message += " (including apt-ostree)"
|
||||
} else {
|
||||
message += " (apt-ostree not available)"
|
||||
}
|
||||
|
||||
status := HealthStatusHealthy
|
||||
if !aptOstreeAvailable {
|
||||
status = HealthStatusDegraded
|
||||
}
|
||||
|
||||
return &HealthCheck{
|
||||
Name: o.Name(),
|
||||
Status: status,
|
||||
Message: message,
|
||||
Details: details,
|
||||
Critical: o.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// BuildSystemHealthCheck checks build system connectivity
|
||||
type BuildSystemHealthCheck struct {
|
||||
orchestratorURL string
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewBuildSystemHealthCheck creates a new build system health check
|
||||
func NewBuildSystemHealthCheck(orchestratorURL string) *BuildSystemHealthCheck {
|
||||
return &BuildSystemHealthCheck{
|
||||
orchestratorURL: orchestratorURL,
|
||||
logger: logrus.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func (b *BuildSystemHealthCheck) Name() string {
|
||||
return "build_system"
|
||||
}
|
||||
|
||||
func (b *BuildSystemHealthCheck) IsCritical() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (b *BuildSystemHealthCheck) Check() (*HealthCheck, error) {
|
||||
if b.orchestratorURL == "" {
|
||||
return &HealthCheck{
|
||||
Name: b.Name(),
|
||||
Status: HealthStatusUnknown,
|
||||
Message: "Orchestrator URL not configured",
|
||||
Critical: b.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Try to connect to orchestrator
|
||||
// This is a simplified check - in production you'd want to make an actual HTTP request
|
||||
// For now, we'll simulate a connection check
|
||||
|
||||
details := map[string]interface{}{
|
||||
"orchestrator_url": b.orchestratorURL,
|
||||
"configured": true,
|
||||
}
|
||||
|
||||
// Placeholder: assume healthy if configured
|
||||
// In reality, this would make an HTTP request to /health endpoint
|
||||
return &HealthCheck{
|
||||
Name: b.Name(),
|
||||
Status: HealthStatusHealthy,
|
||||
Message: "Build system is configured and accessible",
|
||||
Details: details,
|
||||
Critical: b.IsCritical(),
|
||||
}, nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue