first commit
commit 57bb8aafbe
27 changed files with 8538 additions and 0 deletions
internal/performance/profiler.go (new file, 446 lines)
@@ -0,0 +1,446 @@
package performance

import (
	"context"
	"fmt"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/mem"
	"github.com/sirupsen/logrus"
)

// Profiler manages performance profiling and metrics collection
type Profiler struct {
	metrics    map[string]*Metric
	collectors map[string]MetricCollector
	mu         sync.RWMutex
	logger     *logrus.Logger
	enabled    bool
	interval   time.Duration
	ctx        context.Context
	cancel     context.CancelFunc
}

// Metric represents a performance metric
type Metric struct {
	Name       string                 `json:"name"`
	Value      float64                `json:"value"`
	Unit       string                 `json:"unit"`
	Timestamp  time.Time              `json:"timestamp"`
	Tags       map[string]string      `json:"tags"`
	Metadata   map[string]interface{} `json:"metadata"`
	History    []MetricPoint          `json:"history,omitempty"`
	MaxHistory int                    `json:"max_history"`
}

// MetricPoint represents a single metric measurement
type MetricPoint struct {
	Value     float64   `json:"value"`
	Timestamp time.Time `json:"timestamp"`
}

// MetricCollector defines the interface for collecting metrics
type MetricCollector interface {
	Collect() (*Metric, error)
	GetName() string
	GetInterval() time.Duration
}

// ProfilerConfig represents profiler configuration
type ProfilerConfig struct {
	Enabled    bool                   `yaml:"enabled"`
	Interval   time.Duration          `yaml:"interval"`
	MaxHistory int                    `yaml:"max_history"`
	Metrics    []string               `yaml:"metrics"`
	Exporters  []string               `yaml:"exporters"`
	Custom     map[string]interface{} `yaml:"custom"`
}

// NewProfiler creates a new performance profiler
func NewProfiler(config *ProfilerConfig) *Profiler {
	if config.Interval == 0 {
		config.Interval = 30 * time.Second
	}
	if config.MaxHistory == 0 {
		config.MaxHistory = 1000
	}

	ctx, cancel := context.WithCancel(context.Background())

	profiler := &Profiler{
		metrics:    make(map[string]*Metric),
		collectors: make(map[string]MetricCollector),
		logger:     logrus.New(),
		enabled:    config.Enabled,
		interval:   config.Interval,
		ctx:        ctx,
		cancel:     cancel,
	}

	// Initialize default collectors
	profiler.initializeDefaultCollectors()

	return profiler
}

// initializeDefaultCollectors initializes default metric collectors
func (p *Profiler) initializeDefaultCollectors() {
	// System metrics collector
	p.RegisterCollector(NewSystemMetricsCollector(p.interval, p.logger))

	// Runtime metrics collector
	p.RegisterCollector(NewRuntimeMetricsCollector(p.interval, p.logger))

	// Compose metrics collector
	p.RegisterCollector(NewComposeMetricsCollector(p.interval, p.logger))

	// Phase metrics collector
	p.RegisterCollector(NewPhaseMetricsCollector(p.interval, p.logger))
}

// RegisterCollector registers a new metric collector
func (p *Profiler) RegisterCollector(collector MetricCollector) {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.collectors[collector.GetName()] = collector
	p.logger.Infof("Registered metric collector: %s", collector.GetName())
}

// Start starts the profiler
func (p *Profiler) Start() error {
	if !p.enabled {
		p.logger.Info("Profiler is disabled")
		return nil
	}

	p.logger.Info("Starting performance profiler")

	// Start metric collection
	go p.collectMetrics()

	// Start metric aggregation
	go p.aggregateMetrics()

	return nil
}

// Stop stops the profiler
func (p *Profiler) Stop() error {
	p.logger.Info("Stopping performance profiler")
	p.cancel()
	return nil
}

// collectMetrics continuously collects metrics from all collectors
func (p *Profiler) collectMetrics() {
	ticker := time.NewTicker(p.interval)
	defer ticker.Stop()

	for {
		select {
		case <-p.ctx.Done():
			return
		case <-ticker.C:
			p.collectAllMetrics()
		}
	}
}

// collectAllMetrics collects metrics from all registered collectors
func (p *Profiler) collectAllMetrics() {
	p.mu.RLock()
	collectors := make([]MetricCollector, 0, len(p.collectors))
	for _, collector := range p.collectors {
		collectors = append(collectors, collector)
	}
	p.mu.RUnlock()

	var wg sync.WaitGroup
	for _, collector := range collectors {
		wg.Add(1)
		go func(c MetricCollector) {
			defer wg.Done()
			if metric, err := c.Collect(); err == nil {
				p.storeMetric(metric)
			} else {
				p.logger.Errorf("Failed to collect metric from %s: %v", c.GetName(), err)
			}
		}(collector)
	}

	wg.Wait()
}

// storeMetric stores a collected metric
func (p *Profiler) storeMetric(metric *Metric) {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Add to history
	if metric.History == nil {
		metric.History = make([]MetricPoint, 0)
	}

	metric.History = append(metric.History, MetricPoint{
		Value:     metric.Value,
		Timestamp: metric.Timestamp,
	})

	// Trim history if it exceeds max size
	if len(metric.History) > metric.MaxHistory {
		metric.History = metric.History[len(metric.History)-metric.MaxHistory:]
	}

	p.metrics[metric.Name] = metric
}

// GetMetric returns a metric by name
func (p *Profiler) GetMetric(name string) (*Metric, bool) {
	p.mu.RLock()
	defer p.mu.RUnlock()

	metric, exists := p.metrics[name]
	return metric, exists
}

// GetAllMetrics returns all collected metrics
func (p *Profiler) GetAllMetrics() map[string]*Metric {
	p.mu.RLock()
	defer p.mu.RUnlock()

	// Create a copy to avoid race conditions
	metrics := make(map[string]*Metric)
	for k, v := range p.metrics {
		metrics[k] = v
	}

	return metrics
}

// GetMetricHistory returns the history of a metric
func (p *Profiler) GetMetricHistory(name string, duration time.Duration) ([]MetricPoint, error) {
	metric, exists := p.GetMetric(name)
	if !exists {
		return nil, fmt.Errorf("metric %s not found", name)
	}

	cutoff := time.Now().Add(-duration)
	var history []MetricPoint

	for _, point := range metric.History {
		if point.Timestamp.After(cutoff) {
			history = append(history, point)
		}
	}

	return history, nil
}

// aggregateMetrics aggregates metrics for reporting
func (p *Profiler) aggregateMetrics() {
	ticker := time.NewTicker(p.interval * 2)
	defer ticker.Stop()

	for {
		select {
		case <-p.ctx.Done():
			return
		case <-ticker.C:
			p.aggregateAllMetrics()
		}
	}
}

// aggregateAllMetrics aggregates all metrics
func (p *Profiler) aggregateAllMetrics() {
	metrics := p.GetAllMetrics()

	// Calculate aggregations
	aggregations := make(map[string]map[string]float64)

	for name, metric := range metrics {
		if len(metric.History) == 0 {
			continue
		}

		values := make([]float64, 0, len(metric.History))
		for _, point := range metric.History {
			values = append(values, point.Value)
		}

		aggregations[name] = map[string]float64{
			"min":    p.min(values),
			"max":    p.max(values),
			"avg":    p.average(values),
			"median": p.median(values),
			"p95":    p.percentile(values, 95),
			"p99":    p.percentile(values, 99),
		}
	}

	// Log aggregations periodically
	p.logger.WithField("aggregations", aggregations).Debug("Metric aggregations calculated")
}

// Utility functions for metric calculations

func (p *Profiler) min(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	min := values[0]
	for _, v := range values {
		if v < min {
			min = v
		}
	}
	return min
}

func (p *Profiler) max(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	max := values[0]
	for _, v := range values {
		if v > max {
			max = v
		}
	}
	return max
}

func (p *Profiler) average(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	sum := 0.0
	for _, v := range values {
		sum += v
	}
	return sum / float64(len(values))
}

func (p *Profiler) median(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	// The median is the 50th percentile
	return p.percentile(values, 50)
}

func (p *Profiler) percentile(values []float64, pct int) float64 {
	if len(values) == 0 {
		return 0
	}
	// Simple nearest-rank percentile over a sorted copy of the values.
	// In production, you might want to use a more sophisticated algorithm.
	sorted := make([]float64, len(values))
	copy(sorted, values)
	sort.Float64s(sorted)

	index := int(float64(pct) / 100.0 * float64(len(sorted)-1))
	if index < 0 {
		index = 0
	}
	if index >= len(sorted) {
		index = len(sorted) - 1
	}
	return sorted[index]
}

// GetPerformanceReport generates a comprehensive performance report
func (p *Profiler) GetPerformanceReport() map[string]interface{} {
	metrics := p.GetAllMetrics()

	report := map[string]interface{}{
		"timestamp": time.Now(),
		"metrics":   metrics,
		"summary":   p.generateSummary(metrics),
		"system":    p.getSystemInfo(),
	}

	return report
}

// generateSummary generates a summary of all metrics
func (p *Profiler) generateSummary(metrics map[string]*Metric) map[string]interface{} {
	summary := map[string]interface{}{
		"total_metrics": len(metrics),
		"categories":    make(map[string]int),
		"alerts":        make([]string, 0),
	}

	// Categorize metrics
	for name, metric := range metrics {
		category := p.getMetricCategory(name)
		summary["categories"].(map[string]int)[category]++

		// Check for alerts; a type assertion is not assignable,
		// so the appended slice is stored back under the map key
		if alert := p.checkMetricAlert(metric); alert != "" {
			summary["alerts"] = append(summary["alerts"].([]string), alert)
		}
	}

	return summary
}

// getMetricCategory determines the category of a metric
func (p *Profiler) getMetricCategory(name string) string {
	switch {
	case contains(name, "cpu") || contains(name, "memory") || contains(name, "disk"):
		return "system"
	case contains(name, "compose") || contains(name, "phase"):
		return "compose"
	case contains(name, "runtime") || contains(name, "goroutine"):
		return "runtime"
	default:
		return "other"
	}
}

// checkMetricAlert checks if a metric should trigger an alert
func (p *Profiler) checkMetricAlert(metric *Metric) string {
	// Example alert logic
	if metric.Name == "cpu_usage" && metric.Value > 90 {
		return fmt.Sprintf("High CPU usage: %.2f%%", metric.Value)
	}
	if metric.Name == "memory_usage" && metric.Value > 85 {
		return fmt.Sprintf("High memory usage: %.2f%%", metric.Value)
	}
	return ""
}

// getSystemInfo gets current system information
func (p *Profiler) getSystemInfo() map[string]interface{} {
	info := map[string]interface{}{
		"go_version": runtime.Version(),
		"go_os":      runtime.GOOS,
		"go_arch":    runtime.GOARCH,
		"num_cpu":    runtime.NumCPU(),
		"timestamp":  time.Now(),
	}

	// Get CPU info
	if cpuInfo, err := cpu.Info(); err == nil && len(cpuInfo) > 0 {
		info["cpu_model"] = cpuInfo[0].ModelName
		info["cpu_cores"] = cpuInfo[0].Cores
	}

	// Get memory info
	if memInfo, err := mem.VirtualMemory(); err == nil {
		info["memory_total"] = memInfo.Total
		info["memory_available"] = memInfo.Available
	}

	return info
}

// contains checks if a string contains a substring
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
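
Usage note: a minimal sketch of how this profiler API might be driven by a caller, written in the same package for illustration. The metric name "cpu_usage" is borrowed from the alert logic above; whether the default collectors (defined elsewhere in this commit) publish under that name is an assumption.

// Hypothetical usage sketch, not part of the commit.
func ExampleProfilerUsage() error {
	cfg := &ProfilerConfig{Enabled: true, Interval: 10 * time.Second}
	profiler := NewProfiler(cfg)
	if err := profiler.Start(); err != nil {
		return err
	}
	defer profiler.Stop()

	time.Sleep(25 * time.Second) // let a couple of collection ticks fire

	// "cpu_usage" is assumed from checkMetricAlert above.
	if metric, ok := profiler.GetMetric("cpu_usage"); ok {
		fmt.Printf("%s = %.2f %s\n", metric.Name, metric.Value, metric.Unit)
	}
	history, err := profiler.GetMetricHistory("cpu_usage", time.Hour)
	if err != nil {
		return err
	}
	fmt.Printf("points in the last hour: %d\n", len(history))
	return nil
}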
internal/performance/scaling.go (new file, 497 lines)
@@ -0,0 +1,497 @@
package performance

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
)

// ScalingManager manages horizontal scaling and load balancing
type ScalingManager struct {
	nodes        map[string]*Node
	loadBalancer *LoadBalancer
	autoscaler   *AutoScaler
	mu           sync.RWMutex
	logger       *logrus.Logger
	enabled      bool
	// ctx/cancel let Stop terminate the health-monitor goroutine
	ctx    context.Context
	cancel context.CancelFunc
}

// Node represents a compute node in the cluster
type Node struct {
	ID           string                 `json:"id"`
	Hostname     string                 `json:"hostname"`
	Address      string                 `json:"address"`
	Port         int                    `json:"port"`
	Status       NodeStatus             `json:"status"`
	Capabilities map[string]interface{} `json:"capabilities"`
	Metrics      *NodeMetrics           `json:"metrics"`
	LastSeen     time.Time              `json:"last_seen"`
	Tags         map[string]string      `json:"tags"`
}

// NodeStatus represents the status of a node
type NodeStatus string

const (
	NodeStatusOnline  NodeStatus = "online"
	NodeStatusOffline NodeStatus = "offline"
	NodeStatusBusy    NodeStatus = "busy"
	NodeStatusError   NodeStatus = "error"
)

// NodeMetrics represents performance metrics for a node
type NodeMetrics struct {
	CPUUsage    float64   `json:"cpu_usage"`
	MemoryUsage float64   `json:"memory_usage"`
	DiskUsage   float64   `json:"disk_usage"`
	LoadAverage float64   `json:"load_average"`
	ActiveJobs  int       `json:"active_jobs"`
	MaxJobs     int       `json:"max_jobs"`
	LastUpdate  time.Time `json:"last_update"`
}

// LoadBalancer manages load distribution across nodes
type LoadBalancer struct {
	strategy LoadBalancingStrategy
	nodes    map[string]*Node
	mu       sync.RWMutex
	logger   *logrus.Logger
}

// LoadBalancingStrategy defines the interface for load balancing strategies
type LoadBalancingStrategy interface {
	SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error)
	GetName() string
}

// LoadRequest represents a load balancing request
type LoadRequest struct {
	Type         string                 `json:"type"`
	Priority     int                    `json:"priority"`
	Requirements map[string]interface{} `json:"requirements"`
	Metadata     map[string]interface{} `json:"metadata"`
}

// AutoScaler manages automatic scaling of the cluster
type AutoScaler struct {
	config  *AutoScalerConfig
	nodes   map[string]*Node
	mu      sync.RWMutex
	logger  *logrus.Logger
	enabled bool
	ctx     context.Context
	cancel  context.CancelFunc
}

// AutoScalerConfig represents auto-scaling configuration
type AutoScalerConfig struct {
	Enabled            bool          `yaml:"enabled"`
	MinNodes           int           `yaml:"min_nodes"`
	MaxNodes           int           `yaml:"max_nodes"`
	ScaleUpThreshold   float64       `yaml:"scale_up_threshold"`
	ScaleDownThreshold float64       `yaml:"scale_down_threshold"`
	ScaleUpCooldown    time.Duration `yaml:"scale_up_cooldown"`
	ScaleDownCooldown  time.Duration `yaml:"scale_down_cooldown"`
	CheckInterval      time.Duration `yaml:"check_interval"`
}

// NewScalingManager creates a new scaling manager
func NewScalingManager(enabled bool) *ScalingManager {
	ctx, cancel := context.WithCancel(context.Background())

	sm := &ScalingManager{
		nodes:   make(map[string]*Node),
		logger:  logrus.New(),
		enabled: enabled,
		ctx:     ctx,
		cancel:  cancel,
	}

	// Initialize load balancer
	sm.loadBalancer = NewLoadBalancer(sm.logger)

	// Initialize auto-scaler
	sm.autoscaler = NewAutoScaler(&AutoScalerConfig{
		Enabled:            true,
		MinNodes:           2,
		MaxNodes:           10,
		ScaleUpThreshold:   80.0,
		ScaleDownThreshold: 20.0,
		ScaleUpCooldown:    5 * time.Minute,
		ScaleDownCooldown:  10 * time.Minute,
		CheckInterval:      30 * time.Second,
	}, sm.logger)

	return sm
}

// RegisterNode registers a new node in the cluster
func (sm *ScalingManager) RegisterNode(node *Node) error {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	// Validate node
	if node.ID == "" {
		return fmt.Errorf("node ID is required")
	}
	if node.Address == "" {
		return fmt.Errorf("node address is required")
	}

	// Check for duplicate
	if _, exists := sm.nodes[node.ID]; exists {
		return fmt.Errorf("node %s already exists", node.ID)
	}

	// Set default values
	if node.Status == "" {
		node.Status = NodeStatusOnline
	}
	if node.Capabilities == nil {
		node.Capabilities = make(map[string]interface{})
	}
	if node.Tags == nil {
		node.Tags = make(map[string]string)
	}
	if node.Metrics == nil {
		node.Metrics = &NodeMetrics{
			LastUpdate: time.Now(),
		}
	}

	node.LastSeen = time.Now()
	sm.nodes[node.ID] = node

	// Update load balancer
	sm.loadBalancer.AddNode(node)

	sm.logger.Infof("Registered node: %s (%s)", node.ID, node.Hostname)
	return nil
}

// UnregisterNode removes a node from the cluster
func (sm *ScalingManager) UnregisterNode(nodeID string) error {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	node, exists := sm.nodes[nodeID]
	if !exists {
		return fmt.Errorf("node %s not found", nodeID)
	}

	delete(sm.nodes, nodeID)
	sm.loadBalancer.RemoveNode(nodeID)

	sm.logger.Infof("Unregistered node: %s (%s)", node.ID, node.Hostname)
	return nil
}

// UpdateNodeMetrics updates metrics for a specific node
func (sm *ScalingManager) UpdateNodeMetrics(nodeID string, metrics *NodeMetrics) error {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	node, exists := sm.nodes[nodeID]
	if !exists {
		return fmt.Errorf("node %s not found", nodeID)
	}

	metrics.LastUpdate = time.Now()
	node.Metrics = metrics
	node.LastSeen = time.Now()

	// Update load balancer
	sm.loadBalancer.UpdateNode(node)

	return nil
}

// GetNode returns a node by ID
func (sm *ScalingManager) GetNode(nodeID string) (*Node, bool) {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	node, exists := sm.nodes[nodeID]
	return node, exists
}

// GetAllNodes returns all registered nodes
func (sm *ScalingManager) GetAllNodes() map[string]*Node {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	// Create a copy to avoid race conditions
	nodes := make(map[string]*Node)
	for k, v := range sm.nodes {
		nodes[k] = v
	}

	return nodes
}

// GetAvailableNodes returns all available nodes
func (sm *ScalingManager) GetAvailableNodes() []*Node {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	var available []*Node
	for _, node := range sm.nodes {
		if node.Status == NodeStatusOnline && node.Metrics.ActiveJobs < node.Metrics.MaxJobs {
			available = append(available, node)
		}
	}

	return available
}

// SelectNode selects a node for a specific request
func (sm *ScalingManager) SelectNode(request *LoadRequest) (*Node, error) {
	return sm.loadBalancer.SelectNode(request)
}

// Start starts the scaling manager
func (sm *ScalingManager) Start() error {
	if !sm.enabled {
		sm.logger.Info("Scaling manager is disabled")
		return nil
	}

	sm.logger.Info("Starting scaling manager")

	// Start auto-scaler
	if err := sm.autoscaler.Start(); err != nil {
		return fmt.Errorf("failed to start auto-scaler: %w", err)
	}

	// Start node health monitoring
	go sm.monitorNodeHealth()

	return nil
}

// Stop stops the scaling manager
func (sm *ScalingManager) Stop() error {
	sm.logger.Info("Stopping scaling manager")

	// Stop the health-monitor goroutine
	sm.cancel()

	// Stop auto-scaler
	if err := sm.autoscaler.Stop(); err != nil {
		return fmt.Errorf("failed to stop auto-scaler: %w", err)
	}

	return nil
}

// monitorNodeHealth monitors the health of all nodes
func (sm *ScalingManager) monitorNodeHealth() {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()

	for {
		select {
		case <-sm.ctx.Done():
			return
		case <-ticker.C:
			sm.checkNodeHealth()
		}
	}
}

// checkNodeHealth checks the health of all nodes
func (sm *ScalingManager) checkNodeHealth() {
	nodes := sm.GetAllNodes()

	for _, node := range nodes {
		// Check if node is responsive
		if time.Since(node.LastSeen) > 2*time.Minute {
			sm.logger.Warnf("Node %s appears to be unresponsive", node.ID)
			sm.markNodeOffline(node.ID)
		}

		// Check metrics freshness
		if node.Metrics != nil && time.Since(node.Metrics.LastUpdate) > 5*time.Minute {
			sm.logger.Warnf("Node %s metrics are stale", node.ID)
		}
	}
}

// markNodeOffline marks a node as offline
func (sm *ScalingManager) markNodeOffline(nodeID string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	if node, exists := sm.nodes[nodeID]; exists {
		node.Status = NodeStatusOffline
		sm.logger.Infof("Marked node %s as offline", nodeID)
	}
}

// GetClusterStatus returns the current status of the cluster
func (sm *ScalingManager) GetClusterStatus() map[string]interface{} {
	nodes := sm.GetAllNodes()

	status := map[string]interface{}{
		"total_nodes":    len(nodes),
		"online_nodes":   0,
		"offline_nodes":  0,
		"busy_nodes":     0,
		"error_nodes":    0,
		"total_capacity": 0,
		"used_capacity":  0,
		"timestamp":      time.Now(),
	}

	for _, node := range nodes {
		switch node.Status {
		case NodeStatusOnline:
			status["online_nodes"] = status["online_nodes"].(int) + 1
		case NodeStatusOffline:
			status["offline_nodes"] = status["offline_nodes"].(int) + 1
		case NodeStatusBusy:
			status["busy_nodes"] = status["busy_nodes"].(int) + 1
		case NodeStatusError:
			status["error_nodes"] = status["error_nodes"].(int) + 1
		}

		if node.Metrics != nil {
			status["total_capacity"] = status["total_capacity"].(int) + node.Metrics.MaxJobs
			status["used_capacity"] = status["used_capacity"].(int) + node.Metrics.ActiveJobs
		}
	}

	// Calculate utilization percentage
	if status["total_capacity"].(int) > 0 {
		utilization := float64(status["used_capacity"].(int)) / float64(status["total_capacity"].(int)) * 100
		status["utilization_percentage"] = utilization
	}

	return status
}

// NewLoadBalancer creates a new load balancer
func NewLoadBalancer(logger *logrus.Logger) *LoadBalancer {
	lb := &LoadBalancer{
		strategy: NewRoundRobinStrategy(),
		nodes:    make(map[string]*Node),
		logger:   logger,
	}

	return lb
}

// AddNode adds a node to the load balancer
func (lb *LoadBalancer) AddNode(node *Node) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	lb.nodes[node.ID] = node
	lb.logger.Debugf("Added node %s to load balancer", node.ID)
}

// RemoveNode removes a node from the load balancer
func (lb *LoadBalancer) RemoveNode(nodeID string) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	delete(lb.nodes, nodeID)
	lb.logger.Debugf("Removed node %s from load balancer", nodeID)
}

// UpdateNode updates a node in the load balancer
func (lb *LoadBalancer) UpdateNode(node *Node) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	lb.nodes[node.ID] = node
}

// SelectNode selects a node using the configured strategy
func (lb *LoadBalancer) SelectNode(request *LoadRequest) (*Node, error) {
	lb.mu.RLock()
	defer lb.mu.RUnlock()

	// Filter available nodes
	availableNodes := make(map[string]*Node)
	for id, node := range lb.nodes {
		if node.Status == NodeStatusOnline && node.Metrics.ActiveJobs < node.Metrics.MaxJobs {
			availableNodes[id] = node
		}
	}

	if len(availableNodes) == 0 {
		return nil, fmt.Errorf("no available nodes")
	}

	return lb.strategy.SelectNode(availableNodes, request)
}

// SetStrategy sets the load balancing strategy
func (lb *LoadBalancer) SetStrategy(strategy LoadBalancingStrategy) {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	lb.strategy = strategy
	lb.logger.Infof("Load balancing strategy changed to: %s", strategy.GetName())
}

// NewAutoScaler creates a new auto-scaler
func NewAutoScaler(config *AutoScalerConfig, logger *logrus.Logger) *AutoScaler {
	ctx, cancel := context.WithCancel(context.Background())

	as := &AutoScaler{
		config:  config,
		nodes:   make(map[string]*Node),
		logger:  logger,
		enabled: config.Enabled,
		ctx:     ctx,
		cancel:  cancel,
	}

	return as
}

// Start starts the auto-scaler
func (as *AutoScaler) Start() error {
	if !as.enabled {
		as.logger.Info("Auto-scaler is disabled")
		return nil
	}

	as.logger.Info("Starting auto-scaler")

	// Start scaling checks
	go as.runScalingChecks()

	return nil
}

// Stop stops the auto-scaler
func (as *AutoScaler) Stop() error {
	as.logger.Info("Stopping auto-scaler")
	as.cancel()
	return nil
}

// runScalingChecks runs periodic scaling checks
func (as *AutoScaler) runScalingChecks() {
	ticker := time.NewTicker(as.config.CheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-as.ctx.Done():
			return
		case <-ticker.C:
			as.checkScaling()
		}
	}
}

// checkScaling checks if scaling is needed
func (as *AutoScaler) checkScaling() {
	// Get current cluster status
	// This would typically come from the scaling manager
	// For now, we'll use placeholder logic

	as.logger.Debug("Running scaling check")

	// Check if we need to scale up
	// Check if we need to scale down
	// Implement scaling logic based on metrics
}
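
Usage note: a minimal sketch of registering a node and routing a request through the scaling manager above, in the same package for illustration. The node ID, hostname, and address are made up; note that MaxJobs must be non-zero, because availability is defined as ActiveJobs < MaxJobs.

// Hypothetical usage sketch, not part of the commit.
func ExampleScalingManagerUsage() error {
	sm := NewScalingManager(true)
	if err := sm.Start(); err != nil {
		return err
	}
	defer sm.Stop()

	// Register a worker node; without MaxJobs > 0 the node is never
	// considered available by GetAvailableNodes or the load balancer.
	node := &Node{
		ID:       "node-1",
		Hostname: "worker-1",
		Address:  "10.0.0.5",
		Port:     8080,
		Metrics:  &NodeMetrics{MaxJobs: 4},
	}
	if err := sm.RegisterNode(node); err != nil {
		return err
	}

	// Route a request using the configured strategy (round-robin by default).
	selected, err := sm.SelectNode(&LoadRequest{Type: "build", Priority: 1})
	if err != nil {
		return err
	}
	fmt.Printf("dispatching to %s (%s)\n", selected.ID, selected.Address)
	return nil
}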
internal/performance/strategies.go (new file, 458 lines)
@@ -0,0 +1,458 @@
package performance

import (
	"fmt"
	"math"
	"math/rand"
	"sort"
	"sync"
	"time"
)

// RoundRobinStrategy implements round-robin load balancing
type RoundRobinStrategy struct {
	currentIndex int
	mu           sync.Mutex
}

// NewRoundRobinStrategy creates a new round-robin strategy
func NewRoundRobinStrategy() *RoundRobinStrategy {
	return &RoundRobinStrategy{
		currentIndex: 0,
	}
}

// SelectNode selects a node using round-robin algorithm
func (rr *RoundRobinStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	rr.mu.Lock()
	defer rr.mu.Unlock()

	// Convert map to slice for indexing
	nodeSlice := make([]*Node, 0, len(nodes))
	for _, node := range nodes {
		nodeSlice = append(nodeSlice, node)
	}

	// Sort by ID for consistent ordering
	sort.Slice(nodeSlice, func(i, j int) bool {
		return nodeSlice[i].ID < nodeSlice[j].ID
	})

	// Select next node in round-robin fashion
	selectedNode := nodeSlice[rr.currentIndex%len(nodeSlice)]
	rr.currentIndex++

	return selectedNode, nil
}

// GetName returns the strategy name
func (rr *RoundRobinStrategy) GetName() string {
	return "round_robin"
}

// LeastConnectionsStrategy implements least connections load balancing
type LeastConnectionsStrategy struct{}

// NewLeastConnectionsStrategy creates a new least connections strategy
func NewLeastConnectionsStrategy() *LeastConnectionsStrategy {
	return &LeastConnectionsStrategy{}
}

// SelectNode selects a node with the least active connections
func (lc *LeastConnectionsStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	var selectedNode *Node
	minConnections := int(^uint(0) >> 1) // Max int

	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}

		activeJobs := node.Metrics.ActiveJobs
		if activeJobs < minConnections {
			minConnections = activeJobs
			selectedNode = node
		}
	}

	if selectedNode == nil {
		return nil, fmt.Errorf("no suitable node found")
	}

	return selectedNode, nil
}

// GetName returns the strategy name
func (lc *LeastConnectionsStrategy) GetName() string {
	return "least_connections"
}

// WeightedRoundRobinStrategy implements weighted round-robin load balancing
type WeightedRoundRobinStrategy struct {
	currentIndex int
	mu           sync.Mutex
}

// NewWeightedRoundRobinStrategy creates a new weighted round-robin strategy
func NewWeightedRoundRobinStrategy() *WeightedRoundRobinStrategy {
	return &WeightedRoundRobinStrategy{
		currentIndex: 0,
	}
}

// SelectNode selects a node using weighted round-robin algorithm
func (wr *WeightedRoundRobinStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	wr.mu.Lock()
	defer wr.mu.Unlock()

	// Convert map to slice and calculate weights
	type weightedNode struct {
		node   *Node
		weight int
	}

	var weightedNodes []weightedNode
	for _, node := range nodes {
		weight := 1 // Default weight

		// Calculate weight based on node capabilities and current load
		if node.Metrics != nil {
			// Higher weight for nodes with more capacity
			availableCapacity := node.Metrics.MaxJobs - node.Metrics.ActiveJobs
			if availableCapacity > 0 {
				weight = availableCapacity
			}
		}

		// Apply tags-based weight adjustments
		if node.Tags != nil {
			if priority, ok := node.Tags["priority"]; ok {
				switch priority {
				case "high":
					weight *= 2
				case "low":
					weight /= 2
				}
			}
		}

		weightedNodes = append(weightedNodes, weightedNode{node: node, weight: weight})
	}

	// Sort by weight (descending)
	sort.Slice(weightedNodes, func(i, j int) bool {
		return weightedNodes[i].weight > weightedNodes[j].weight
	})

	// Select next node in weighted round-robin fashion
	selectedNode := weightedNodes[wr.currentIndex%len(weightedNodes)].node
	wr.currentIndex++

	return selectedNode, nil
}

// GetName returns the strategy name
func (wr *WeightedRoundRobinStrategy) GetName() string {
	return "weighted_round_robin"
}

// RandomStrategy implements random load balancing
type RandomStrategy struct {
	rand *rand.Rand
	mu   sync.Mutex
}

// NewRandomStrategy creates a new random strategy
func NewRandomStrategy() *RandomStrategy {
	return &RandomStrategy{
		rand: rand.New(rand.NewSource(time.Now().UnixNano())),
	}
}

// SelectNode selects a random node
func (r *RandomStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	r.mu.Lock()
	defer r.mu.Unlock()

	// Convert map to slice
	nodeSlice := make([]*Node, 0, len(nodes))
	for _, node := range nodes {
		nodeSlice = append(nodeSlice, node)
	}

	// Select random node
	randomIndex := r.rand.Intn(len(nodeSlice))
	return nodeSlice[randomIndex], nil
}

// GetName returns the strategy name
func (r *RandomStrategy) GetName() string {
	return "random"
}

// LeastResponseTimeStrategy implements least response time load balancing
type LeastResponseTimeStrategy struct{}

// NewLeastResponseTimeStrategy creates a new least response time strategy
func NewLeastResponseTimeStrategy() *LeastResponseTimeStrategy {
	return &LeastResponseTimeStrategy{}
}

// SelectNode selects a node with the least response time
func (lrt *LeastResponseTimeStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	var selectedNode *Node
	minResponseTime := math.MaxFloat64

	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}

		// Use load average as a proxy for response time
		// In a real implementation, you'd have actual response time metrics
		responseTime := node.Metrics.LoadAverage
		if responseTime < minResponseTime {
			minResponseTime = responseTime
			selectedNode = node
		}
	}

	if selectedNode == nil {
		return nil, fmt.Errorf("no suitable node found")
	}

	return selectedNode, nil
}

// GetName returns the strategy name
func (lrt *LeastResponseTimeStrategy) GetName() string {
	return "least_response_time"
}

// IPHashStrategy implements IP hash load balancing
type IPHashStrategy struct{}

// NewIPHashStrategy creates a new IP hash strategy
func NewIPHashStrategy() *IPHashStrategy {
	return &IPHashStrategy{}
}

// SelectNode selects a node using IP hash algorithm
func (ih *IPHashStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	// Extract client IP from request metadata
	clientIP := "127.0.0.1" // Default IP
	if request.Metadata != nil {
		if ip, ok := request.Metadata["client_ip"].(string); ok {
			clientIP = ip
		}
	}

	// Calculate hash of client IP
	hash := hashString(clientIP)

	// Convert map to slice for indexing
	nodeSlice := make([]*Node, 0, len(nodes))
	for _, node := range nodes {
		nodeSlice = append(nodeSlice, node)
	}

	// Sort by ID for consistent ordering
	sort.Slice(nodeSlice, func(i, j int) bool {
		return nodeSlice[i].ID < nodeSlice[j].ID
	})

	// Select node based on hash
	selectedIndex := hash % uint32(len(nodeSlice))
	return nodeSlice[selectedIndex], nil
}

// GetName returns the strategy name
func (ih *IPHashStrategy) GetName() string {
	return "ip_hash"
}

// hashString calculates a simple hash of a string
func hashString(s string) uint32 {
	var hash uint32
	for _, char := range s {
		hash = ((hash << 5) + hash) + uint32(char)
	}
	return hash
}

// AdaptiveStrategy implements adaptive load balancing based on multiple factors
type AdaptiveStrategy struct {
	mu sync.Mutex
}

// NewAdaptiveStrategy creates a new adaptive strategy
func NewAdaptiveStrategy() *AdaptiveStrategy {
	return &AdaptiveStrategy{}
}

// SelectNode selects a node using adaptive algorithm
func (a *AdaptiveStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}

	a.mu.Lock()
	defer a.mu.Unlock()

	// Score each node based on multiple factors
	type scoredNode struct {
		node  *Node
		score float64
	}

	var scoredNodes []scoredNode

	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}

		score := a.calculateNodeScore(node, request)
		scoredNodes = append(scoredNodes, scoredNode{node: node, score: score})
	}

	if len(scoredNodes) == 0 {
		return nil, fmt.Errorf("no suitable node found")
	}

	// Sort by score (descending)
	sort.Slice(scoredNodes, func(i, j int) bool {
		return scoredNodes[i].score > scoredNodes[j].score
	})

	// Return the highest scoring node
	return scoredNodes[0].node, nil
}

// calculateNodeScore calculates a score for a node based on multiple factors
func (a *AdaptiveStrategy) calculateNodeScore(node *Node, request *LoadRequest) float64 {
	score := 100.0

	if node.Metrics == nil {
		return score
	}

	// Factor 1: Available capacity (higher is better)
	availableCapacity := float64(node.Metrics.MaxJobs - node.Metrics.ActiveJobs)
	if node.Metrics.MaxJobs > 0 {
		capacityRatio := availableCapacity / float64(node.Metrics.MaxJobs)
		score += capacityRatio * 50 // Up to 50 points for capacity
	}

	// Factor 2: System load (lower is better)
	if node.Metrics.LoadAverage > 0 {
		loadScore := 100.0 - (node.Metrics.LoadAverage * 10)
		if loadScore < 0 {
			loadScore = 0
		}
		score += loadScore * 0.3 // Up to 30 points for load
	}

	// Factor 3: Resource usage (lower is better)
	cpuScore := 100.0 - node.Metrics.CPUUsage
	memoryScore := 100.0 - node.Metrics.MemoryUsage

	score += cpuScore * 0.1    // Up to 10 points for CPU
	score += memoryScore * 0.1 // Up to 10 points for memory

	// Factor 4: Priority-based adjustments
	if node.Tags != nil {
		if priority, ok := node.Tags["priority"]; ok {
			switch priority {
			case "high":
				score += 20
			case "low":
				score -= 20
			}
		}
	}

	// Factor 5: Request-specific requirements
	if request.Requirements != nil {
		if arch, ok := request.Requirements["architecture"].(string); ok {
			if nodeArch, ok := node.Capabilities["architecture"].(string); ok {
				if arch == nodeArch {
					score += 25 // Bonus for architecture match
				}
			}
		}
	}

	return score
}

// GetName returns the strategy name
func (a *AdaptiveStrategy) GetName() string {
	return "adaptive"
}

// StrategyFactory creates load balancing strategies
type StrategyFactory struct{}

// NewStrategyFactory creates a new strategy factory
func NewStrategyFactory() *StrategyFactory {
	return &StrategyFactory{}
}

// CreateStrategy creates a strategy by name
func (sf *StrategyFactory) CreateStrategy(name string) (LoadBalancingStrategy, error) {
	switch name {
	case "round_robin":
		return NewRoundRobinStrategy(), nil
	case "least_connections":
		return NewLeastConnectionsStrategy(), nil
	case "weighted_round_robin":
		return NewWeightedRoundRobinStrategy(), nil
	case "random":
		return NewRandomStrategy(), nil
	case "least_response_time":
		return NewLeastResponseTimeStrategy(), nil
	case "ip_hash":
		return NewIPHashStrategy(), nil
	case "adaptive":
		return NewAdaptiveStrategy(), nil
	default:
		return nil, fmt.Errorf("unknown strategy: %s", name)
	}
}

// GetAvailableStrategies returns all available strategy names
func (sf *StrategyFactory) GetAvailableStrategies() []string {
	return []string{
		"round_robin",
		"least_connections",
		"weighted_round_robin",
		"random",
		"least_response_time",
		"ip_hash",
		"adaptive",
	}
}
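
Usage note: a minimal sketch of swapping the load-balancing strategy at runtime through the factory above, assuming a LoadBalancer built by NewLoadBalancer from scaling.go.

// Hypothetical usage sketch, not part of the commit.
func ExampleStrategySwitch(lb *LoadBalancer) error {
	factory := NewStrategyFactory()
	strategy, err := factory.CreateStrategy("least_connections")
	if err != nil {
		return err
	}
	lb.SetStrategy(strategy) // logs the change and applies to future SelectNode calls
	return nil
}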