first commit

This commit is contained in:
robojerk 2025-08-18 23:32:51 -07:00
commit 57bb8aafbe
27 changed files with 8538 additions and 0 deletions

View file

@ -0,0 +1,446 @@
package performance
import (
	"context"
	"fmt"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/disk"
	"github.com/shirou/gopsutil/v3/mem"
	"github.com/sirupsen/logrus"
)
// Profiler manages performance profiling and metrics collection.
// Registered MetricCollectors are sampled on a fixed interval by the
// goroutines started in Start; samples are stored per metric name with a
// bounded history.
type Profiler struct {
	metrics    map[string]*Metric         // latest metric per name, including history
	collectors map[string]MetricCollector // registered collectors, keyed by GetName()
	mu         sync.RWMutex               // guards metrics and collectors
	logger     *logrus.Logger
	enabled    bool          // when false, Start is a no-op
	interval   time.Duration // collection cadence
	ctx        context.Context
	cancel     context.CancelFunc // Stop cancels this to end the background goroutines
}

// Metric represents a performance metric sample plus its retained history.
type Metric struct {
	Name       string                 `json:"name"`
	Value      float64                `json:"value"`
	Unit       string                 `json:"unit"`
	Timestamp  time.Time              `json:"timestamp"`
	Tags       map[string]string      `json:"tags"`
	Metadata   map[string]interface{} `json:"metadata"`
	History    []MetricPoint          `json:"history,omitempty"` // oldest-first samples
	MaxHistory int                    `json:"max_history"`       // history cap enforced by storeMetric
}

// MetricPoint represents a single metric measurement.
type MetricPoint struct {
	Value     float64   `json:"value"`
	Timestamp time.Time `json:"timestamp"`
}

// MetricCollector defines the interface for collecting metrics.
type MetricCollector interface {
	// Collect produces one sample; on error the sample is skipped and logged.
	Collect() (*Metric, error)
	// GetName identifies the collector; also used as its registry key.
	GetName() string
	// GetInterval is the collector's preferred sampling interval.
	GetInterval() time.Duration
}

// ProfilerConfig represents profiler configuration; see NewProfiler for the
// defaults applied to zero-valued fields.
type ProfilerConfig struct {
	Enabled    bool                   `yaml:"enabled"`
	Interval   time.Duration          `yaml:"interval"`    // default: 30s
	MaxHistory int                    `yaml:"max_history"` // default: 1000
	Metrics    []string               `yaml:"metrics"`
	Exporters  []string               `yaml:"exporters"`
	Custom     map[string]interface{} `yaml:"custom"`
}
// NewProfiler creates a new performance profiler.
//
// A nil config no longer panics: it is treated as an all-default, disabled
// profiler. Zero-valued Interval and MaxHistory fields are populated with
// defaults (30s / 1000); note the caller's config is updated in place, as
// in the original implementation.
func NewProfiler(config *ProfilerConfig) *Profiler {
	if config == nil {
		config = &ProfilerConfig{}
	}
	if config.Interval == 0 {
		config.Interval = 30 * time.Second
	}
	if config.MaxHistory == 0 {
		config.MaxHistory = 1000
	}
	ctx, cancel := context.WithCancel(context.Background())
	profiler := &Profiler{
		metrics:    make(map[string]*Metric),
		collectors: make(map[string]MetricCollector),
		logger:     logrus.New(),
		enabled:    config.Enabled,
		interval:   config.Interval,
		ctx:        ctx,
		cancel:     cancel,
	}
	// Initialize default collectors
	profiler.initializeDefaultCollectors()
	return profiler
}
// initializeDefaultCollectors registers the built-in collectors (system,
// Go runtime, compose, and phase metrics), each constructed with the
// profiler's configured interval and logger. The collector constructors
// are defined elsewhere in this package.
func (p *Profiler) initializeDefaultCollectors() {
	// System metrics collector
	p.RegisterCollector(NewSystemMetricsCollector(p.interval, p.logger))
	// Runtime metrics collector
	p.RegisterCollector(NewRuntimeMetricsCollector(p.interval, p.logger))
	// Compose metrics collector
	p.RegisterCollector(NewComposeMetricsCollector(p.interval, p.logger))
	// Phase metrics collector
	p.RegisterCollector(NewPhaseMetricsCollector(p.interval, p.logger))
}
// RegisterCollector adds a collector under its reported name, replacing
// any previously registered collector with the same name.
func (p *Profiler) RegisterCollector(collector MetricCollector) {
	p.mu.Lock()
	defer p.mu.Unlock()
	name := collector.GetName()
	p.collectors[name] = collector
	p.logger.Infof("Registered metric collector: %s", name)
}
// Start launches the background collection and aggregation goroutines.
// When the profiler is disabled this is a no-op and still returns nil.
func (p *Profiler) Start() error {
	if !p.enabled {
		p.logger.Info("Profiler is disabled")
		return nil
	}
	p.logger.Info("Starting performance profiler")
	go p.collectMetrics()   // periodic collection loop
	go p.aggregateMetrics() // periodic aggregation loop
	return nil
}
// Stop cancels the profiler's context, which terminates the collection
// and aggregation goroutines started by Start.
func (p *Profiler) Stop() error {
	p.logger.Info("Stopping performance profiler")
	p.cancel()
	return nil
}
// collectMetrics drives periodic metric collection on the configured
// interval until the profiler's context is cancelled.
func (p *Profiler) collectMetrics() {
	ticker := time.NewTicker(p.interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			p.collectAllMetrics()
		case <-p.ctx.Done():
			return
		}
	}
}
// collectAllMetrics snapshots the registered collectors under the read
// lock, then runs each Collect concurrently; successful samples are
// stored and failures are logged.
func (p *Profiler) collectAllMetrics() {
	p.mu.RLock()
	snapshot := make([]MetricCollector, 0, len(p.collectors))
	for _, c := range p.collectors {
		snapshot = append(snapshot, c)
	}
	p.mu.RUnlock()

	var wg sync.WaitGroup
	for _, c := range snapshot {
		wg.Add(1)
		go func(col MetricCollector) {
			defer wg.Done()
			metric, err := col.Collect()
			if err != nil {
				p.logger.Errorf("Failed to collect metric from %s: %v", col.GetName(), err)
				return
			}
			p.storeMetric(metric)
		}(c)
	}
	wg.Wait()
}
// storeMetric appends the metric's current value to its history (trimmed
// to the newest MaxHistory points) and records it under its name.
//
// Fix: collectors frequently leave MaxHistory at its zero value; the old
// trim (`len > MaxHistory` with MaxHistory == 0) then cut the history to
// zero entries on every sample, silently discarding all history. A
// non-positive MaxHistory now falls back to the profiler-wide default of
// 1000 (matching NewProfiler's default).
func (p *Profiler) storeMetric(metric *Metric) {
	p.mu.Lock()
	defer p.mu.Unlock()
	maxHistory := metric.MaxHistory
	if maxHistory <= 0 {
		maxHistory = 1000
	}
	// append handles a nil History slice, so no explicit initialization needed.
	metric.History = append(metric.History, MetricPoint{
		Value:     metric.Value,
		Timestamp: metric.Timestamp,
	})
	if len(metric.History) > maxHistory {
		metric.History = metric.History[len(metric.History)-maxHistory:]
	}
	p.metrics[metric.Name] = metric
}
// GetMetric looks up a metric by name; the second result reports whether
// the metric exists.
func (p *Profiler) GetMetric(name string) (*Metric, bool) {
	p.mu.RLock()
	defer p.mu.RUnlock()
	m, ok := p.metrics[name]
	return m, ok
}
// GetAllMetrics returns a shallow copy of the metrics map so callers can
// iterate without holding the profiler's lock. Note the *Metric values
// themselves are shared, not cloned.
func (p *Profiler) GetAllMetrics() map[string]*Metric {
	p.mu.RLock()
	defer p.mu.RUnlock()
	out := make(map[string]*Metric, len(p.metrics))
	for name, m := range p.metrics {
		out[name] = m
	}
	return out
}
// GetMetricHistory returns the points recorded for the named metric within
// the trailing duration window. Unknown metric names yield an error.
func (p *Profiler) GetMetricHistory(name string, duration time.Duration) ([]MetricPoint, error) {
	m, ok := p.GetMetric(name)
	if !ok {
		return nil, fmt.Errorf("metric %s not found", name)
	}
	cutoff := time.Now().Add(-duration)
	var recent []MetricPoint
	for _, pt := range m.History {
		if pt.Timestamp.After(cutoff) {
			recent = append(recent, pt)
		}
	}
	return recent, nil
}
// aggregateMetrics periodically recomputes metric aggregations (at twice
// the collection interval) until the profiler's context is cancelled.
func (p *Profiler) aggregateMetrics() {
	ticker := time.NewTicker(p.interval * 2)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			p.aggregateAllMetrics()
		case <-p.ctx.Done():
			return
		}
	}
}
// aggregateAllMetrics computes min/max/avg/median/p95/p99 over each
// metric's recorded history and logs the result at debug level.
func (p *Profiler) aggregateAllMetrics() {
	aggregations := make(map[string]map[string]float64)
	for name, metric := range p.GetAllMetrics() {
		if len(metric.History) == 0 {
			continue
		}
		values := make([]float64, 0, len(metric.History))
		for _, pt := range metric.History {
			values = append(values, pt.Value)
		}
		aggregations[name] = map[string]float64{
			"min":    p.min(values),
			"max":    p.max(values),
			"avg":    p.average(values),
			"median": p.median(values),
			"p95":    p.percentile(values, 95),
			"p99":    p.percentile(values, 99),
		}
	}
	p.logger.WithField("aggregations", aggregations).Debug("Metric aggregations calculated")
}
// Utility functions for metric calculations

// min returns the smallest value in the slice, or 0 for an empty slice.
func (p *Profiler) min(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	lowest := values[0]
	for _, v := range values[1:] {
		if v < lowest {
			lowest = v
		}
	}
	return lowest
}
// max returns the largest value in the slice, or 0 for an empty slice.
func (p *Profiler) max(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	highest := values[0]
	for _, v := range values[1:] {
		if v > highest {
			highest = v
		}
	}
	return highest
}
// average returns the arithmetic mean of values, or 0 for an empty slice.
func (p *Profiler) average(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	var total float64
	for _, v := range values {
		total += v
	}
	return total / float64(len(values))
}
// median returns the 50th percentile of values (0 for an empty slice).
func (p *Profiler) median(values []float64) float64 {
	if len(values) == 0 {
		return 0
	}
	return p.percentile(values, 50)
}
// percentile returns the pct-th percentile of values using nearest-rank
// interpolation on the index, or 0 for an empty slice.
//
// Fix: the previous implementation indexed into the slice WITHOUT sorting
// it — the values arrive in time order (metric history), so the result was
// an arbitrary sample rather than a percentile. We now sort a copy
// (leaving the caller's slice untouched, since min/max/average are
// computed from the same slice).
func (p *Profiler) percentile(values []float64, pct int) float64 {
	if len(values) == 0 {
		return 0
	}
	sorted := make([]float64, len(values))
	copy(sorted, values)
	sort.Float64s(sorted)
	index := int(float64(pct) / 100.0 * float64(len(sorted)-1))
	if index < 0 {
		index = 0
	}
	if index >= len(sorted) {
		index = len(sorted) - 1
	}
	return sorted[index]
}
// GetPerformanceReport assembles a point-in-time report containing every
// collected metric, a categorized summary, and basic host information.
func (p *Profiler) GetPerformanceReport() map[string]interface{} {
	metrics := p.GetAllMetrics()
	return map[string]interface{}{
		"timestamp": time.Now(),
		"metrics":   metrics,
		"summary":   p.generateSummary(metrics),
		"system":    p.getSystemInfo(),
	}
}
// generateSummary builds a summary of the supplied metrics: the total
// count, a per-category tally, and any threshold alerts.
//
// Fix: the original wrote `summary["alerts"].([]string) = append(...)`,
// which assigns to a type-assertion expression — that is not assignable
// in Go and does not compile. Accumulating into plain local variables and
// building the map at the end fixes this and avoids repeated assertions.
func (p *Profiler) generateSummary(metrics map[string]*Metric) map[string]interface{} {
	categories := make(map[string]int)
	alerts := make([]string, 0)
	for name, metric := range metrics {
		categories[p.getMetricCategory(name)]++
		if alert := p.checkMetricAlert(metric); alert != "" {
			alerts = append(alerts, alert)
		}
	}
	return map[string]interface{}{
		"total_metrics": len(metrics),
		"categories":    categories,
		"alerts":        alerts,
	}
}
// getMetricCategory buckets a metric name into system / compose / runtime
// / other based on well-known substrings in the name.
func (p *Profiler) getMetricCategory(name string) string {
	if contains(name, "cpu") || contains(name, "memory") || contains(name, "disk") {
		return "system"
	}
	if contains(name, "compose") || contains(name, "phase") {
		return "compose"
	}
	if contains(name, "runtime") || contains(name, "goroutine") {
		return "runtime"
	}
	return "other"
}
// checkMetricAlert returns a human-readable alert string when a metric
// crosses its hard-coded threshold (cpu_usage > 90, memory_usage > 85),
// or the empty string when no alert applies.
func (p *Profiler) checkMetricAlert(metric *Metric) string {
	switch {
	case metric.Name == "cpu_usage" && metric.Value > 90:
		return fmt.Sprintf("High CPU usage: %.2f%%", metric.Value)
	case metric.Name == "memory_usage" && metric.Value > 85:
		return fmt.Sprintf("High memory usage: %.2f%%", metric.Value)
	default:
		return ""
	}
}
// getSystemInfo gathers Go runtime details plus best-effort CPU and memory
// information from gopsutil; on collection errors those fields are simply
// omitted from the result.
func (p *Profiler) getSystemInfo() map[string]interface{} {
	info := map[string]interface{}{
		"go_version": runtime.Version(),
		"go_os":      runtime.GOOS,
		"go_arch":    runtime.GOARCH,
		"num_cpu":    runtime.NumCPU(),
		"timestamp":  time.Now(),
	}
	if cpus, err := cpu.Info(); err == nil && len(cpus) > 0 {
		info["cpu_model"] = cpus[0].ModelName
		info["cpu_cores"] = cpus[0].Cores
	}
	if vm, err := mem.VirtualMemory(); err == nil {
		info["memory_total"] = vm.Total
		info["memory_available"] = vm.Available
	}
	return info
}
// contains reports whether substr occurs anywhere within s.
//
// Fix: the previous hand-rolled implementation recursed once per starting
// offset (deep recursion on long inputs, quadratic comparisons). The
// standard library's strings.Contains has identical semantics — including
// the empty-substring case — and is both clearer and faster.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}

View file

@ -0,0 +1,497 @@
package performance
import (
"context"
"fmt"
"sync"
"time"
"github.com/sirupsen/logrus"
)
// ScalingManager manages horizontal scaling and load balancing across a
// registry of compute nodes.
type ScalingManager struct {
	nodes        map[string]*Node // registered nodes, keyed by node ID
	loadBalancer *LoadBalancer
	autoscaler   *AutoScaler
	mu           sync.RWMutex // guards nodes
	logger       *logrus.Logger
	enabled      bool // when false, Start is a no-op
}

// Node represents a compute node in the cluster.
type Node struct {
	ID           string                 `json:"id"`
	Hostname     string                 `json:"hostname"`
	Address      string                 `json:"address"`
	Port         int                    `json:"port"`
	Status       NodeStatus             `json:"status"`
	Capabilities map[string]interface{} `json:"capabilities"` // e.g. "architecture", consulted by AdaptiveStrategy
	Metrics      *NodeMetrics           `json:"metrics"`
	LastSeen     time.Time              `json:"last_seen"` // heartbeat timestamp used by health checks
	Tags         map[string]string      `json:"tags"`      // e.g. "priority": "high"/"low" for weighting
}

// NodeStatus represents the status of a node.
type NodeStatus string

const (
	NodeStatusOnline  NodeStatus = "online"
	NodeStatusOffline NodeStatus = "offline"
	NodeStatusBusy    NodeStatus = "busy"
	NodeStatusError   NodeStatus = "error"
)

// NodeMetrics represents performance metrics for a node.
type NodeMetrics struct {
	CPUUsage    float64   `json:"cpu_usage"`
	MemoryUsage float64   `json:"memory_usage"`
	DiskUsage   float64   `json:"disk_usage"`
	LoadAverage float64   `json:"load_average"`
	ActiveJobs  int       `json:"active_jobs"`
	MaxJobs     int       `json:"max_jobs"` // capacity ceiling used for availability checks
	LastUpdate  time.Time `json:"last_update"`
}

// LoadBalancer manages load distribution across nodes via a pluggable
// LoadBalancingStrategy.
type LoadBalancer struct {
	strategy LoadBalancingStrategy
	nodes    map[string]*Node
	mu       sync.RWMutex // guards strategy and nodes
	logger   *logrus.Logger
}

// LoadBalancingStrategy defines the interface for load balancing strategies.
type LoadBalancingStrategy interface {
	// SelectNode picks one node from the (pre-filtered) candidate set.
	SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error)
	// GetName returns the strategy's registry name.
	GetName() string
}

// LoadRequest represents a load balancing request.
type LoadRequest struct {
	Type         string                 `json:"type"`
	Priority     int                    `json:"priority"`
	Requirements map[string]interface{} `json:"requirements"` // e.g. "architecture" for node matching
	Metadata     map[string]interface{} `json:"metadata"`     // e.g. "client_ip" used by IPHashStrategy
}

// AutoScaler manages automatic scaling of the cluster.
type AutoScaler struct {
	config  *AutoScalerConfig
	nodes   map[string]*Node
	mu      sync.RWMutex
	logger  *logrus.Logger
	enabled bool // mirrors config.Enabled at construction time
	ctx     context.Context
	cancel  context.CancelFunc // Stop cancels this to end runScalingChecks
}

// AutoScalerConfig represents auto-scaling configuration.
type AutoScalerConfig struct {
	Enabled            bool          `yaml:"enabled"`
	MinNodes           int           `yaml:"min_nodes"`
	MaxNodes           int           `yaml:"max_nodes"`
	ScaleUpThreshold   float64       `yaml:"scale_up_threshold"`
	ScaleDownThreshold float64       `yaml:"scale_down_threshold"`
	ScaleUpCooldown    time.Duration `yaml:"scale_up_cooldown"`
	ScaleDownCooldown  time.Duration `yaml:"scale_down_cooldown"`
	CheckInterval      time.Duration `yaml:"check_interval"`
}
// NewScalingManager builds a scaling manager with an empty node registry,
// a round-robin load balancer, and an auto-scaler configured with
// hard-coded default thresholds (2–10 nodes, 80%/20% scale thresholds).
func NewScalingManager(enabled bool) *ScalingManager {
	logger := logrus.New()
	sm := &ScalingManager{
		nodes:   make(map[string]*Node),
		logger:  logger,
		enabled: enabled,
	}
	sm.loadBalancer = NewLoadBalancer(logger)
	defaults := &AutoScalerConfig{
		Enabled:            true,
		MinNodes:           2,
		MaxNodes:           10,
		ScaleUpThreshold:   80.0,
		ScaleDownThreshold: 20.0,
		ScaleUpCooldown:    5 * time.Minute,
		ScaleDownCooldown:  10 * time.Minute,
		CheckInterval:      30 * time.Second,
	}
	sm.autoscaler = NewAutoScaler(defaults, logger)
	return sm
}
// RegisterNode validates a node, fills zero-valued fields with defaults
// (online status, empty capability/tag maps, fresh metrics), records it in
// the registry, and wires it into the load balancer. It fails on a missing
// ID or address, or a duplicate ID.
func (sm *ScalingManager) RegisterNode(node *Node) error {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	if node.ID == "" {
		return fmt.Errorf("node ID is required")
	}
	if node.Address == "" {
		return fmt.Errorf("node address is required")
	}
	if _, dup := sm.nodes[node.ID]; dup {
		return fmt.Errorf("node %s already exists", node.ID)
	}
	if node.Status == "" {
		node.Status = NodeStatusOnline
	}
	if node.Capabilities == nil {
		node.Capabilities = make(map[string]interface{})
	}
	if node.Tags == nil {
		node.Tags = make(map[string]string)
	}
	if node.Metrics == nil {
		node.Metrics = &NodeMetrics{LastUpdate: time.Now()}
	}
	node.LastSeen = time.Now()
	sm.nodes[node.ID] = node
	sm.loadBalancer.AddNode(node)
	sm.logger.Infof("Registered node: %s (%s)", node.ID, node.Hostname)
	return nil
}
// UnregisterNode removes a node from both the registry and the load
// balancer, erroring if the node is unknown.
func (sm *ScalingManager) UnregisterNode(nodeID string) error {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	node, ok := sm.nodes[nodeID]
	if !ok {
		return fmt.Errorf("node %s not found", nodeID)
	}
	delete(sm.nodes, nodeID)
	sm.loadBalancer.RemoveNode(nodeID)
	sm.logger.Infof("Unregistered node: %s (%s)", node.ID, node.Hostname)
	return nil
}
// UpdateNodeMetrics replaces a node's metrics, stamping both the metrics
// and the node with the current time, and pushes the updated node to the
// load balancer. Returns an error for an unknown node or nil metrics.
//
// Fix: a nil metrics argument previously caused a nil-pointer panic on
// `metrics.LastUpdate = ...`; it now fails with a clear error instead
// (which also keeps node.Metrics from being set to nil and tripping up
// the availability checks elsewhere).
func (sm *ScalingManager) UpdateNodeMetrics(nodeID string, metrics *NodeMetrics) error {
	if metrics == nil {
		return fmt.Errorf("metrics must not be nil")
	}
	sm.mu.Lock()
	defer sm.mu.Unlock()
	node, ok := sm.nodes[nodeID]
	if !ok {
		return fmt.Errorf("node %s not found", nodeID)
	}
	now := time.Now()
	metrics.LastUpdate = now
	node.Metrics = metrics
	node.LastSeen = now
	// Propagate to the load balancer's view of the node.
	sm.loadBalancer.UpdateNode(node)
	return nil
}
// GetNode looks up a node by ID; the second result reports whether it is
// registered.
func (sm *ScalingManager) GetNode(nodeID string) (*Node, bool) {
	sm.mu.RLock()
	defer sm.mu.RUnlock()
	n, ok := sm.nodes[nodeID]
	return n, ok
}
// GetAllNodes returns a shallow copy of the node registry so callers can
// iterate without holding the manager's lock; the *Node values themselves
// are shared.
func (sm *ScalingManager) GetAllNodes() map[string]*Node {
	sm.mu.RLock()
	defer sm.mu.RUnlock()
	out := make(map[string]*Node, len(sm.nodes))
	for id, n := range sm.nodes {
		out[id] = n
	}
	return out
}
// GetAvailableNodes returns every node that is online and has spare job
// capacity (ActiveJobs < MaxJobs).
//
// Fix: nodes whose Metrics pointer is nil previously caused a nil-pointer
// panic; they are now skipped (treated as unavailable). RegisterNode
// normally guarantees Metrics is set, but nodes are shared pointers and
// can be mutated by other callers.
func (sm *ScalingManager) GetAvailableNodes() []*Node {
	sm.mu.RLock()
	defer sm.mu.RUnlock()
	var available []*Node
	for _, node := range sm.nodes {
		if node.Status != NodeStatusOnline || node.Metrics == nil {
			continue
		}
		if node.Metrics.ActiveJobs < node.Metrics.MaxJobs {
			available = append(available, node)
		}
	}
	return available
}
// SelectNode delegates node selection for the given request to the load
// balancer, which filters for availability and applies its configured
// strategy.
func (sm *ScalingManager) SelectNode(request *LoadRequest) (*Node, error) {
return sm.loadBalancer.SelectNode(request)
}
// Start brings up the auto-scaler and launches the node-health monitoring
// goroutine. A disabled manager logs and returns nil without starting
// anything.
func (sm *ScalingManager) Start() error {
	if !sm.enabled {
		sm.logger.Info("Scaling manager is disabled")
		return nil
	}
	sm.logger.Info("Starting scaling manager")
	if err := sm.autoscaler.Start(); err != nil {
		return fmt.Errorf("failed to start auto-scaler: %w", err)
	}
	go sm.monitorNodeHealth() // background health sweeps
	return nil
}
// Stop shuts down the auto-scaler. Note: the node-health monitor goroutine
// started by Start has no stop signal in the current design and keeps
// running.
func (sm *ScalingManager) Stop() error {
	sm.logger.Info("Stopping scaling manager")
	if err := sm.autoscaler.Stop(); err != nil {
		return fmt.Errorf("failed to stop auto-scaler: %w", err)
	}
	return nil
}
// monitorNodeHealth runs a health sweep over all nodes every 30 seconds.
//
// NOTE(review): this goroutine has no stop signal — ScalingManager carries
// no context/cancel, so the loop runs for the life of the process even
// after Stop() is called. Threading a context through ScalingManager (as
// AutoScaler already does for runScalingChecks) would let it shut down
// cleanly; confirm before wiring callers that expect Stop to be total.
func (sm *ScalingManager) monitorNodeHealth() {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()
	// A select with a single ticker case is equivalent to ranging over the
	// channel; `for range` is the idiomatic form.
	for range ticker.C {
		sm.checkNodeHealth()
	}
}
// checkNodeHealth sweeps all nodes: nodes with no heartbeat for over two
// minutes are warned about and marked offline, and nodes whose metrics
// have not been updated for over five minutes get a staleness warning.
func (sm *ScalingManager) checkNodeHealth() {
	for _, node := range sm.GetAllNodes() {
		if time.Since(node.LastSeen) > 2*time.Minute {
			sm.logger.Warnf("Node %s appears to be unresponsive", node.ID)
			sm.markNodeOffline(node.ID)
		}
		if node.Metrics != nil && time.Since(node.Metrics.LastUpdate) > 5*time.Minute {
			sm.logger.Warnf("Node %s metrics are stale", node.ID)
		}
	}
}
// markNodeOffline sets a node's status to offline; unknown IDs are ignored.
func (sm *ScalingManager) markNodeOffline(nodeID string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	node, ok := sm.nodes[nodeID]
	if !ok {
		return
	}
	node.Status = NodeStatusOffline
	sm.logger.Infof("Marked node %s as offline", nodeID)
}
// GetClusterStatus summarizes node counts by status plus total/used job
// capacity; "utilization_percentage" is included only when the cluster
// reports non-zero total capacity.
func (sm *ScalingManager) GetClusterStatus() map[string]interface{} {
	nodes := sm.GetAllNodes()
	var online, offline, busy, errored int
	var totalCap, usedCap int
	for _, node := range nodes {
		switch node.Status {
		case NodeStatusOnline:
			online++
		case NodeStatusOffline:
			offline++
		case NodeStatusBusy:
			busy++
		case NodeStatusError:
			errored++
		}
		if node.Metrics != nil {
			totalCap += node.Metrics.MaxJobs
			usedCap += node.Metrics.ActiveJobs
		}
	}
	status := map[string]interface{}{
		"total_nodes":    len(nodes),
		"online_nodes":   online,
		"offline_nodes":  offline,
		"busy_nodes":     busy,
		"error_nodes":    errored,
		"total_capacity": totalCap,
		"used_capacity":  usedCap,
		"timestamp":      time.Now(),
	}
	if totalCap > 0 {
		status["utilization_percentage"] = float64(usedCap) / float64(totalCap) * 100
	}
	return status
}
// NewLoadBalancer creates a load balancer that starts out with the
// round-robin strategy and an empty node pool.
func NewLoadBalancer(logger *logrus.Logger) *LoadBalancer {
	return &LoadBalancer{
		strategy: NewRoundRobinStrategy(),
		nodes:    make(map[string]*Node),
		logger:   logger,
	}
}
// AddNode makes a node eligible for selection by this load balancer.
func (lb *LoadBalancer) AddNode(node *Node) {
	lb.mu.Lock()
	defer lb.mu.Unlock()
	id := node.ID
	lb.nodes[id] = node
	lb.logger.Debugf("Added node %s to load balancer", id)
}
// RemoveNode takes a node out of the selection pool; unknown IDs are a
// no-op (map delete).
func (lb *LoadBalancer) RemoveNode(nodeID string) {
	lb.mu.Lock()
	defer lb.mu.Unlock()
	delete(lb.nodes, nodeID)
	lb.logger.Debugf("Removed node %s from load balancer", nodeID)
}
// UpdateNode stores the latest view of a node (same write path as AddNode,
// without the debug log).
func (lb *LoadBalancer) UpdateNode(node *Node) {
	lb.mu.Lock()
	lb.nodes[node.ID] = node
	lb.mu.Unlock()
}
// SelectNode filters the pool down to online nodes with spare job capacity
// and delegates the final choice to the configured strategy. Returns an
// error when no node qualifies.
//
// Fix: nodes with a nil Metrics pointer previously caused a nil-pointer
// panic in the availability check; they are now treated as unavailable.
func (lb *LoadBalancer) SelectNode(request *LoadRequest) (*Node, error) {
	lb.mu.RLock()
	defer lb.mu.RUnlock()
	availableNodes := make(map[string]*Node)
	for id, node := range lb.nodes {
		if node.Status != NodeStatusOnline || node.Metrics == nil {
			continue
		}
		if node.Metrics.ActiveJobs < node.Metrics.MaxJobs {
			availableNodes[id] = node
		}
	}
	if len(availableNodes) == 0 {
		return nil, fmt.Errorf("no available nodes")
	}
	return lb.strategy.SelectNode(availableNodes, request)
}
// SetStrategy swaps the active load-balancing strategy at runtime.
func (lb *LoadBalancer) SetStrategy(strategy LoadBalancingStrategy) {
	lb.mu.Lock()
	defer lb.mu.Unlock()
	lb.strategy = strategy
	lb.logger.Infof("Load balancing strategy changed to: %s", strategy.GetName())
}
// NewAutoScaler creates an auto-scaler with its own cancellable context;
// the config is stored as-is (no defaulting is applied here).
func NewAutoScaler(config *AutoScalerConfig, logger *logrus.Logger) *AutoScaler {
	ctx, cancel := context.WithCancel(context.Background())
	return &AutoScaler{
		config:  config,
		nodes:   make(map[string]*Node),
		logger:  logger,
		enabled: config.Enabled,
		ctx:     ctx,
		cancel:  cancel,
	}
}
// Start launches the periodic scaling-check goroutine; a disabled
// auto-scaler logs and returns nil without starting anything.
func (as *AutoScaler) Start() error {
	if !as.enabled {
		as.logger.Info("Auto-scaler is disabled")
		return nil
	}
	as.logger.Info("Starting auto-scaler")
	go as.runScalingChecks() // ticks at config.CheckInterval
	return nil
}
// Stop cancels the auto-scaler's context, which ends runScalingChecks.
func (as *AutoScaler) Stop() error {
	as.logger.Info("Stopping auto-scaler")
	as.cancel()
	return nil
}
// runScalingChecks evaluates scaling on every tick of CheckInterval until
// the auto-scaler's context is cancelled.
func (as *AutoScaler) runScalingChecks() {
	ticker := time.NewTicker(as.config.CheckInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			as.checkScaling()
		case <-as.ctx.Done():
			return
		}
	}
}
// checkScaling is the per-tick scaling decision hook. It is currently a
// stub: it only logs at debug level and performs no scale-up or
// scale-down.
//
// NOTE(review): the comments suggest cluster status should come from the
// scaling manager, but AutoScaler holds no reference to it — confirm the
// intended wiring before implementing the real logic.
func (as *AutoScaler) checkScaling() {
	// Get current cluster status
	// This would typically come from the scaling manager
	// For now, we'll use placeholder logic
	as.logger.Debug("Running scaling check")
	// Check if we need to scale up
	// Check if we need to scale down
	// Implement scaling logic based on metrics
}

View file

@ -0,0 +1,458 @@
package performance
import (
"fmt"
"math/rand"
"sort"
"sync"
"time"
)
// RoundRobinStrategy implements round-robin load balancing over the
// ID-sorted node list.
type RoundRobinStrategy struct {
	currentIndex int // next position in the ID-sorted node list
	mu           sync.Mutex
}

// NewRoundRobinStrategy creates a new round-robin strategy.
func NewRoundRobinStrategy() *RoundRobinStrategy {
	return &RoundRobinStrategy{}
}

// SelectNode rotates through the nodes in ID order, advancing one position
// per call. The node set may change between calls, so the running index is
// taken modulo the current pool size.
func (rr *RoundRobinStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	rr.mu.Lock()
	defer rr.mu.Unlock()
	ordered := make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		ordered = append(ordered, n)
	}
	sort.Slice(ordered, func(a, b int) bool { return ordered[a].ID < ordered[b].ID })
	chosen := ordered[rr.currentIndex%len(ordered)]
	rr.currentIndex++
	return chosen, nil
}

// GetName returns the strategy name.
func (rr *RoundRobinStrategy) GetName() string {
	return "round_robin"
}
// LeastConnectionsStrategy implements least-connections load balancing.
type LeastConnectionsStrategy struct{}

// NewLeastConnectionsStrategy creates a new least connections strategy.
func NewLeastConnectionsStrategy() *LeastConnectionsStrategy {
	return &LeastConnectionsStrategy{}
}

// SelectNode picks the node whose metrics report the fewest active jobs;
// nodes without metrics are ignored. An error is returned when no node
// has metrics at all.
func (lc *LeastConnectionsStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	var best *Node
	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}
		if best == nil || node.Metrics.ActiveJobs < best.Metrics.ActiveJobs {
			best = node
		}
	}
	if best == nil {
		return nil, fmt.Errorf("no suitable node found")
	}
	return best, nil
}

// GetName returns the strategy name.
func (lc *LeastConnectionsStrategy) GetName() string {
	return "least_connections"
}
// WeightedRoundRobinStrategy implements weighted round-robin load
// balancing: nodes are ordered by a computed weight (spare capacity,
// adjusted by the "priority" tag) and a rotating index walks that order.
type WeightedRoundRobinStrategy struct {
	currentIndex int
	mu           sync.Mutex
}

// NewWeightedRoundRobinStrategy creates a new weighted round-robin strategy.
func NewWeightedRoundRobinStrategy() *WeightedRoundRobinStrategy {
	return &WeightedRoundRobinStrategy{}
}

// SelectNode weights each node (default 1; spare job capacity when
// positive; doubled for priority=high, halved for priority=low), sorts by
// weight descending, and picks the next entry in rotation.
func (wr *WeightedRoundRobinStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	wr.mu.Lock()
	defer wr.mu.Unlock()

	type entry struct {
		node   *Node
		weight int
	}
	entries := make([]entry, 0, len(nodes))
	for _, node := range nodes {
		w := 1 // default weight
		if m := node.Metrics; m != nil {
			// Favor nodes with more spare job capacity.
			if spare := m.MaxJobs - m.ActiveJobs; spare > 0 {
				w = spare
			}
		}
		// Indexing a nil Tags map safely yields "", matching neither case.
		switch node.Tags["priority"] {
		case "high":
			w *= 2
		case "low":
			w /= 2
		}
		entries = append(entries, entry{node: node, weight: w})
	}
	sort.Slice(entries, func(a, b int) bool { return entries[a].weight > entries[b].weight })
	chosen := entries[wr.currentIndex%len(entries)].node
	wr.currentIndex++
	return chosen, nil
}

// GetName returns the strategy name.
func (wr *WeightedRoundRobinStrategy) GetName() string {
	return "weighted_round_robin"
}
// RandomStrategy implements random load balancing with its own seeded
// generator, guarded by a mutex because rand.Rand is not safe for
// concurrent use.
type RandomStrategy struct {
	rand *rand.Rand
	mu   sync.Mutex
}

// NewRandomStrategy creates a new random strategy seeded from the clock.
func NewRandomStrategy() *RandomStrategy {
	return &RandomStrategy{
		rand: rand.New(rand.NewSource(time.Now().UnixNano())),
	}
}

// SelectNode returns a uniformly random node from the candidate set.
func (r *RandomStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	pool := make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		pool = append(pool, n)
	}
	return pool[r.rand.Intn(len(pool))], nil
}

// GetName returns the strategy name.
func (r *RandomStrategy) GetName() string {
	return "random"
}
// LeastResponseTimeStrategy implements least-response-time load balancing.
type LeastResponseTimeStrategy struct{}

// NewLeastResponseTimeStrategy creates a new least response time strategy.
func NewLeastResponseTimeStrategy() *LeastResponseTimeStrategy {
	return &LeastResponseTimeStrategy{}
}

// SelectNode selects the node with the lowest load average, which this
// implementation uses as a proxy for response time (real response-time
// metrics are not collected yet). Nodes without metrics are skipped.
//
// Fix: the original seeded the running minimum with
// float64(^uint(0) >> 1) — that is the max int value mislabelled
// "Max float64" — so any node whose load average exceeded ~9.2e18 could
// never be selected even when it was the only candidate. Tracking the best
// node directly removes the sentinel and the edge case.
func (lrt *LeastResponseTimeStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	var best *Node
	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}
		if best == nil || node.Metrics.LoadAverage < best.Metrics.LoadAverage {
			best = node
		}
	}
	if best == nil {
		return nil, fmt.Errorf("no suitable node found")
	}
	return best, nil
}

// GetName returns the strategy name.
func (lrt *LeastResponseTimeStrategy) GetName() string {
	return "least_response_time"
}
// IPHashStrategy implements IP-hash load balancing: the same client IP
// consistently maps to the same node while the pool is stable.
type IPHashStrategy struct{}

// NewIPHashStrategy creates a new IP hash strategy.
func NewIPHashStrategy() *IPHashStrategy {
	return &IPHashStrategy{}
}

// SelectNode hashes the client IP (request.Metadata["client_ip"],
// defaulting to 127.0.0.1) onto the ID-sorted node list.
func (ih *IPHashStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	clientIP := "127.0.0.1" // fallback when the request carries no client IP
	if request.Metadata != nil {
		if ip, ok := request.Metadata["client_ip"].(string); ok {
			clientIP = ip
		}
	}
	ordered := make([]*Node, 0, len(nodes))
	for _, n := range nodes {
		ordered = append(ordered, n)
	}
	// Sort by ID so the hash maps deterministically for a fixed pool.
	sort.Slice(ordered, func(a, b int) bool { return ordered[a].ID < ordered[b].ID })
	idx := hashString(clientIP) % uint32(len(ordered))
	return ordered[idx], nil
}

// GetName returns the strategy name.
func (ih *IPHashStrategy) GetName() string {
	return "ip_hash"
}
// hashString computes a djb2-style hash (hash*33 + rune) over the string
// and returns the 32-bit result; the empty string hashes to 0.
func hashString(s string) uint32 {
	var h uint32
	for _, r := range s {
		h = h*33 + uint32(r) // (h<<5)+h == h*33
	}
	return h
}
// AdaptiveStrategy implements adaptive load balancing based on multiple
// factors (capacity, load, resource usage, tags, and request requirements
// — see calculateNodeScore).
type AdaptiveStrategy struct {
	mu sync.Mutex
}

// NewAdaptiveStrategy creates a new adaptive strategy.
func NewAdaptiveStrategy() *AdaptiveStrategy {
	return &AdaptiveStrategy{}
}

// SelectNode scores every node that has metrics via calculateNodeScore and
// returns the highest-scoring one; nodes without metrics are skipped.
func (a *AdaptiveStrategy) SelectNode(nodes map[string]*Node, request *LoadRequest) (*Node, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no nodes available")
	}
	a.mu.Lock()
	defer a.mu.Unlock()
	var (
		best      *Node
		bestScore float64
	)
	for _, node := range nodes {
		if node.Metrics == nil {
			continue
		}
		if s := a.calculateNodeScore(node, request); best == nil || s > bestScore {
			best, bestScore = node, s
		}
	}
	if best == nil {
		return nil, fmt.Errorf("no suitable node found")
	}
	return best, nil
}
// calculateNodeScore rates a node for the given request. Scoring starts at
// a base of 100 and adds: up to 50 for spare job capacity, up to 30 for a
// low load average, up to 10 each for low CPU and memory usage, +/-20 for
// a "priority" tag of high/low, and +25 when the node's "architecture"
// capability matches the request's requirement.
func (a *AdaptiveStrategy) calculateNodeScore(node *Node, request *LoadRequest) float64 {
	score := 100.0
	if node.Metrics == nil {
		return score
	}
	m := node.Metrics
	// Factor 1: available capacity (higher is better).
	if m.MaxJobs > 0 {
		spare := float64(m.MaxJobs - m.ActiveJobs)
		score += spare / float64(m.MaxJobs) * 50 // up to 50 points
	}
	// Factor 2: system load (lower is better).
	if m.LoadAverage > 0 {
		loadScore := 100.0 - m.LoadAverage*10
		if loadScore < 0 {
			loadScore = 0
		}
		score += loadScore * 0.3 // up to 30 points
	}
	// Factor 3: resource usage (lower is better); up to 10 points each.
	score += (100.0 - m.CPUUsage) * 0.1
	score += (100.0 - m.MemoryUsage) * 0.1
	// Factor 4: priority tag adjustment (indexing a nil map safely yields "").
	switch node.Tags["priority"] {
	case "high":
		score += 20
	case "low":
		score -= 20
	}
	// Factor 5: request-specific architecture match.
	if request.Requirements != nil {
		want, wantOK := request.Requirements["architecture"].(string)
		have, haveOK := node.Capabilities["architecture"].(string)
		if wantOK && haveOK && want == have {
			score += 25 // bonus for architecture match
		}
	}
	return score
}

// GetName returns the strategy name.
func (a *AdaptiveStrategy) GetName() string {
	return "adaptive"
}
// StrategyFactory creates load balancing strategies by name.
type StrategyFactory struct{}

// NewStrategyFactory creates a new strategy factory.
func NewStrategyFactory() *StrategyFactory {
	return &StrategyFactory{}
}

// CreateStrategy returns a fresh strategy instance for the given name, or
// an error when the name is not one of GetAvailableStrategies.
func (sf *StrategyFactory) CreateStrategy(name string) (LoadBalancingStrategy, error) {
	switch name {
	case "round_robin":
		return NewRoundRobinStrategy(), nil
	case "least_connections":
		return NewLeastConnectionsStrategy(), nil
	case "weighted_round_robin":
		return NewWeightedRoundRobinStrategy(), nil
	case "random":
		return NewRandomStrategy(), nil
	case "least_response_time":
		return NewLeastResponseTimeStrategy(), nil
	case "ip_hash":
		return NewIPHashStrategy(), nil
	case "adaptive":
		return NewAdaptiveStrategy(), nil
	}
	return nil, fmt.Errorf("unknown strategy: %s", name)
}
// GetAvailableStrategies lists every name accepted by CreateStrategy;
// keep the two in sync when adding strategies.
func (sf *StrategyFactory) GetAvailableStrategies() []string {
	names := []string{
		"round_robin",
		"least_connections",
		"weighted_round_robin",
		"random",
		"least_response_time",
		"ip_hash",
		"adaptive",
	}
	return names
}