debian-forge-composer/internal/prometheus/job_metrics.go
Sanne Raymaekers a971f9340b worker/server: update metrics on requeue
When requeuing a job, the next worker requesting the job would decrement
the pending counter, but the pending counter only ever got incremented
once, when the job was first enqueued. Thus make sure to increment the
pending counter when a job is requeued.
2024-11-07 17:18:48 +01:00

91 lines
3.3 KiB
Go

package prometheus
import (
"time"
"github.com/osbuild/osbuild-composer/internal/worker/clienterrors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// TotalJobs is a counter vector keyed by job type, status, tenant and
// architecture. FinishJobMetrics increments it once for every job that
// finishes without having been canceled.
var TotalJobs = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: Namespace,
		Subsystem: WorkerSubsystem,
		Name:      "total_jobs",
		Help:      "Total jobs",
	},
	[]string{"type", "status", "tenant", "arch"},
)
// PendingJobs is a gauge vector keyed by job type and tenant. It is
// incremented when a job is enqueued or requeued and decremented when a
// job is dequeued or canceled before it was started.
var PendingJobs = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: Namespace,
		Subsystem: WorkerSubsystem,
		Name:      "pending_jobs",
		Help:      "Currently pending jobs",
	},
	[]string{"type", "tenant"},
)
// RunningJobs is a gauge vector keyed by job type and tenant. It is
// incremented when a job is dequeued and decremented when a job is
// requeued, finished, or canceled after it was started.
var RunningJobs = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: Namespace,
		Subsystem: WorkerSubsystem,
		Name:      "running_jobs",
		Help:      "Currently running jobs",
	},
	[]string{"type", "tenant"},
)
// JobDuration is a histogram vector keyed by job type, status, tenant and
// architecture. FinishJobMetrics feeds it the number of seconds between a
// job's start and finish timestamps.
var JobDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: Namespace,
		Subsystem: WorkerSubsystem,
		Name:      "job_duration_seconds",
		Help:      "Duration spent by workers on a job.",
		// Bucket upper bounds in seconds, from 0.1s up to 86400s.
		Buckets: []float64{
			.1, .2, .5, 1, 2.5, 5, 10, 20, 30, 40, 60, 90,
			120, 150, 180, 240, 300, 360, 420, 480, 540, 600,
			720, 840, 960, 1080, 1200, 1320, 1440, 1560, 1680, 1800,
			2100, 2400, 2700, 3000, 3600, 4800, 6000, 7200, 9000,
			10800, 12600, 14400, 16200, 18000, 19800,
			24000, 27000, 30000, 33000, 36000, 39600, 43200, 57600, 86400,
		},
	},
	[]string{"type", "status", "tenant", "arch"},
)
// JobWaitDuration is a histogram vector keyed by job type, tenant and
// architecture. DequeueJobMetrics feeds it the number of seconds between a
// job's pending and started timestamps.
var JobWaitDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Namespace: Namespace,
		Subsystem: WorkerSubsystem,
		Name:      "job_wait_duration_seconds",
		Help:      "Duration a job spends on the queue.",
		// Bucket upper bounds in seconds, from 0.1s up to 86400s.
		Buckets: []float64{
			.1, .2, .5, 1, 2.5, 5, 10, 20, 30, 40, 60, 90,
			120, 150, 180, 240, 300, 360, 420, 480, 540, 600,
			720, 840, 960, 1080, 1200, 1320, 1440, 1560, 1680, 1800,
			2100, 2400, 2700, 3000, 3600, 4800, 6000, 7200, 9000,
			10800, 12600, 14400, 16200, 18000, 19800,
			24000, 27000, 30000, 33000, 36000, 39600, 43200, 57600, 86400,
		},
	},
	[]string{"type", "tenant", "arch"},
)
// EnqueueJobMetrics records that a job of the given type was enqueued for
// the given tenant by adding one to the matching PendingJobs gauge.
func EnqueueJobMetrics(jobType, tenant string) {
	pendingGauge := PendingJobs.WithLabelValues(jobType, tenant)
	pendingGauge.Inc()
}
// DequeueJobMetrics records that a job left the queue and started running.
//
// When both pending and started are non-zero it observes the wait time
// (started minus pending, in seconds) on JobWaitDuration, decrements the
// PendingJobs gauge and increments the RunningJobs gauge. If either
// timestamp is the zero time, no metric is touched.
func DequeueJobMetrics(pending time.Time, started time.Time, jobType, tenant, arch string) {
	if started.IsZero() || pending.IsZero() {
		return
	}

	waited := started.Sub(pending)
	JobWaitDuration.WithLabelValues(jobType, tenant, arch).Observe(waited.Seconds())
	PendingJobs.WithLabelValues(jobType, tenant).Dec()
	RunningJobs.WithLabelValues(jobType, tenant).Inc()
}
// RequeueJobMetrics records that a job was put back on the queue: the
// PendingJobs gauge for the type/tenant pair goes up by one and the
// RunningJobs gauge for the same pair goes down by one.
func RequeueJobMetrics(jobType, tenant string) {
	pendingGauge := PendingJobs.WithLabelValues(jobType, tenant)
	pendingGauge.Inc()

	runningGauge := RunningJobs.WithLabelValues(jobType, tenant)
	runningGauge.Dec()
}
// CancelJobMetrics records the cancellation of a job.
//
// A non-zero started timestamp means the job is accounted for in
// RunningJobs, so that gauge is decremented; otherwise the job is still
// accounted for in PendingJobs and that gauge is decremented instead.
func CancelJobMetrics(started time.Time, jobType, tenant string) {
	// Pick the gauge vector that currently counts this job.
	gauges := RunningJobs
	if started.IsZero() {
		gauges = PendingJobs
	}
	gauges.WithLabelValues(jobType, tenant).Dec()
}
// FinishJobMetrics records the completion of a job.
//
// When finished is non-zero and the job was not canceled, it observes the
// run time (finished minus started, in seconds) on JobDuration, increments
// TotalJobs and decrements the RunningJobs gauge. In every other case it
// does nothing.
// NOTE(review): canceled jobs are presumably accounted for by
// CancelJobMetrics instead — confirm against the callers.
func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType, tenant, arch string, status clienterrors.StatusCode) {
	if finished.IsZero() || canceled {
		return
	}

	elapsed := finished.Sub(started)
	statusLabel := status.ToString()

	JobDuration.WithLabelValues(jobType, statusLabel, tenant, arch).Observe(elapsed.Seconds())
	TotalJobs.WithLabelValues(jobType, statusLabel, tenant, arch).Inc()
	RunningJobs.WithLabelValues(jobType, tenant).Dec()
}