From 4eeaebd40bb6d896bf3373cb3b889725781ad968 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Tue, 15 Mar 2022 20:17:55 +0000 Subject: [PATCH] prometheus/job: measure time spent pending rather than queued We are interested in the time from when a job could first be dequeued until it actually is, but a job whose dependencies have not yet finished cannot be dequeued. Change the logic to measure the time since the last dependency finished rather than since the job was queued. The purpose of this metric is to have an alert fire in case we have too few workers processing jobs. --- internal/prometheus/job_metrics.go | 6 +++--- internal/worker/server.go | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/internal/prometheus/job_metrics.go b/internal/prometheus/job_metrics.go index 3d980b327..b0f63c3fa 100644 --- a/internal/prometheus/job_metrics.go +++ b/internal/prometheus/job_metrics.go @@ -61,9 +61,9 @@ func EnqueueJobMetrics(jobType string) { PendingJobs.WithLabelValues(jobType).Inc() } -func DequeueJobMetrics(queued time.Time, started time.Time, jobType string) { - if !started.IsZero() && !queued.IsZero() { - diff := started.Sub(queued).Seconds() +func DequeueJobMetrics(pending time.Time, started time.Time, jobType string) { + if !started.IsZero() && !pending.IsZero() { + diff := started.Sub(pending).Seconds() JobWaitDuration.WithLabelValues(jobType).Observe(diff) PendingJobs.WithLabelValues(jobType).Dec() RunningJobs.WithLabelValues(jobType).Inc() diff --git a/internal/worker/server.go b/internal/worker/server.go index 56c5fa0f6..1416fb6ef 100644 --- a/internal/worker/server.go +++ b/internal/worker/server.go @@ -455,13 +455,23 @@ func (s *Server) requestJob(ctx context.Context, arch string, jobTypes []string, return } + // Record how long the job has been pending for, that is either how + // long it has been queued for, in case it has no dependencies, or + // how long it has been since all its dependencies finished, if it + // has
any. + pending := status.Queued + for _, depID := range depIDs { // TODO: include type of arguments var result json.RawMessage - _, result, _, _, _, _, _, err = s.jobs.JobStatus(depID) + var finished time.Time + _, result, _, _, finished, _, _, err = s.jobs.JobStatus(depID) if err != nil { return } + if finished.After(pending) { + pending = finished + } dynamicArgs = append(dynamicArgs, result) } @@ -474,7 +484,7 @@ func (s *Server) requestJob(ctx context.Context, arch string, jobTypes []string, // TODO: Drop the ':$architecture' for metrics too, first prometheus queries for alerts and // dashboards need to be adjusted. - prometheus.DequeueJobMetrics(status.Queued, status.Started, jobType) + prometheus.DequeueJobMetrics(pending, status.Started, jobType) if jobType == "osbuild:"+arch { jobType = "osbuild" } else if jobType == "osbuild-koji:"+arch {