prometheus/job: measure time spent pending rather than queued

We are interested in the time from when a job could be dequeued until
it actually is. However, a job whose dependencies have not yet finished
cannot be dequeued.

Change the logic to measure the time since the last dependency was
dequeued rather than when the job was queued.

The purpose of this metric is to have an alert fire in case we have too
few workers processing jobs.
This commit is contained in:
Tom Gundersen 2022-03-15 20:17:55 +00:00
parent 4621768c14
commit 4eeaebd40b
2 changed files with 15 additions and 5 deletions

View file

@ -61,9 +61,9 @@ func EnqueueJobMetrics(jobType string) {
PendingJobs.WithLabelValues(jobType).Inc()
}
func DequeueJobMetrics(queued time.Time, started time.Time, jobType string) {
if !started.IsZero() && !queued.IsZero() {
diff := started.Sub(queued).Seconds()
func DequeueJobMetrics(pending time.Time, started time.Time, jobType string) {
if !started.IsZero() && !pending.IsZero() {
diff := started.Sub(pending).Seconds()
JobWaitDuration.WithLabelValues(jobType).Observe(diff)
PendingJobs.WithLabelValues(jobType).Dec()
RunningJobs.WithLabelValues(jobType).Inc()