diff --git a/internal/prometheus/job_metrics.go b/internal/prometheus/job_metrics.go
index 93503a2c9..eda3dba98 100644
--- a/internal/prometheus/job_metrics.go
+++ b/internal/prometheus/job_metrics.go
@@ -3,12 +3,24 @@ package prometheus
 import (
 	"time"
 
+	"github.com/osbuild/osbuild-composer/internal/worker/clienterrors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
 )
 
 const workerSubsystem = "composer_worker"
 
+var (
+	// TotalJobs counts finished, non-canceled jobs, partitioned by job type
+	// and result status.
+	TotalJobs = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name:      "total_jobs",
+		Namespace: namespace,
+		Subsystem: workerSubsystem,
+		Help:      "Total jobs",
+	}, []string{"type", "status"})
+)
+
 var (
 	PendingJobs = promauto.NewGaugeVec(prometheus.GaugeOpts{
 		Name:      "pending_jobs",
@@ -34,7 +46,7 @@ var (
 		Subsystem: workerSubsystem,
 		Help:      "Duration spent by workers on a job.",
 		Buckets:   []float64{.1, .2, .5, 1, 2, 4, 8, 16, 32, 40, 48, 64, 96, 128, 160, 192, 224, 256, 320, 382, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2049},
-	}, []string{"type"})
+	}, []string{"type", "status"})
 )
 
 var (
@@ -68,10 +80,17 @@ func CancelJobMetrics(started time.Time, jobType string) {
 	}
 }
 
-func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType string) {
+// FinishJobMetrics updates the worker metrics for a finished job. If finished
+// is non-zero and the job was not canceled, it observes the run duration in
+// JobDuration and increments TotalJobs (both labelled by job type and status)
+// and decrements RunningJobs for the job type. Otherwise it records nothing.
+func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType string, status clienterrors.StatusCode) {
 	if !finished.IsZero() && !canceled {
 		diff := finished.Sub(started).Seconds()
-		JobDuration.WithLabelValues(jobType).Observe(diff)
+		// Both metrics share the status label, so resolve it once.
+		statusLabel := status.ToString()
+		JobDuration.WithLabelValues(jobType, statusLabel).Observe(diff)
+		TotalJobs.WithLabelValues(jobType, statusLabel).Inc()
 		RunningJobs.WithLabelValues(jobType).Dec()
 	}
 }
diff --git a/internal/worker/clienterrors/errors.go b/internal/worker/clienterrors/errors.go
index b5f584cc5..e3a277d97 100644
--- a/internal/worker/clienterrors/errors.go
+++ b/internal/worker/clienterrors/errors.go
@@ -34,6 +34,43 @@ type Error struct {
 	Details interface{}     `json:"details"`
 }
 
+// Job status classes returned by GetStatusCode.
+const (
+	JobStatusSuccess        = "2xx"
+	JobStatusUserInputError = "4xx"
+	JobStatusInternalError  = "5xx"
+)
+
+// StatusCode is the outcome class of a job; GetStatusCode produces one of the
+// JobStatus* values.
+type StatusCode string
+
+// ToString returns the status code as a plain string. The receiver is a value
+// so the method can also be called on non-addressable values, such as the
+// direct result of GetStatusCode.
+func (s StatusCode) ToString() string {
+	return string(s)
+}
+
+// GetStatusCode maps a job error to a status class: nil is a success,
+// ErrorNoDynamicArgs, ErrorInvalidTargetConfig, ErrorSharingTarget and
+// ErrorInvalidTarget are user input errors, and any other error ID is an
+// internal error.
+func GetStatusCode(err *Error) StatusCode {
+	if err == nil {
+		return JobStatusSuccess
+	}
+	switch err.ID {
+	case ErrorNoDynamicArgs, ErrorInvalidTargetConfig, ErrorSharingTarget, ErrorInvalidTarget:
+		return JobStatusUserInputError
+	case ErrorDNFDepsolveError, ErrorDNFMarkingError:
+		// Same result as the default branch; kept explicit for readability.
+		return JobStatusInternalError
+	default:
+		return JobStatusInternalError
+	}
+}
+
 func WorkerClientError(code ClientErrorCode, reason string, details ...interface{}) *Error {
 	return &Error{
 		ID:      code,
diff --git a/internal/worker/server.go b/internal/worker/server.go
index 65f60b14a..4ba38579a 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -486,7 +486,8 @@ func (s *Server) FinishJob(token uuid.UUID, result json.RawMessage) error {
 	if err != nil {
 		logrus.Errorf("error finding job status: %v", err)
 	} else {
-		prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType)
+		statusCode := clienterrors.GetStatusCode(jobResult.JobError)
+		prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType, statusCode)
 	}
 
 	// Move artifacts from the temporary location to the final job