metrics: add worker error metrics
This commit introduces the collection of error metrics since it is now possible to differentiate between internal errors and user input errors. Additionally, the error status is reported for job duration metrics.
This commit is contained in:
parent
6c4caec022
commit
290472dfdf
3 changed files with 50 additions and 4 deletions
|
|
@ -3,12 +3,22 @@ package prometheus
|
|||
import (
|
||||
"time"
|
||||
|
||||
"github.com/osbuild/osbuild-composer/internal/worker/clienterrors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
)
|
||||
|
||||
// workerSubsystem is the Subsystem value set on the worker metrics declared
// in this file.
const workerSubsystem = "composer_worker"
|
||||
|
||||
var (
|
||||
// TotalJobs is a counter vector named "total_jobs", partitioned by the
// "type" and "status" labels. FinishJobMetrics increments it with the job
// type and the string form of the job's status code.
TotalJobs = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "total_jobs",
|
||||
Namespace: namespace,
|
||||
Subsystem: workerSubsystem,
|
||||
Help: "Total jobs",
|
||||
}, []string{"type", "status"})
|
||||
)
|
||||
|
||||
var (
|
||||
PendingJobs = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: "pending_jobs",
|
||||
|
|
@ -34,7 +44,7 @@ var (
|
|||
Subsystem: workerSubsystem,
|
||||
Help: "Duration spent by workers on a job.",
|
||||
Buckets: []float64{.1, .2, .5, 1, 2, 4, 8, 16, 32, 40, 48, 64, 96, 128, 160, 192, 224, 256, 320, 382, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2049},
|
||||
}, []string{"type"})
|
||||
}, []string{"type", "status"})
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -68,10 +78,11 @@ func CancelJobMetrics(started time.Time, jobType string) {
|
|||
}
|
||||
}
|
||||
|
||||
// FinishJobMetrics records metrics for a job that has finished.
//
// Nothing is recorded when finished is the zero time or when canceled is
// true. Otherwise the elapsed seconds between started and finished are
// observed in JobDuration, TotalJobs is incremented and RunningJobs is
// decremented for jobType.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers:
// two signatures and two JobDuration calls appear back to back. Presumably
// the variants without the status argument are the removed lines and the
// variants using status.ToString() are the added ones - confirm against the
// repository before relying on either form.
func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType string) {
|
||||
func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType string, status clienterrors.StatusCode) {
|
||||
if !finished.IsZero() && !canceled {
|
||||
diff := finished.Sub(started).Seconds()
|
||||
JobDuration.WithLabelValues(jobType).Observe(diff)
|
||||
JobDuration.WithLabelValues(jobType, status.ToString()).Observe(diff)
|
||||
TotalJobs.WithLabelValues(jobType, status.ToString()).Inc()
|
||||
RunningJobs.WithLabelValues(jobType).Dec()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,40 @@ type Error struct {
|
|||
Details interface{} `json:"details"`
|
||||
}
|
||||
|
||||
// Job status values returned by GetStatusCode. They are untyped string
// constants whose values follow the HTTP status class notation.
const (
|
||||
// JobStatusSuccess is returned by GetStatusCode for a nil error.
JobStatusSuccess = "2xx"
|
||||
// JobStatusUserInputError is returned by GetStatusCode for the error IDs it
// lists as user input errors.
JobStatusUserInputError = "4xx"
|
||||
// JobStatusInternalError is returned by GetStatusCode for the DNF error IDs
// it lists and for any error ID it does not list explicitly.
JobStatusInternalError = "5xx"
|
||||
)
|
||||
|
||||
// StatusCode is a string-based classification of a job result. GetStatusCode
// returns one of JobStatusSuccess, JobStatusUserInputError or
// JobStatusInternalError converted to this type.
type StatusCode string

// ToString returns the status code as a plain string, for example to use it
// as a metric label value.
//
// The receiver is a value rather than a pointer so the method can also be
// called on non-addressable values, such as the direct result of a function
// call or a converted constant. Calls made through a *StatusCode keep working
// because the method set of a pointer type includes value-receiver methods.
func (s StatusCode) ToString() string {
	return string(s)
}
|
||||
|
||||
func GetStatusCode(err *Error) StatusCode {
|
||||
if err == nil {
|
||||
return JobStatusSuccess
|
||||
}
|
||||
switch err.ID {
|
||||
case ErrorDNFDepsolveError:
|
||||
return JobStatusInternalError
|
||||
case ErrorDNFMarkingError:
|
||||
return JobStatusInternalError
|
||||
case ErrorNoDynamicArgs:
|
||||
return JobStatusUserInputError
|
||||
case ErrorInvalidTargetConfig:
|
||||
return JobStatusUserInputError
|
||||
case ErrorSharingTarget:
|
||||
return JobStatusUserInputError
|
||||
case ErrorInvalidTarget:
|
||||
return JobStatusUserInputError
|
||||
default:
|
||||
return JobStatusInternalError
|
||||
}
|
||||
}
|
||||
|
||||
func WorkerClientError(code ClientErrorCode, reason string, details ...interface{}) *Error {
|
||||
return &Error{
|
||||
ID: code,
|
||||
|
|
|
|||
|
|
@ -486,7 +486,8 @@ func (s *Server) FinishJob(token uuid.UUID, result json.RawMessage) error {
|
|||
if err != nil {
|
||||
logrus.Errorf("error finding job status: %v", err)
|
||||
} else {
|
||||
prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType)
|
||||
statusCode := clienterrors.GetStatusCode(jobResult.JobError)
|
||||
prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType, statusCode)
|
||||
}
|
||||
|
||||
// Move artifacts from the temporary location to the final job
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue