prometheus: add tenant label
Include a tenant label on all Prometheus metrics. Modify the JobStatus functions (in the job queue implementations and in the worker server) to also return the job's channel, so that it can be passed to Prometheus as the value of the tenant label.
This commit is contained in:
parent
5315264f2e
commit
873798514b
6 changed files with 56 additions and 54 deletions
|
|
@ -67,7 +67,7 @@ const (
|
|||
FROM jobs
|
||||
WHERE id = $1`
|
||||
sqlQueryJobStatus = `
|
||||
SELECT type, result, queued_at, started_at, finished_at, canceled
|
||||
SELECT type, channel, result, queued_at, started_at, finished_at, canceled
|
||||
FROM jobs
|
||||
WHERE id = $1`
|
||||
sqlQueryRunningId = `
|
||||
|
|
@ -473,7 +473,7 @@ func (q *DBJobQueue) CancelJob(id uuid.UUID) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (q *DBJobQueue) JobStatus(id uuid.UUID) (jobType string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error) {
|
||||
func (q *DBJobQueue) JobStatus(id uuid.UUID) (jobType string, channel string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error) {
|
||||
conn, err := q.pool.Acquire(context.Background())
|
||||
if err != nil {
|
||||
return
|
||||
|
|
@ -483,7 +483,7 @@ func (q *DBJobQueue) JobStatus(id uuid.UUID) (jobType string, result json.RawMes
|
|||
// Use double pointers for timestamps because they might be NULL, which would result in *time.Time == nil
|
||||
var sp, fp *time.Time
|
||||
var rp pgtype.JSON
|
||||
err = conn.QueryRow(context.Background(), sqlQueryJobStatus, id).Scan(&jobType, &rp, &queued, &sp, &fp, &canceled)
|
||||
err = conn.QueryRow(context.Background(), sqlQueryJobStatus, id).Scan(&jobType, &channel, &rp, &queued, &sp, &fp, &canceled)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
|
|
|||
|
|
@ -343,13 +343,14 @@ func (q *fsJobQueue) CancelJob(id uuid.UUID) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (q *fsJobQueue) JobStatus(id uuid.UUID) (jobType string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error) {
|
||||
func (q *fsJobQueue) JobStatus(id uuid.UUID) (jobType string, channel string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error) {
|
||||
j, err := q.readJob(id)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
jobType = j.Type
|
||||
channel = j.Channel
|
||||
result = j.Result
|
||||
queued = j.QueuedAt
|
||||
started = j.StartedAt
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ type JobQueue interface {
|
|||
// finished, respectively.
|
||||
//
|
||||
// Lastly, the IDs of the jobs dependencies are returned.
|
||||
JobStatus(id uuid.UUID) (jobType string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error)
|
||||
JobStatus(id uuid.UUID) (jobType string, channel string, result json.RawMessage, queued, started, finished time.Time, canceled bool, deps []uuid.UUID, err error)
|
||||
|
||||
// Job returns all the parameters that define a job (everything provided during Enqueue).
|
||||
Job(id uuid.UUID) (jobType string, args json.RawMessage, dependencies []uuid.UUID, channel string, err error)
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ func testDependencies(t *testing.T, q jobqueue.JobQueue) {
|
|||
require.ElementsMatch(t, []uuid.UUID{one, two}, r)
|
||||
|
||||
j := pushTestJob(t, q, "test", nil, []uuid.UUID{one, two}, "")
|
||||
jobType, _, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
jobType, _, _, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "test")
|
||||
require.True(t, !queued.IsZero())
|
||||
|
|
@ -223,7 +223,7 @@ func testDependencies(t *testing.T, q jobqueue.JobQueue) {
|
|||
|
||||
require.Equal(t, j, finishNextTestJob(t, q, "test", testResult{}, []uuid.UUID{one, two}))
|
||||
|
||||
jobType, result, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
jobType, _, result, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "test")
|
||||
require.True(t, !queued.IsZero())
|
||||
|
|
@ -241,7 +241,7 @@ func testDependencies(t *testing.T, q jobqueue.JobQueue) {
|
|||
two := pushTestJob(t, q, "test", nil, nil, "")
|
||||
|
||||
j := pushTestJob(t, q, "test", nil, []uuid.UUID{one, two}, "")
|
||||
jobType, _, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
jobType, _, _, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "test")
|
||||
require.True(t, !queued.IsZero())
|
||||
|
|
@ -257,7 +257,7 @@ func testDependencies(t *testing.T, q jobqueue.JobQueue) {
|
|||
|
||||
require.Equal(t, j, finishNextTestJob(t, q, "test", testResult{}, []uuid.UUID{one, two}))
|
||||
|
||||
jobType, result, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
jobType, _, result, queued, started, finished, canceled, deps, err := q.JobStatus(j)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "test")
|
||||
require.True(t, !queued.IsZero())
|
||||
|
|
@ -353,7 +353,7 @@ func testCancel(t *testing.T, q jobqueue.JobQueue) {
|
|||
require.NotEmpty(t, id)
|
||||
err = q.CancelJob(id)
|
||||
require.NoError(t, err)
|
||||
jobType, result, _, _, _, canceled, _, err := q.JobStatus(id)
|
||||
jobType, _, result, _, _, _, canceled, _, err := q.JobStatus(id)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "clownfish")
|
||||
require.True(t, canceled)
|
||||
|
|
@ -373,7 +373,7 @@ func testCancel(t *testing.T, q jobqueue.JobQueue) {
|
|||
require.Equal(t, json.RawMessage("null"), args)
|
||||
err = q.CancelJob(id)
|
||||
require.NoError(t, err)
|
||||
jobType, result, _, _, _, canceled, _, err = q.JobStatus(id)
|
||||
jobType, _, result, _, _, _, canceled, _, err = q.JobStatus(id)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "clownfish")
|
||||
require.True(t, canceled)
|
||||
|
|
@ -396,7 +396,7 @@ func testCancel(t *testing.T, q jobqueue.JobQueue) {
|
|||
err = q.CancelJob(id)
|
||||
require.Error(t, err)
|
||||
require.Equal(t, jobqueue.ErrNotRunning, err)
|
||||
jobType, result, _, _, _, canceled, _, err = q.JobStatus(id)
|
||||
jobType, _, result, _, _, _, canceled, _, err = q.JobStatus(id)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, jobType, "clownfish")
|
||||
require.False(t, canceled)
|
||||
|
|
@ -613,7 +613,7 @@ func test100dequeuers(t *testing.T, q jobqueue.JobQueue) {
|
|||
// try to do some other operations on the jobqueue
|
||||
id := pushTestJob(t, q, "clownfish", nil, nil, "")
|
||||
|
||||
_, _, _, _, _, _, _, err := q.JobStatus(id)
|
||||
_, _, _, _, _, _, _, _, err := q.JobStatus(id)
|
||||
require.NoError(t, err)
|
||||
|
||||
finishNextTestJob(t, q, "clownfish", testResult{}, nil)
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: workerSubsystem,
|
||||
Help: "Total jobs",
|
||||
}, []string{"type", "status"})
|
||||
}, []string{"type", "status", "tenant"})
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -25,7 +25,7 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: workerSubsystem,
|
||||
Help: "Currently pending jobs",
|
||||
}, []string{"type"})
|
||||
}, []string{"type", "tenant"})
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -34,7 +34,7 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: workerSubsystem,
|
||||
Help: "Currently running jobs",
|
||||
}, []string{"type"})
|
||||
}, []string{"type", "tenant"})
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -44,7 +44,7 @@ var (
|
|||
Subsystem: workerSubsystem,
|
||||
Help: "Duration spent by workers on a job.",
|
||||
Buckets: []float64{.1, .2, .5, 1, 2, 4, 8, 16, 32, 40, 48, 64, 96, 128, 160, 192, 224, 256, 320, 382, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2049},
|
||||
}, []string{"type", "status"})
|
||||
}, []string{"type", "status", "tenant"})
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -54,35 +54,35 @@ var (
|
|||
Subsystem: workerSubsystem,
|
||||
Help: "Duration a job spends on the queue.",
|
||||
Buckets: []float64{.1, .2, .5, 1, 2, 4, 8, 16, 32, 40, 48, 64, 96, 128, 160, 192, 224, 256, 320, 382, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2049},
|
||||
}, []string{"type"})
|
||||
}, []string{"type", "tenant"})
|
||||
)
|
||||
|
||||
func EnqueueJobMetrics(jobType string) {
|
||||
PendingJobs.WithLabelValues(jobType).Inc()
|
||||
func EnqueueJobMetrics(jobType, tenant string) {
|
||||
PendingJobs.WithLabelValues(jobType, tenant).Inc()
|
||||
}
|
||||
|
||||
func DequeueJobMetrics(pending time.Time, started time.Time, jobType string) {
|
||||
func DequeueJobMetrics(pending time.Time, started time.Time, jobType, tenant string) {
|
||||
if !started.IsZero() && !pending.IsZero() {
|
||||
diff := started.Sub(pending).Seconds()
|
||||
JobWaitDuration.WithLabelValues(jobType).Observe(diff)
|
||||
PendingJobs.WithLabelValues(jobType).Dec()
|
||||
RunningJobs.WithLabelValues(jobType).Inc()
|
||||
JobWaitDuration.WithLabelValues(jobType, tenant).Observe(diff)
|
||||
PendingJobs.WithLabelValues(jobType, tenant).Dec()
|
||||
RunningJobs.WithLabelValues(jobType, tenant).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
func CancelJobMetrics(started time.Time, jobType string) {
|
||||
func CancelJobMetrics(started time.Time, jobType string, tenant string) {
|
||||
if !started.IsZero() {
|
||||
RunningJobs.WithLabelValues(jobType).Dec()
|
||||
RunningJobs.WithLabelValues(jobType, tenant).Dec()
|
||||
} else {
|
||||
PendingJobs.WithLabelValues(jobType).Dec()
|
||||
PendingJobs.WithLabelValues(jobType, tenant).Dec()
|
||||
}
|
||||
}
|
||||
|
||||
func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType string, status clienterrors.StatusCode) {
|
||||
func FinishJobMetrics(started time.Time, finished time.Time, canceled bool, jobType, tenant string, status clienterrors.StatusCode) {
|
||||
if !finished.IsZero() && !canceled {
|
||||
diff := finished.Sub(started).Seconds()
|
||||
JobDuration.WithLabelValues(jobType, status.ToString()).Observe(diff)
|
||||
TotalJobs.WithLabelValues(jobType, status.ToString()).Inc()
|
||||
RunningJobs.WithLabelValues(jobType).Dec()
|
||||
JobDuration.WithLabelValues(jobType, status.ToString(), tenant).Observe(diff)
|
||||
TotalJobs.WithLabelValues(jobType, status.ToString(), tenant).Inc()
|
||||
RunningJobs.WithLabelValues(jobType, tenant).Dec()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ func (s *Server) EnqueueManifestJobByID(job *ManifestJobByID, parent uuid.UUID,
|
|||
}
|
||||
|
||||
func (s *Server) enqueue(jobType string, job interface{}, dependencies []uuid.UUID, channel string) (uuid.UUID, error) {
|
||||
prometheus.EnqueueJobMetrics(jobType)
|
||||
prometheus.EnqueueJobMetrics(jobType, channel)
|
||||
return s.jobs.Enqueue(jobType, job, dependencies, channel)
|
||||
}
|
||||
|
||||
|
|
@ -166,7 +166,7 @@ func (s *Server) CheckBuildDependencies(dep uuid.UUID, jobErr *clienterrors.Erro
|
|||
}
|
||||
|
||||
func (s *Server) OSBuildJobStatus(id uuid.UUID, result *OSBuildJobResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -194,7 +194,7 @@ func (s *Server) OSBuildJobStatus(id uuid.UUID, result *OSBuildJobResult) (*JobS
|
|||
}
|
||||
|
||||
func (s *Server) OSBuildKojiJobStatus(id uuid.UUID, result *OSBuildKojiJobResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -217,7 +217,7 @@ func (s *Server) OSBuildKojiJobStatus(id uuid.UUID, result *OSBuildKojiJobResult
|
|||
}
|
||||
|
||||
func (s *Server) KojiInitJobStatus(id uuid.UUID, result *KojiInitJobResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -234,7 +234,7 @@ func (s *Server) KojiInitJobStatus(id uuid.UUID, result *KojiInitJobResult) (*Jo
|
|||
}
|
||||
|
||||
func (s *Server) KojiFinalizeJobStatus(id uuid.UUID, result *KojiFinalizeJobResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -251,7 +251,7 @@ func (s *Server) KojiFinalizeJobStatus(id uuid.UUID, result *KojiFinalizeJobResu
|
|||
}
|
||||
|
||||
func (s *Server) DepsolveJobStatus(id uuid.UUID, result *DepsolveJobResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -272,7 +272,7 @@ func (s *Server) DepsolveJobStatus(id uuid.UUID, result *DepsolveJobResult) (*Jo
|
|||
}
|
||||
|
||||
func (s *Server) ManifestJobStatus(id uuid.UUID, result *ManifestJobByIDResult) (*JobStatus, []uuid.UUID, error) {
|
||||
jobType, status, deps, err := s.jobStatus(id, result)
|
||||
jobType, _, status, deps, err := s.jobStatus(id, result)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
|
@ -284,20 +284,20 @@ func (s *Server) ManifestJobStatus(id uuid.UUID, result *ManifestJobByIDResult)
|
|||
return status, deps, nil
|
||||
}
|
||||
|
||||
func (s *Server) jobStatus(id uuid.UUID, result interface{}) (string, *JobStatus, []uuid.UUID, error) {
|
||||
jobType, rawResult, queued, started, finished, canceled, deps, err := s.jobs.JobStatus(id)
|
||||
func (s *Server) jobStatus(id uuid.UUID, result interface{}) (string, string, *JobStatus, []uuid.UUID, error) {
|
||||
jobType, channel, rawResult, queued, started, finished, canceled, deps, err := s.jobs.JobStatus(id)
|
||||
if err != nil {
|
||||
return "", nil, nil, err
|
||||
return "", "", nil, nil, err
|
||||
}
|
||||
|
||||
if result != nil && !finished.IsZero() && !canceled {
|
||||
err = json.Unmarshal(rawResult, result)
|
||||
if err != nil {
|
||||
return "", nil, nil, fmt.Errorf("error unmarshaling result for job '%s': %v", id, err)
|
||||
return "", "", nil, nil, fmt.Errorf("error unmarshaling result for job '%s': %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
return jobType, &JobStatus{
|
||||
return jobType, channel, &JobStatus{
|
||||
Queued: queued,
|
||||
Started: started,
|
||||
Finished: finished,
|
||||
|
|
@ -350,11 +350,11 @@ func (s *Server) JobType(id uuid.UUID) (string, error) {
|
|||
}
|
||||
|
||||
func (s *Server) Cancel(id uuid.UUID) error {
|
||||
jobType, status, _, err := s.jobStatus(id, nil)
|
||||
jobType, channel, status, _, err := s.jobStatus(id, nil)
|
||||
if err != nil {
|
||||
logrus.Errorf("error getting job status: %v", err)
|
||||
} else {
|
||||
prometheus.CancelJobMetrics(status.Started, jobType)
|
||||
prometheus.CancelJobMetrics(status.Started, jobType, channel)
|
||||
}
|
||||
return s.jobs.CancelJob(id)
|
||||
}
|
||||
|
|
@ -366,7 +366,7 @@ func (s *Server) JobArtifact(id uuid.UUID, name string) (io.Reader, int64, error
|
|||
return nil, 0, errors.New("Artifacts not enabled")
|
||||
}
|
||||
|
||||
_, status, _, err := s.jobStatus(id, nil)
|
||||
_, _, status, _, err := s.jobStatus(id, nil)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
|
@ -395,7 +395,7 @@ func (s *Server) DeleteArtifacts(id uuid.UUID) error {
|
|||
return errors.New("Artifacts not enabled")
|
||||
}
|
||||
|
||||
_, status, _, err := s.jobStatus(id, nil)
|
||||
_, _, status, _, err := s.jobStatus(id, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -449,10 +449,11 @@ func (s *Server) requestJob(ctx context.Context, arch string, jobTypes []string,
|
|||
return
|
||||
}
|
||||
|
||||
jobType, status, _, err := s.jobStatus(jobId, nil)
|
||||
jobType, channel, status, _, err := s.jobStatus(jobId, nil)
|
||||
if err != nil {
|
||||
logrus.Errorf("error retrieving job status: %v", err)
|
||||
return
|
||||
} else {
|
||||
prometheus.DequeueJobMetrics(status.Queued, status.Started, jobType, channel)
|
||||
}
|
||||
|
||||
// Record how long the job has been pending for, that is either how
|
||||
|
|
@ -465,7 +466,7 @@ func (s *Server) requestJob(ctx context.Context, arch string, jobTypes []string,
|
|||
// TODO: include type of arguments
|
||||
var result json.RawMessage
|
||||
var finished time.Time
|
||||
_, result, _, _, finished, _, _, err = s.jobs.JobStatus(depID)
|
||||
_, _, result, _, _, finished, _, _, err = s.jobs.JobStatus(depID)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
|
@ -484,7 +485,7 @@ func (s *Server) requestJob(ctx context.Context, arch string, jobTypes []string,
|
|||
|
||||
// TODO: Drop the ':$architecture' for metrics too, first prometheus queries for alerts and
|
||||
// dashboards need to be adjusted.
|
||||
prometheus.DequeueJobMetrics(pending, status.Started, jobType)
|
||||
prometheus.DequeueJobMetrics(pending, status.Started, jobType, channel)
|
||||
if jobType == "osbuild:"+arch {
|
||||
jobType = "osbuild"
|
||||
} else if jobType == "osbuild-koji:"+arch {
|
||||
|
|
@ -516,12 +517,12 @@ func (s *Server) FinishJob(token uuid.UUID, result json.RawMessage) error {
|
|||
}
|
||||
|
||||
var jobResult JobResult
|
||||
jobType, status, _, err := s.jobStatus(jobId, &jobResult)
|
||||
jobType, channel, status, _, err := s.jobStatus(jobId, &jobResult)
|
||||
if err != nil {
|
||||
logrus.Errorf("error finding job status: %v", err)
|
||||
} else {
|
||||
statusCode := clienterrors.GetStatusCode(jobResult.JobError)
|
||||
prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType, statusCode)
|
||||
prometheus.FinishJobMetrics(status.Started, status.Finished, status.Canceled, jobType, channel, statusCode)
|
||||
}
|
||||
|
||||
// Move artifacts from the temporary location to the final job
|
||||
|
|
@ -664,7 +665,7 @@ func (h *apiHandlers) GetJob(ctx echo.Context, tokenstr string) error {
|
|||
|
||||
h.server.jobs.RefreshHeartbeat(token)
|
||||
|
||||
_, status, _, err := h.server.jobStatus(jobId, nil)
|
||||
_, _, status, _, err := h.server.jobStatus(jobId, nil)
|
||||
if err != nil {
|
||||
return api.HTTPErrorWithInternal(api.ErrorRetrievingJobStatus, err)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue