worker/server: set a job error when heartbeat gets missing

Previously, we just used an empty struct when a heartbeat failed. This is fine
for the osbuild job because it's treated as a failed one when
result.OSBuildResult == false, which is the default value.

koji-finalize works differently though: it's in a failed state if there's
a job error or kojiError != "". So when a failed heartbeat set the struct to
be empty, this was treated as success because there was no error.

Let's fix this by introducing a new error for the situation where we don't get
a heartbeat in time for a specific job.
This commit is contained in:
Ondřej Budai 2022-06-29 12:33:05 +02:00 committed by Ondřej Budai
parent 358e58f3d3
commit 0693274ffe
2 changed files with 12 additions and 1 deletions

View file

@ -27,6 +27,7 @@ const (
ErrorEmptyPackageSpecs ClientErrorCode = 24
ErrorDNFRepoError ClientErrorCode = 25
ErrorJobDependency ClientErrorCode = 26
ErrorJobMissingHeartbeat ClientErrorCode = 27
)
// ClientErrorCode is the integer type of the Error* constants declared above
// (for example ErrorJobMissingHeartbeat).
type ClientErrorCode int

View file

@ -100,7 +100,17 @@ func (s *Server) WatchHeartbeats() {
for _, token := range s.jobs.Heartbeats(time.Second * 120) {
id, _ := s.jobs.IdFromToken(token)
logrus.Infof("Removing unresponsive job: %s\n", id)
err := s.FinishJob(token, nil)
missingHeartbeatResult := JobResult{
JobError: clienterrors.WorkerClientError(clienterrors.ErrorJobMissingHeartbeat, "Worker running this job stopped responding."),
}
resJson, err := json.Marshal(missingHeartbeatResult)
if err != nil {
logrus.Panicf("Cannot marshal the heartbeat error: %v", err)
}
err = s.FinishJob(token, resJson)
if err != nil {
logrus.Errorf("Error finishing unresponsive job: %v", err)
}