worker/server: set a job error when a heartbeat goes missing
Previously, we just used an empty struct when a heartbeat failed. This is fine for the osbuild job because it's treated as a failed one when result.OSBuildResult == false, which is the default value. koji-finalize works differently, though: it's in a failed state if there's a job error or kojiError != "". So when a failed heartbeat set the struct to be empty, this was treated as success because there was no error. Let's fix this by introducing a new error for the situation where we don't get a heartbeat in time for a specific job.
This commit is contained in:
parent
358e58f3d3
commit
0693274ffe
2 changed files with 12 additions and 1 deletions
|
|
@ -27,6 +27,7 @@ const (
|
|||
ErrorEmptyPackageSpecs ClientErrorCode = 24
|
||||
ErrorDNFRepoError ClientErrorCode = 25
|
||||
ErrorJobDependency ClientErrorCode = 26
|
||||
ErrorJobMissingHeartbeat ClientErrorCode = 27
|
||||
)
|
||||
|
||||
// ClientErrorCode is the integer type of the Error* codes declared in the
// const block above (for example ErrorJobMissingHeartbeat).
type ClientErrorCode int
|
||||
|
|
|
|||
|
|
@ -100,7 +100,17 @@ func (s *Server) WatchHeartbeats() {
|
|||
for _, token := range s.jobs.Heartbeats(time.Second * 120) {
|
||||
id, _ := s.jobs.IdFromToken(token)
|
||||
logrus.Infof("Removing unresponsive job: %s\n", id)
|
||||
err := s.FinishJob(token, nil)
|
||||
|
||||
missingHeartbeatResult := JobResult{
|
||||
JobError: clienterrors.WorkerClientError(clienterrors.ErrorJobMissingHeartbeat, "Worker running this job stopped responding."),
|
||||
}
|
||||
|
||||
resJson, err := json.Marshal(missingHeartbeatResult)
|
||||
if err != nil {
|
||||
logrus.Panicf("Cannot marshal the heartbeat error: %v", err)
|
||||
}
|
||||
|
||||
err = s.FinishJob(token, resJson)
|
||||
if err != nil {
|
||||
logrus.Errorf("Error finishing unresponsive job: %v", err)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue