worker/server: log unresponsive job removal

Re-add the logging for when jobs with unresponsive heartbeats
are being removed so we can verify that these removals
are correctly being logged as 5xx errors.
This commit is contained in:
Gianluca Zuccarelli 2023-01-09 16:18:02 +00:00 committed by Tomáš Hozza
parent fe554ad184
commit 08aa1e99a1

View file

@@ -114,6 +114,9 @@ func (s *Server) WatchHeartbeats() {
//nolint:staticcheck // avoid SA1015, this is an endless function
for range time.Tick(time.Second * 30) {
for _, token := range s.jobs.Heartbeats(time.Second * 120) {
id, _ := s.jobs.IdFromToken(token)
logrus.Infof("Removing unresponsive job: %s\n", id)
missingHeartbeatResult := JobResult{
JobError: clienterrors.WorkerClientError(clienterrors.ErrorJobMissingHeartbeat,
fmt.Sprintf("Workers running this job stopped responding more than %d times.", maxHeartbeatRetries),