From 299a5e52ab20d484180affe7f4273ef17cdc50d7 Mon Sep 17 00:00:00 2001
From: Lars Karlitski
Date: Sat, 31 Oct 2020 10:47:42 +0100
Subject: [PATCH] worker: use OSBuildJobResult consistently

Workers used to report status via an `osbuild.Result`, which only
includes osbuild output. Make them report an `OSBuildJobResult`
instead, which was meant for this purpose and is already used as the
result type in the jobqueue. While at it, add any errors produced by
targets to this struct, as well as an overall success flag.

Note that this breaks older workers reporting the result of an osbuild
job to a new composer. I think this is acceptable in this case, for two
reasons:

1. We don't support running different versions of the worker and
   composer in the weldr API, and remote workers aren't widely used
   yet.

2. Both osbuild.Result and worker.OSBuildJobResult have a top-level
   `Success` boolean, which serializes to the same `success` JSON
   field, so a new composer can still decode the overall status from an
   old worker's result. Logs are lost in such cases, but the overall
   status of the compose is not.
---
 cmd/osbuild-worker/main.go  | 74 ++++++++++++-------------------------
 internal/cloudapi/server.go |  2 +-
 internal/kojiapi/server.go  |  4 +-
 internal/weldr/api.go       |  2 +-
 internal/worker/client.go   |  6 +--
 internal/worker/json.go     |  6 +--
 internal/worker/server.go   | 10 ++++-
 7 files changed, 41 insertions(+), 63 deletions(-)
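A note on point 2 above, with a minimal, self-contained sketch. The
types below are trimmed-down stand-ins for osbuild.Result and
worker.OSBuildJobResult (only the fields relevant to the argument), not
the real definitions:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Trimmed-down stand-in for osbuild.Result: only the success flag.
    type oldResult struct {
        Success bool `json:"success"`
    }

    // Trimmed-down stand-in for worker.OSBuildJobResult.
    type jobResult struct {
        Success       bool       `json:"success"`
        OSBuildOutput *oldResult `json:"osbuild_output,omitempty"`
        TargetErrors  []string   `json:"target_errors,omitempty"`
    }

    func main() {
        // An old worker reports a bare osbuild.Result as the job result.
        old, _ := json.Marshal(oldResult{Success: true})

        // A new composer decodes it as an OSBuildJobResult: the osbuild
        // logs are gone (osbuild_output stays nil), but `success` carries
        // over because both types share that JSON field.
        var r jobResult
        _ = json.Unmarshal(old, &r)
        fmt.Printf("success=%v output=%v\n", r.Success, r.OSBuildOutput) // success=true output=<nil>
    }

Decoding an old worker's result this way drops the logs but keeps the
success flag, which is what the status code paths changed below now
read.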
diff --git a/cmd/osbuild-worker/main.go b/cmd/osbuild-worker/main.go
index ff5b1146d..a41542aa3 100644
--- a/cmd/osbuild-worker/main.go
+++ b/cmd/osbuild-worker/main.go
@@ -60,20 +60,6 @@ func createTLSConfig(config *connectionConfig) (*tls.Config, error) {
 	}, nil
 }
 
-type TargetsError struct {
-	Errors []error
-}
-
-func (e *TargetsError) Error() string {
-	errString := fmt.Sprintf("%d target(s) errored:\n", len(e.Errors))
-
-	for _, err := range e.Errors {
-		errString += err.Error() + "\n"
-	}
-
-	return errString
-}
-
 func packageMetadataToSignature(pkg osbuild.RPMPackageMetadata) *string {
 	if pkg.SigGPG != "" {
 		return &pkg.SigGPG
@@ -107,7 +93,7 @@ func osbuildStagesToRPMs(stages []osbuild.StageResult) []koji.RPM {
 	return rpms
 }
 
-func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICredentials) (*osbuild.Result, error) {
+func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICredentials) (*worker.OSBuildJobResult, error) {
 	outputDirectory, err := ioutil.TempDir("/var/tmp", "osbuild-worker-*")
 	if err != nil {
 		return nil, fmt.Errorf("error creating temporary output directory: %v", err)
@@ -126,14 +112,14 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 
 	start_time := time.Now()
 
-	result, err := RunOSBuild(args.Manifest, store, outputDirectory, os.Stderr)
+	osbuildOutput, err := RunOSBuild(args.Manifest, store, outputDirectory, os.Stderr)
 	if err != nil {
 		return nil, err
 	}
 
 	end_time := time.Now()
 
-	if result.Success && args.ImageName != "" {
+	if osbuildOutput.Success && args.ImageName != "" {
 		var f *os.File
 		imagePath := path.Join(outputDirectory, args.ImageName)
 		if args.StreamOptimized {
@@ -158,7 +144,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 	for _, t := range args.Targets {
 		switch options := t.Options.(type) {
 		case *target.LocalTargetOptions:
-			if !result.Success {
+			if !osbuildOutput.Success {
 				continue
 			}
 			var f *os.File
@@ -184,7 +170,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 			}
 
 		case *target.AWSTargetOptions:
-			if !result.Success {
+			if !osbuildOutput.Success {
 				continue
 			}
 			a, err := awsupload.New(options.Region, options.AccessKeyID, options.SecretAccessKey)
@@ -211,7 +197,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 				continue
 			}
 		case *target.AzureTargetOptions:
-			if !result.Success {
+			if !osbuildOutput.Success {
 				continue
 			}
 			credentials := azure.Credentials{
@@ -263,7 +249,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 				}
 			}()
 
-			if result.Success == false {
+			if !osbuildOutput.Success {
 				err = k.CGFailBuild(int(options.BuildID), options.Token)
 				if err != nil {
 					log.Printf("CGFailBuild failed: %v", err)
@@ -314,7 +300,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 					Arch: common.CurrentArch(),
 				},
 				Tools: []koji.Tool{},
-				RPMs:  osbuildStagesToRPMs(result.Build.Stages),
+				RPMs:  osbuildStagesToRPMs(osbuildOutput.Build.Stages),
 			},
 		}
 		output := []koji.Image{
@@ -326,7 +312,7 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 					ChecksumType: "md5",
 					MD5:          hash,
 					Type:         "image",
-					RPMs:         osbuildStagesToRPMs(result.Stages),
+					RPMs:         osbuildStagesToRPMs(osbuildOutput.Stages),
 					Extra: koji.ImageExtra{
 						Info: koji.ImageExtraInfo{
 							Arch: "noarch",
@@ -345,11 +331,16 @@ func RunJob(job worker.Job, store string, kojiServers map[string]koji.GSSAPICred
 		}
 	}
 
-	if len(r) > 0 {
-		return result, &TargetsError{r}
+	var targetErrors []string
+	for _, err := range r {
+		targetErrors = append(targetErrors, err.Error())
 	}
 
-	return result, nil
+	return &worker.OSBuildJobResult{
+		Success:       osbuildOutput.Success && len(targetErrors) == 0,
+		OSBuildOutput: osbuildOutput,
+		TargetErrors:  targetErrors,
+	}, nil
 }
 
 // Regularly ask osbuild-composer if the compose we're currently working on was
@@ -459,38 +450,21 @@ func main() {
 
 		fmt.Printf("Running job %v\n", job.Id())
 
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancelWatcher := context.WithCancel(context.Background())
 		go WatchJob(ctx, job)
 
 		result, err := RunJob(job, store, kojiServers)
-		if err != nil || result.Success == false {
-			log.Printf(" Job failed: %v", err)
-
-			// Ensure we always have a non-nil result, composer doesn't like nils.
-			// This can happen in cases when OSBuild crashes and doesn't produce
-			// a meaningful output. E.g. when the machine runs of disk space.
-			if result == nil {
-				result = &osbuild.Result{
-					Success: false,
-				}
-			}
-
-			// set the success to false on every error. This is hacky but composer
-			// currently relies only on this flag to decide whether a compose was
-			// successful. There's no different way how to inform composer that
-			// e.g. an upload fail. Therefore, this line reuses the osbuild success
-			// flag to indicate all error kinds.
-			result.Success = false
-		} else {
-			log.Printf(" 🎉 Job completed successfully: %v", job.Id())
+		cancelWatcher()
+		if err != nil {
+			log.Printf("Job %s failed: %v", job.Id(), err)
+			continue
 		}
 
-		// signal to WatchJob() that it can stop watching
-		cancel()
-
 		err = job.Update(result)
 		if err != nil {
 			log.Fatalf("Error reporting job result: %v", err)
 		}
+
+		log.Printf("Job %s finished", job.Id())
 	}
 }
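The new main() loop above stops the watcher explicitly as soon as
RunJob returns, instead of branching on the result first. A minimal,
self-contained sketch of that context pattern; watch is a stand-in for
WatchJob and the durations are made up:

    package main

    import (
        "context"
        "fmt"
        "time"
    )

    // watch is a stand-in for WatchJob: it polls until its context is canceled.
    func watch(ctx context.Context) {
        for {
            select {
            case <-ctx.Done():
                fmt.Println("watcher stopped")
                return
            case <-time.After(50 * time.Millisecond):
                fmt.Println("checking for cancellation")
            }
        }
    }

    func main() {
        ctx, cancelWatcher := context.WithCancel(context.Background())
        go watch(ctx)

        time.Sleep(120 * time.Millisecond) // stands in for RunJob
        cancelWatcher()                    // stop watching as soon as the job ends

        time.Sleep(60 * time.Millisecond) // give the watcher a moment to exit
    }

As in the patch, the watcher is canceled as soon as the job ends and
before the result is reported.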
diff --git a/internal/cloudapi/server.go b/internal/cloudapi/server.go
index 65e6d3579..47363f31a 100644
--- a/internal/cloudapi/server.go
+++ b/internal/cloudapi/server.go
@@ -249,7 +249,7 @@ func composeStatusFromJobStatus(js *worker.JobStatus) string {
 		return StatusRunning
 	}
 
-	if js.Result.OSBuildOutput != nil && js.Result.OSBuildOutput.Success {
+	if js.Result.Success {
 		return StatusSuccess
 	}
 
diff --git a/internal/kojiapi/server.go b/internal/kojiapi/server.go
index e3d205c53..21519d33a 100644
--- a/internal/kojiapi/server.go
+++ b/internal/kojiapi/server.go
@@ -240,7 +240,7 @@ func composeStatusFromJobStatus(js *worker.JobStatus) string {
 		return "pending"
 	}
 
-	if js.Result.OSBuildOutput != nil && js.Result.OSBuildOutput.Success {
+	if js.Result.Success {
 		return "success"
 	}
 
@@ -260,7 +260,7 @@ func imageStatusFromJobStatus(js *worker.JobStatus) string {
 		return "building"
 	}
 
-	if js.Result.OSBuildOutput != nil && js.Result.OSBuildOutput.Success {
+	if js.Result.Success {
 		return "success"
 	}
 
diff --git a/internal/weldr/api.go b/internal/weldr/api.go
index bf99e1d29..d8b0f069d 100644
--- a/internal/weldr/api.go
+++ b/internal/weldr/api.go
@@ -207,7 +207,7 @@ func composeStateFromJobStatus(js *worker.JobStatus) ComposeState {
 		return ComposeRunning
 	}
 
-	if js.Result.OSBuildOutput != nil && js.Result.OSBuildOutput.Success {
+	if js.Result.Success {
 		return ComposeFinished
 	}
 
diff --git a/internal/worker/client.go b/internal/worker/client.go
index f48fd10d9..045be648e 100644
--- a/internal/worker/client.go
+++ b/internal/worker/client.go
@@ -14,7 +14,6 @@ import (
 	"github.com/google/uuid"
 
 	"github.com/osbuild/osbuild-composer/internal/common"
-	"github.com/osbuild/osbuild-composer/internal/osbuild"
 	"github.com/osbuild/osbuild-composer/internal/worker/api"
 )
 
@@ -26,7 +25,7 @@ type Client struct {
 type Job interface {
 	Id() uuid.UUID
 	OSBuildArgs() (*OSBuildJob, error)
-	Update(result *osbuild.Result) error
+	Update(result *OSBuildJobResult) error
 	Canceled() (bool, error)
 	UploadArtifact(name string, reader io.Reader) error
 }
@@ -152,11 +151,10 @@ func (j *job) OSBuildArgs() (*OSBuildJob, error) {
 	return &args, nil
 }
 
-func (j *job) Update(result *osbuild.Result) error {
+func (j *job) Update(result *OSBuildJobResult) error {
 	var buf bytes.Buffer
 	err := json.NewEncoder(&buf).Encode(api.UpdateJobJSONRequestBody{
 		Result: result,
-		Status: "FINISHED",
 	})
 	if err != nil {
 		panic(err)
diff --git a/internal/worker/json.go b/internal/worker/json.go
index 3cd8120f4..b7bfcb348 100644
--- a/internal/worker/json.go
+++ b/internal/worker/json.go
@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 
 	"github.com/google/uuid"
-	"github.com/osbuild/osbuild-composer/internal/common"
 	"github.com/osbuild/osbuild-composer/internal/distro"
 	"github.com/osbuild/osbuild-composer/internal/osbuild"
 	"github.com/osbuild/osbuild-composer/internal/target"
@@ -22,7 +21,9 @@ type OSBuildJob struct {
 }
 
 type OSBuildJobResult struct {
+	Success       bool            `json:"success"`
 	OSBuildOutput *osbuild.Result `json:"osbuild_output,omitempty"`
+	TargetErrors  []string        `json:"target_errors,omitempty"`
 }
 
 //
@@ -46,8 +47,7 @@ type getJobResponse struct {
 }
 
 type updateJobRequest struct {
-	Status common.ImageBuildState `json:"status"`
-	Result *osbuild.Result        `json:"result"`
+	Result json.RawMessage `json:"result"`
 }
 
 type updateJobResponse struct {
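With the json.go changes above, the worker's Update() call now sends an
OSBuildJobResult wrapped in updateJobRequest. A sketch of the resulting
wire format, using trimmed stand-in types (OSBuildOutput is omitted)
and a made-up target error:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Stand-ins for the worker types above; the OSBuildOutput field
    // (*osbuild.Result) is left out to keep the sketch short.
    type osbuildJobResult struct {
        Success      bool     `json:"success"`
        TargetErrors []string `json:"target_errors,omitempty"`
    }

    type updateJobRequest struct {
        Result json.RawMessage `json:"result"`
    }

    func main() {
        // A run where osbuild succeeded but an upload failed: the overall
        // success flag is false and the reason is kept in target_errors.
        // The error text is invented for this example.
        res, _ := json.Marshal(osbuildJobResult{
            Success:      false,
            TargetErrors: []string{"AWS target: upload failed"},
        })

        body, _ := json.Marshal(updateJobRequest{Result: res})
        fmt.Println(string(body))
        // {"result":{"success":false,"target_errors":["AWS target: upload failed"]}}
    }

Keeping target errors separate from the osbuild output is what lets
composer distinguish a failed upload from a failed build, which the old
single success flag could not express.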
diff --git a/internal/worker/server.go b/internal/worker/server.go
index 15d1550e3..6cb9e1ac2 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -96,6 +96,12 @@ func (s *Server) JobStatus(id uuid.UUID) (*JobStatus, error) {
 		return nil, err
 	}
 
+	// For backwards compatibility: OSBuildJobResult didn't use to have a
+	// top-level `Success` flag. Override it here by looking into the job.
+	if !result.Success && result.OSBuildOutput != nil {
+		result.Success = result.OSBuildOutput.Success && len(result.TargetErrors) == 0
+	}
+
 	return &JobStatus{
 		Queued:  queued,
 		Started: started,
@@ -187,7 +193,7 @@ func (s *Server) RunningJob(token uuid.UUID) (uuid.UUID, error) {
 	return jobId, nil
 }
 
-func (s *Server) FinishJob(token uuid.UUID, result *OSBuildJobResult) error {
+func (s *Server) FinishJob(token uuid.UUID, result json.RawMessage) error {
 	s.runningMutex.Lock()
 	defer s.runningMutex.Unlock()
 
@@ -303,7 +309,7 @@ func (h *apiHandlers) UpdateJob(ctx echo.Context, idstr string) error {
 		return err
 	}
 
-	err = h.server.FinishJob(token, &OSBuildJobResult{OSBuildOutput: body.Result})
+	err = h.server.FinishJob(token, body.Result)
 	if err != nil {
 		switch err {
 		case ErrTokenNotExist:
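To see what the backwards-compatibility override in Server.JobStatus
handles, consider a job result stored by a composer from before this
patch, which has no top-level `success` field. A self-contained sketch
with trimmed stand-in types:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Trimmed stand-ins for osbuild.Result and OSBuildJobResult.
    type osbuildResult struct {
        Success bool `json:"success"`
    }

    type osbuildJobResult struct {
        Success       bool           `json:"success"`
        OSBuildOutput *osbuildResult `json:"osbuild_output,omitempty"`
        TargetErrors  []string       `json:"target_errors,omitempty"`
    }

    func main() {
        // A result stored by an older composer: no top-level `success` field.
        stored := []byte(`{"osbuild_output": {"success": true}}`)

        var result osbuildJobResult
        if err := json.Unmarshal(stored, &result); err != nil {
            panic(err)
        }

        // The override from Server.JobStatus above recovers the flag.
        if !result.Success && result.OSBuildOutput != nil {
            result.Success = result.OSBuildOutput.Success && len(result.TargetErrors) == 0
        }

        fmt.Println(result.Success) // true
    }

Results reported by old workers (a bare osbuild.Result) decode with
Success already set and OSBuildOutput nil, so the override leaves them
alone.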