composer uses the success field to decide whether a build succeeded or failed. This is bad. Unfortunately, fixing this requires kinda big code changes. This commit changes the worker's behaviour to set the osbuild success flag to false even on errors which weren't caused by osbuild (e.g. an upload error). This is certainly hacky but I think it's still essential to tell the user that an error occurred. Fixes #789
275 lines
6.7 KiB
Go
275 lines
6.7 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"path"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/osbuild/osbuild-composer/internal/common"
|
|
"github.com/osbuild/osbuild-composer/internal/target"
|
|
"github.com/osbuild/osbuild-composer/internal/upload/awsupload"
|
|
"github.com/osbuild/osbuild-composer/internal/upload/azure"
|
|
"github.com/osbuild/osbuild-composer/internal/worker"
|
|
)
|
|
|
|
type connectionConfig struct {
|
|
CACertFile string
|
|
ClientKeyFile string
|
|
ClientCertFile string
|
|
}
|
|
|
|
func createTLSConfig(config *connectionConfig) (*tls.Config, error) {
|
|
caCertPEM, err := ioutil.ReadFile(config.CACertFile)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
roots := x509.NewCertPool()
|
|
ok := roots.AppendCertsFromPEM(caCertPEM)
|
|
if !ok {
|
|
return nil, errors.New("failed to append root certificate")
|
|
}
|
|
|
|
cert, err := tls.LoadX509KeyPair(config.ClientCertFile, config.ClientKeyFile)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &tls.Config{
|
|
RootCAs: roots,
|
|
Certificates: []tls.Certificate{cert},
|
|
}, nil
|
|
}
|
|
|
|
type TargetsError struct {
|
|
Errors []error
|
|
}
|
|
|
|
func (e *TargetsError) Error() string {
|
|
errString := fmt.Sprintf("%d target(s) errored:\n", len(e.Errors))
|
|
|
|
for _, err := range e.Errors {
|
|
errString += err.Error() + "\n"
|
|
}
|
|
|
|
return errString
|
|
}
|
|
|
|
func RunJob(job *worker.Job, store string, uploadFunc func(uuid.UUID, string, io.Reader) error) (*common.ComposeResult, error) {
|
|
outputDirectory, err := ioutil.TempDir("/var/tmp", "osbuild-worker-*")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error creating temporary output directory: %v", err)
|
|
}
|
|
defer func() {
|
|
err := os.RemoveAll(outputDirectory)
|
|
if err != nil {
|
|
log.Printf("Error removing temporary output directory (%s): %v", outputDirectory, err)
|
|
}
|
|
}()
|
|
|
|
result, err := RunOSBuild(job.Manifest, store, outputDirectory, os.Stderr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var r []error
|
|
|
|
for _, t := range job.Targets {
|
|
switch options := t.Options.(type) {
|
|
case *target.LocalTargetOptions:
|
|
f, err := os.Open(path.Join(outputDirectory, options.Filename))
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
|
|
err = uploadFunc(job.Id, options.Filename, f)
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
|
|
case *target.AWSTargetOptions:
|
|
|
|
a, err := awsupload.New(options.Region, options.AccessKeyID, options.SecretAccessKey)
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
|
|
if options.Key == "" {
|
|
options.Key = job.Id.String()
|
|
}
|
|
|
|
_, err = a.Upload(path.Join(outputDirectory, options.Filename), options.Bucket, options.Key)
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
|
|
/* TODO: communicate back the AMI */
|
|
_, err = a.Register(t.ImageName, options.Bucket, options.Key)
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
case *target.AzureTargetOptions:
|
|
|
|
credentials := azure.Credentials{
|
|
StorageAccount: options.StorageAccount,
|
|
StorageAccessKey: options.StorageAccessKey,
|
|
}
|
|
metadata := azure.ImageMetadata{
|
|
ContainerName: options.Container,
|
|
ImageName: t.ImageName,
|
|
}
|
|
|
|
const azureMaxUploadGoroutines = 4
|
|
err := azure.UploadImage(
|
|
credentials,
|
|
metadata,
|
|
path.Join(outputDirectory, options.Filename),
|
|
azureMaxUploadGoroutines,
|
|
)
|
|
|
|
if err != nil {
|
|
r = append(r, err)
|
|
continue
|
|
}
|
|
default:
|
|
r = append(r, fmt.Errorf("invalid target type"))
|
|
}
|
|
}
|
|
|
|
err = os.RemoveAll(outputDirectory)
|
|
if err != nil {
|
|
log.Printf("Error removing osbuild output directory (%s): %v", outputDirectory, err)
|
|
}
|
|
|
|
if len(r) > 0 {
|
|
return result, &TargetsError{r}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// Regularly ask osbuild-composer if the compose we're currently working on was
|
|
// canceled and exit the process if it was.
|
|
// It would be cleaner to kill the osbuild process using (`exec.CommandContext`
|
|
// or similar), but osbuild does not currently support this. Exiting here will
|
|
// make systemd clean up the whole cgroup and restart this service.
|
|
func WatchJob(ctx context.Context, client *worker.Client, job *worker.Job) {
|
|
for {
|
|
select {
|
|
case <-time.After(15 * time.Second):
|
|
if client.JobCanceled(job) {
|
|
log.Println("Job was canceled. Exiting.")
|
|
os.Exit(0)
|
|
}
|
|
case <-ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
var unix bool
|
|
flag.BoolVar(&unix, "unix", false, "Interpret 'address' as a path to a unix domain socket instead of a network address")
|
|
|
|
flag.Usage = func() {
|
|
fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [-unix] address\n", os.Args[0])
|
|
flag.PrintDefaults()
|
|
os.Exit(0)
|
|
}
|
|
|
|
flag.Parse()
|
|
|
|
address := flag.Arg(0)
|
|
if address == "" {
|
|
flag.Usage()
|
|
}
|
|
|
|
cacheDirectory, ok := os.LookupEnv("CACHE_DIRECTORY")
|
|
if !ok {
|
|
log.Fatal("CACHE_DIRECTORY is not set. Is the service file missing CacheDirectory=?")
|
|
}
|
|
store := path.Join(cacheDirectory, "osbuild-store")
|
|
|
|
var client *worker.Client
|
|
if unix {
|
|
client = worker.NewClientUnix(address)
|
|
} else {
|
|
conf, err := createTLSConfig(&connectionConfig{
|
|
CACertFile: "/etc/osbuild-composer/ca-crt.pem",
|
|
ClientKeyFile: "/etc/osbuild-composer/worker-key.pem",
|
|
ClientCertFile: "/etc/osbuild-composer/worker-crt.pem",
|
|
})
|
|
if err != nil {
|
|
log.Fatalf("Error creating TLS config: %v", err)
|
|
}
|
|
|
|
client = worker.NewClient(address, conf)
|
|
}
|
|
|
|
for {
|
|
fmt.Println("Waiting for a new job...")
|
|
job, err := client.AddJob()
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
fmt.Printf("Running job %s\n", job.Id)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
go WatchJob(ctx, client, job)
|
|
|
|
var status common.ImageBuildState
|
|
result, err := RunJob(job, store, client.UploadImage)
|
|
if err != nil {
|
|
log.Printf(" Job failed: %v", err)
|
|
status = common.IBFailed
|
|
|
|
// If the error comes from osbuild, retrieve the result
|
|
if osbuildError, ok := err.(*OSBuildError); ok {
|
|
result = osbuildError.Result
|
|
}
|
|
|
|
// Ensure we always have a non-nil result, composer doesn't like nils.
|
|
// This can happen in cases when OSBuild crashes and doesn't produce
|
|
// a meaningful output. E.g. when the machine runs of disk space.
|
|
if result == nil {
|
|
result = &common.ComposeResult{
|
|
Success: false,
|
|
}
|
|
}
|
|
|
|
// set the success to false on every error. This is hacky but composer
|
|
// currently relies only on this flag to decide whether a compose was
|
|
// successful. There's no different way how to inform composer that
|
|
// e.g. an upload fail. Therefore, this line reuses the osbuild success
|
|
// flag to indicate all error kinds.
|
|
result.Success = false
|
|
} else {
|
|
log.Printf(" 🎉 Job completed successfully: %s", job.Id)
|
|
status = common.IBFinished
|
|
}
|
|
|
|
// signal to WatchJob() that it can stop watching
|
|
cancel()
|
|
|
|
err = client.UpdateJob(job, status, result)
|
|
if err != nil {
|
|
log.Fatalf("Error reporting job result: %v", err)
|
|
}
|
|
}
|
|
}
|