osbuild-service-maintenance: Clean up expired images

This commit is contained in:
sanne 2021-11-19 16:26:09 +01:00 committed by Tom Gundersen
parent 742e0e6616
commit c43ad2b22a
23 changed files with 899 additions and 32 deletions

View file

@ -0,0 +1,72 @@
package main
import (
"context"
"sync"
"time"
"github.com/sirupsen/logrus"
"golang.org/x/sync/semaphore"
"github.com/osbuild/osbuild-composer/internal/cloud/awscloud"
)
// AWSCleanup deregisters expired composer images in a single AWS region.
//
// It lists the images returned by DescribeImagesByTag for tag "Name" with
// value "composer-api-*" and, for every image whose creation date is strictly
// before cutoff, calls RemoveSnapshotAndDeregisterImage in its own goroutine.
// At most maxConcurrentRequests removals are in flight at once; values below
// 1 are treated as 1. When dryRun is true nothing is removed and the
// candidates are only logged.
//
// Only failures to create the AWS client or to list the images are returned.
// Per-image problems (missing fields, unparsable dates, failed removals) are
// logged and do not abort the run.
func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey, region string, cutoff time.Time) error {
	a, err := awscloud.New(region, accessKeyID, accessKey, "")
	if err != nil {
		return err
	}

	// A semaphore of size < 1 can never grant a weight of 1, and Acquire with
	// a background context would then block forever.
	if maxConcurrentRequests < 1 {
		logrus.Warnf("Invalid maximum of %d concurrent requests, using 1", maxConcurrentRequests)
		maxConcurrentRequests = 1
	}

	var wg sync.WaitGroup
	sem := semaphore.NewWeighted(int64(maxConcurrentRequests))

	images, err := a.DescribeImagesByTag("Name", "composer-api-*")
	if err != nil {
		return err
	}

	for _, image := range images {
		// TODO are these actual concerns?
		if image.ImageId == nil {
			logrus.Infof("ImageId is nil %v", image)
			continue
		}
		if image.CreationDate == nil {
			logrus.Infof("Image %v has nil creationdate", *image.ImageId)
			continue
		}

		created, err := time.Parse(time.RFC3339, *image.CreationDate)
		if err != nil {
			logrus.Infof("Unable to parse date %s for image %s: %v", *image.CreationDate, *image.ImageId, err)
			continue
		}

		// Keep everything created at or after the cutoff.
		if !created.Before(cutoff) {
			continue
		}

		if dryRun {
			logrus.Infof("Dry run, aws image %s in region %s, with creation date %s would be removed", *image.ImageId, region, *image.CreationDate)
			continue
		}

		// Blocks until a slot is free, bounding the number of goroutines.
		if err = sem.Acquire(context.Background(), 1); err != nil {
			logrus.Errorf("Error acquiring semaphore: %v", err)
			continue
		}

		// Per-iteration copy so the goroutine does not observe later loop
		// iterations on toolchains without per-iteration loop variables.
		img := image
		wg.Add(1)
		go func() {
			defer sem.Release(1)
			defer wg.Done()

			if err := a.RemoveSnapshotAndDeregisterImage(img); err != nil {
				logrus.Errorf("Cleanup for image %s in region %s failed: %v", *img.ImageId, region, err)
			}
		}()
	}

	wg.Wait()
	return nil
}

View file

@ -0,0 +1,55 @@
package main
import (
"fmt"
"os"
"reflect"
)
// Config holds the settings of the maintenance job. Every field is filled
// from the environment variable named in its `env` tag by LoadConfigFromEnv.
//
// Do not write this config to logs or stdout, it contains secrets!
type Config struct {
// DryRun is parsed with strconv.ParseBool in main; when true, candidates
// are only logged and nothing is deleted.
DryRun string `env:"DRY_RUN"`
// MaxConcurrentRequests is parsed with strconv.Atoi in main and passed to
// the AWS and GCP cleanups as their concurrency limit.
MaxConcurrentRequests string `env:"MAX_CONCURRENT_REQUESTS"`
// PGHost through PGSSLMode are combined by main into the postgres:// URL
// used to open the job queue database.
PGHost string `env:"PGHOST"`
PGPort string `env:"PGPORT"`
PGDatabase string `env:"PGDATABASE"`
PGUser string `env:"PGUSER"`
PGPassword string `env:"PGPASSWORD"`
PGSSLMode string `env:"PGSSLMODE"`
// GoogleApplicationCreds is only checked for being non-empty in main; the
// GCP cleanup is skipped when it is empty.
GoogleApplicationCreds string `env:"GOOGLE_APPLICATION_CREDENTIALS"`
// AWSAccessKeyID and AWSSecretAccessKey are passed by main to AWSCleanup.
AWSAccessKeyID string `env:"AWS_ACCESS_KEY_ID"`
AWSSecretAccessKey string `env:"AWS_SECRET_ACCESS_KEY"`
}
// LoadConfigFromEnv fills the struct pointed to by intf from environment
// variables.
//
// Every field of the struct must carry an `env:"NAME"` tag naming the
// variable to read, and must have a string or pointer-to-string type:
//
//   - a string field is overwritten with the variable's value when it is set;
//   - a *string field is pointed at a fresh copy of the value when it is set.
//
// Fields whose variable is not set are left untouched, so a caller may
// pre-populate defaults before calling. By convention *string marks an
// optional value and string a required one, but this function does not
// enforce that a required variable is present.
//
// An error is returned (instead of panicking) when intf is not a non-nil
// pointer to a struct, when a field lacks the env tag, or when a field whose
// variable is set is unexported or has an unsupported type.
func LoadConfigFromEnv(intf interface{}) error {
	ptr := reflect.ValueOf(intf)
	if ptr.Kind() != reflect.Ptr || ptr.IsNil() {
		return fmt.Errorf("LoadConfigFromEnv: expected a non-nil pointer to a struct, got %T", intf)
	}
	v := ptr.Elem()
	if v.Kind() != reflect.Struct {
		return fmt.Errorf("LoadConfigFromEnv: expected a non-nil pointer to a struct, got %T", intf)
	}
	t := v.Type()

	for i := 0; i < v.NumField(); i++ {
		fieldT := t.Field(i)
		fieldV := v.Field(i)

		key, ok := fieldT.Tag.Lookup("env")
		if !ok {
			return fmt.Errorf("No env tag in config field %s", fieldT.Name)
		}

		confV, ok := os.LookupEnv(key)
		if !ok {
			// Unset variable: keep whatever the caller put in the field.
			continue
		}

		// Setting an unexported field through reflection would panic.
		if !fieldV.CanSet() {
			return fmt.Errorf("Config field %s cannot be set (is it exported?)", fieldT.Name)
		}

		switch fieldV.Kind() {
		case reflect.Ptr:
			if fieldT.Type.Elem().Kind() != reflect.String {
				return fmt.Errorf("Unsupported type %s for config field %s", fieldT.Type, fieldT.Name)
			}
			// Allocate a value of the field's own element type so that
			// named string types are handled as well as plain *string.
			p := reflect.New(fieldT.Type.Elem())
			p.Elem().SetString(confV)
			fieldV.Set(p)
		case reflect.String:
			fieldV.SetString(confV)
		default:
			return fmt.Errorf("Unsupported type %s for config field %s", fieldT.Type, fieldT.Name)
		}
	}
	return nil
}

View file

@ -0,0 +1,66 @@
package main
import (
"context"
"fmt"
"sync"
"time"
"github.com/sirupsen/logrus"
"golang.org/x/sync/semaphore"
"google.golang.org/api/compute/v1"
"github.com/osbuild/osbuild-composer/internal/cloud/gcp"
)
// GCPCleanup deletes expired GCP compute images.
//
// It hands a callback to ComputeExecuteFunctionForImages; for each image in
// the lists passed to that callback whose creation timestamp is strictly
// before cutoff, ComputeImageDelete is called in its own goroutine. At most
// maxConcurrentRequests deletions are in flight at once; values below 1 are
// treated as 1. When dryRun is true nothing is deleted and the candidates are
// only logged.
//
// The GCP client is created with nil credentials.
// NOTE(review): presumably this falls back to the ambient application
// credentials — confirm against gcp.New.
//
// Only failures to create the client or to enumerate images are returned;
// per-image problems are logged. Deletions that were already started are
// always waited for before returning, including on an enumeration error.
func GCPCleanup(maxConcurrentRequests int, dryRun bool, cutoff time.Time) error {
	g, err := gcp.New(nil)
	if err != nil {
		return err
	}

	// A semaphore of size < 1 can never grant a weight of 1, and Acquire with
	// a background context would then block forever.
	if maxConcurrentRequests < 1 {
		logrus.Warnf("Invalid maximum of %d concurrent requests, using 1", maxConcurrentRequests)
		maxConcurrentRequests = 1
	}

	sem := semaphore.NewWeighted(int64(maxConcurrentRequests))
	var wg sync.WaitGroup

	removeImageOlderThan := func(images *compute.ImageList) error {
		for _, image := range images.Items {
			created, err := time.Parse(time.RFC3339, image.CreationTimestamp)
			if err != nil {
				logrus.Errorf("Unable to parse image %s(%d)'s creation timestamp: %v", image.Name, image.Id, err)
				continue
			}

			// Keep everything created at or after the cutoff.
			if !created.Before(cutoff) {
				continue
			}

			if dryRun {
				logrus.Infof("Dry run, gcp image %s(%d), with creation date %v would be removed", image.Name, image.Id, created)
				continue
			}

			// Blocks until a slot is free, bounding the number of goroutines.
			if err := sem.Acquire(context.Background(), 1); err != nil {
				logrus.Errorf("Error acquiring semaphore: %v", err)
				continue
			}

			wg.Add(1)
			go func(id string, created time.Time) {
				defer sem.Release(1)
				defer wg.Done()

				// Use a goroutine-local error instead of writing to a
				// variable of the enclosing scope.
				if err := g.ComputeImageDelete(context.Background(), id); err != nil {
					logrus.Errorf("Error deleting image %s created at %v: %v", id, created, err)
				}
			}(fmt.Sprintf("%d", image.Id), created)
		}
		return nil
	}

	err = g.ComputeExecuteFunctionForImages(context.Background(), removeImageOlderThan)
	// Wait even when enumeration failed so that deletions already started
	// are not abandoned mid-flight.
	wg.Wait()
	return err
}

View file

@ -0,0 +1,108 @@
package main
import (
"fmt"
"strconv"
"sync"
"time"
"github.com/sirupsen/logrus"
"github.com/osbuild/osbuild-composer/internal/jobqueue/dbjobqueue"
)
// main runs one pass of the maintenance job:
//
//  1. load the configuration from the environment (panicking on invalid
//     configuration or an unreachable database),
//  2. clean up AWS (us-east-1) and GCP images older than the cutoff, both
//     clouds concurrently,
//  3. delete job queue entries selected by JobsUptoByType for the cutoff,
//     together with their dependencies.
//
// With DRY_RUN enabled every step only logs what it would delete.
func main() {
	logrus.SetReportCaller(true)

	archs := []string{"x86_64"}
	jobType := "osbuild"
	// 14 days
	cutoff := time.Now().Add(-(time.Hour * 24 * 14))
	logrus.Infof("Cutoff date: %v", cutoff)

	var conf Config
	err := LoadConfigFromEnv(&conf)
	if err != nil {
		panic(err)
	}

	maxCReqs, err := strconv.Atoi(conf.MaxConcurrentRequests)
	if err != nil {
		panic(err)
	}

	dryRun, err := strconv.ParseBool(conf.DryRun)
	if err != nil {
		panic(err)
	}

	if dryRun {
		logrus.Info("Dry run, no state will be changed")
	}

	// NOTE(review): the components are interpolated verbatim; a user name or
	// password containing URL-reserved characters would need escaping.
	dbURL := fmt.Sprintf("postgres://%s:%s@%s:%s/%s?sslmode=%s",
		conf.PGUser,
		conf.PGPassword,
		conf.PGHost,
		conf.PGPort,
		conf.PGDatabase,
		conf.PGSSLMode,
	)
	jobs, err := dbjobqueue.New(dbURL)
	if err != nil {
		panic(err)
	}

	// Both cloud cleanups run concurrently; each goroutine keeps its own
	// error variable so neither writes to main's err.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		logrus.Info("Cleaning up AWS")
		err := AWSCleanup(maxCReqs, dryRun, conf.AWSAccessKeyID, conf.AWSSecretAccessKey, "us-east-1", cutoff)
		if err != nil {
			logrus.Errorf("AWS cleanup failed: %v", err)
		}
	}()

	wg.Add(1)
	go func() {
		defer wg.Done()
		logrus.Info("Cleaning up GCP")
		if conf.GoogleApplicationCreds == "" {
			logrus.Error("GCP credentials not specified")
			return
		}
		err := GCPCleanup(maxCReqs, dryRun, cutoff)
		if err != nil {
			logrus.Errorf("GCP Cleanup failed: %v", err)
		}
	}()

	wg.Wait()
	logrus.Info("🦀🦀🦀 cloud cleanup done 🦀🦀🦀")

	// Job types are "<jobType>:<arch>", one per supported architecture.
	var jobTypes []string
	for _, a := range archs {
		jobTypes = append(jobTypes, fmt.Sprintf("%s:%s", jobType, a))
	}

	jobsByType, err := jobs.JobsUptoByType(jobTypes, cutoff)
	if err != nil {
		logrus.Errorf("Error querying jobs: %v", err)
		return
	}

	for k, v := range jobsByType {
		logrus.Infof("Deleting jobs and their dependencies of type %v", k)
		if dryRun {
			logrus.Infof("Dry run, skipping deletion of jobs: %v", v)
			continue
		}

		for _, jobID := range v {
			// A failed deletion is logged and the remaining jobs are still
			// processed.
			if err := jobs.DeleteJobIncludingDependencies(jobID); err != nil {
				logrus.Errorf("Error deleting job %v: %v", jobID, err)
			}
		}
	}
	logrus.Info("🦀🦀🦀 dbqueue cleanup done 🦀🦀🦀")
}