diff --git a/cmd/osbuild-service-maintenance/aws.go b/cmd/osbuild-service-maintenance/aws.go index bfdaa7af9..d5824a26c 100644 --- a/cmd/osbuild-service-maintenance/aws.go +++ b/cmd/osbuild-service-maintenance/aws.go @@ -24,6 +24,8 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s var a *awscloud.AWS var err error + ctx := context.Background() + if accessKeyID != "" && accessKey != "" { a, err = awscloud.New(region, accessKeyID, accessKey, "") if err != nil { @@ -83,7 +85,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s continue } - if err = sem.Acquire(context.Background(), 1); err != nil { + if err = sem.Acquire(ctx, 1); err != nil { logrus.Errorf("Error acquiring semaphore: %v", err) continue } @@ -102,6 +104,28 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s wg.Wait() } + // using err to collect both errors as we want to + // continue execution if one cleanup fails + err = nil + errSecureInstances := terminateOrphanedSecureInstances(a, dryRun) + // keep going with other cleanup even on error + if errSecureInstances != nil { + logrus.Errorf("Error in terminating secure instances: %v, continuing other cleanup.", errSecureInstances) + err = errSecureInstances + } + + errSecurityGroups := searchSGAndCleanup(ctx, a, dryRun) + if errSecurityGroups != nil { + logrus.Errorf("Error in cleaning up security groups: %v", errSecurityGroups) + if err != nil { + err = fmt.Errorf("Multiple errors while processing AWSCleanup: %w and %w.", err, errSecurityGroups) + } + } + + return err +} + +func terminateOrphanedSecureInstances(a *awscloud.AWS, dryRun bool) error { // Terminate leftover secure instances reservations, err := a.DescribeInstancesByTag("parent", "i-*") if err != nil { @@ -123,7 +147,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s } } - instanceIDs = filterReservations(instanceIDs, reservations) + instanceIDs = filterOnTooOld(instanceIDs, reservations) logrus.Infof("Cleaning up executor instances: %v", instanceIDs) if !dryRun { err = a.TerminateInstances(instanceIDs) @@ -133,11 +157,10 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s } else { logrus.Info("Dry run, didn't actually terminate any instances") } - return nil } -func filterReservations(instanceIDs []string, reservations []ec2types.Reservation) []string { +func filterOnTooOld(instanceIDs []string, reservations []ec2types.Reservation) []string { for _, res := range reservations { for _, i := range res.Instances { if i.LaunchTime.Before(time.Now().Add(-time.Hour * 2)) { @@ -188,13 +211,50 @@ func checkValidParent(childId string, parent []ec2types.Reservation) bool { } parentState := parent[0].Instances[0].State.Name - if parentState == ec2types.InstanceStateNameRunning || parentState == ec2types.InstanceStateNamePending { + if parentState != ec2types.InstanceStateNameTerminated { return true } logrus.Infof("Instance %s has a parent (%s) in state %s, so we'll terminate %s.", childId, *parent[0].Instances[0].InstanceId, parentState, childId) return false } +func searchSGAndCleanup(ctx context.Context, a *awscloud.AWS, dryRun bool) error { + securityGroups, err := a.DescribeSecurityGroupsByPrefix(ctx, "SG for i-") + if err != nil { + return err + } + + for _, sg := range securityGroups { + if sg.GroupId == nil || sg.GroupName == nil { + logrus.Errorf( + "Security Group needs to have a GroupId (%v) and a GroupName (%v).", + sg.GroupId, + sg.GroupName) + continue + } + reservations, err := a.DescribeInstancesBySecurityGroupID(*sg.GroupId) + if err != nil { + logrus.Errorf("Failed to describe security group %s: %v", *sg.GroupId, err) + continue + } + + // If no instance is running/pending, delete the SG + if allTerminated(reservations) { + logrus.Infof("Deleting security group: %s (%s)", *sg.GroupName, *sg.GroupId) + if !dryRun { + err := a.DeleteSecurityGroupById(ctx, sg.GroupId) + + if err != nil { + logrus.Errorf("Failed to delete security group %s: %v", *sg.GroupId, err) + } + } + } else { + logrus.Debugf("Security group %s has non terminated instances associated with it.", *sg.GroupId) + } + } + return nil +} + // allTerminated returns true if any instance of the reservations is not terminated // then it's considered "in use" func allTerminated(reservations []ec2types.Reservation) bool { diff --git a/cmd/osbuild-service-maintenance/aws_test.go b/cmd/osbuild-service-maintenance/aws_test.go index 32ced52ae..494c03c76 100644 --- a/cmd/osbuild-service-maintenance/aws_test.go +++ b/cmd/osbuild-service-maintenance/aws_test.go @@ -17,7 +17,7 @@ func TestFilterReservations(t *testing.T) { Instances: []ec2types.Instance{ { LaunchTime: common.ToPtr(time.Now().Add(-time.Hour * 24)), - InstanceId: common.ToPtr("not filtered"), + InstanceId: common.ToPtr("not filtered 1"), }, }, }, @@ -25,7 +25,7 @@ func TestFilterReservations(t *testing.T) { Instances: []ec2types.Instance{ { LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 121)), - InstanceId: common.ToPtr("not filtered"), + InstanceId: common.ToPtr("not filtered 2"), }, }, }, @@ -33,7 +33,7 @@ func TestFilterReservations(t *testing.T) { Instances: []ec2types.Instance{ { LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 119)), - InstanceId: common.ToPtr("filtered"), + InstanceId: common.ToPtr("filtered 1"), }, }, }, @@ -41,12 +41,87 @@ func TestFilterReservations(t *testing.T) { Instances: []ec2types.Instance{ { LaunchTime: common.ToPtr(time.Now()), - InstanceId: common.ToPtr("filtered"), + InstanceId: common.ToPtr("filtered 2"), }, }, }, } - instanceIDs := main.FilterReservations(reservations) - require.Equal(t, []string{"not filtered", "not filtered"}, instanceIDs) + instanceIDs := main.FilterOnTooOld([]string{}, reservations) + require.Equal(t, []string{"not filtered 1", "not filtered 2"}, instanceIDs) +} + +func TestCheckValidParent(t *testing.T) { + testInstanceID := "TestInstance" + tests := + []struct { + parent []ec2types.Reservation + result bool + }{ + // no parent + { + parent: []ec2types.Reservation{}, + result: false, + }, + // many parents - "valid" to leave as is + { + parent: []ec2types.Reservation{ + {}, {}, + }, + result: true, + }, + // no parent instance + { + parent: []ec2types.Reservation{ + {Instances: []ec2types.Instance{}}, + }, + result: false, + }, + // many parent instances - "valid" to leave as is + { + parent: []ec2types.Reservation{ + {Instances: []ec2types.Instance{{}, {}}}, + }, + result: true, + }, + // pending parent + { + parent: []ec2types.Reservation{ + {Instances: []ec2types.Instance{{ + InstanceId: &testInstanceID, + State: &ec2types.InstanceState{ + Name: ec2types.InstanceStateNamePending, + }, + }}}, + }, + result: true, + }, + // running parent + { + parent: []ec2types.Reservation{ + {Instances: []ec2types.Instance{{ + InstanceId: &testInstanceID, + State: &ec2types.InstanceState{ + Name: ec2types.InstanceStateNameRunning, + }, + }}}, + }, + result: true, + }, + // terminated parent - not valid instance + { + parent: []ec2types.Reservation{ + {Instances: []ec2types.Instance{{ + InstanceId: &testInstanceID, + State: &ec2types.InstanceState{ + Name: ec2types.InstanceStateNameTerminated, + }, + }}}, + }, + result: false, + }, + } + for _, tc := range tests { + require.Equal(t, tc.result, main.CheckValidParent("testChildId", tc.parent)) + } } diff --git a/cmd/osbuild-service-maintenance/export_test.go b/cmd/osbuild-service-maintenance/export_test.go index bf813e47a..563072b44 100644 --- a/cmd/osbuild-service-maintenance/export_test.go +++ b/cmd/osbuild-service-maintenance/export_test.go @@ -1,3 +1,4 @@ package main -var FilterReservations = filterReservations +var FilterOnTooOld = filterOnTooOld +var CheckValidParent = checkValidParent diff --git a/internal/cloud/awscloud/maintenance.go b/internal/cloud/awscloud/maintenance.go index a846aaaec..3e36f0181 100644 --- a/internal/cloud/awscloud/maintenance.go +++ b/internal/cloud/awscloud/maintenance.go @@ -3,6 +3,7 @@ package awscloud import ( "context" "fmt" + "strings" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/ec2" @@ -66,14 +67,14 @@ func (a *AWS) RemoveSnapshotAndDeregisterImage(image *ec2types.Image) error { return err } -func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reservation, error) { +func (a *AWS) describeInstancesByKeyValue(key, value string) ([]ec2types.Reservation, error) { res, err := a.ec2.DescribeInstances( context.Background(), &ec2.DescribeInstancesInput{ Filters: []ec2types.Filter{ { - Name: aws.String(fmt.Sprintf("tag:%s", tagKey)), - Values: []string{tagValue}, + Name: aws.String(key), + Values: []string{value}, }, }, }, @@ -84,6 +85,14 @@ func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reserv return res.Reservations, nil } +func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reservation, error) { + return a.describeInstancesByKeyValue(fmt.Sprintf("tag:%s", tagKey), tagValue) +} + +func (a *AWS) DescribeInstancesBySecurityGroupID(securityGroupID string) ([]ec2types.Reservation, error) { + return a.describeInstancesByKeyValue("instance.group-id", securityGroupID) +} + func (a *AWS) DescribeInstancesByInstanceID(instanceID string) ([]ec2types.Reservation, error) { res, err := a.ec2.DescribeInstances( context.Background(), @@ -106,3 +115,29 @@ func (a *AWS) TerminateInstances(instanceIDs []string) error { ) return err } + +func (a *AWS) DescribeSecurityGroupsByPrefix(ctx context.Context, prefix string) ([]ec2types.SecurityGroup, error) { + var securityGroups []ec2types.SecurityGroup + + sgOutput, err := a.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{}) + if err != nil { + return securityGroups, fmt.Errorf("failed to describe security groups: %w", err) + } + + for _, sg := range sgOutput.SecurityGroups { + if sg.GroupName != nil && strings.HasPrefix(*sg.GroupName, prefix) { + securityGroups = append(securityGroups, sg) + } + } + return securityGroups, nil +} + +func (a *AWS) DeleteSecurityGroupById(ctx context.Context, sgID *string) error { + _, err := a.ec2.DeleteSecurityGroup( + ctx, + &ec2.DeleteSecurityGroupInput{ + GroupId: sgID, + }, + ) + return err +}