osbuild-service-maintenance: implement removal of security groups

Security groups of instances that are terminated should be removed.
HMS-3632
This commit is contained in:
Florian Schüller 2024-12-03 17:55:59 +01:00 committed by Florian Schüller
parent 7ebe266d3c
commit a96ea533c0
4 changed files with 186 additions and 15 deletions

View file

@ -24,6 +24,8 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
var a *awscloud.AWS
var err error
ctx := context.Background()
if accessKeyID != "" && accessKey != "" {
a, err = awscloud.New(region, accessKeyID, accessKey, "")
if err != nil {
@ -83,7 +85,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
continue
}
if err = sem.Acquire(context.Background(), 1); err != nil {
if err = sem.Acquire(ctx, 1); err != nil {
logrus.Errorf("Error acquiring semaphore: %v", err)
continue
}
@ -102,6 +104,28 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
wg.Wait()
}
// using err to collect both errors as we want to
// continue execution if one cleanup fails
err = nil
errSecureInstances := terminateOrphanedSecureInstances(a, dryRun)
// keep going with other cleanup even on error
if errSecureInstances != nil {
logrus.Errorf("Error in terminating secure instances: %v, continuing other cleanup.", errSecureInstances)
err = errSecureInstances
}
errSecurityGroups := searchSGAndCleanup(ctx, a, dryRun)
if errSecurityGroups != nil {
logrus.Errorf("Error in cleaning up security groups: %v", errSecurityGroups)
if err != nil {
err = fmt.Errorf("Multiple errors while processing AWSCleanup: %w and %w.", err, errSecurityGroups)
}
}
return err
}
func terminateOrphanedSecureInstances(a *awscloud.AWS, dryRun bool) error {
// Terminate leftover secure instances
reservations, err := a.DescribeInstancesByTag("parent", "i-*")
if err != nil {
@ -123,7 +147,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
}
}
instanceIDs = filterReservations(instanceIDs, reservations)
instanceIDs = filterOnTooOld(instanceIDs, reservations)
logrus.Infof("Cleaning up executor instances: %v", instanceIDs)
if !dryRun {
err = a.TerminateInstances(instanceIDs)
@ -133,11 +157,10 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
} else {
logrus.Info("Dry run, didn't actually terminate any instances")
}
return nil
}
func filterReservations(instanceIDs []string, reservations []ec2types.Reservation) []string {
func filterOnTooOld(instanceIDs []string, reservations []ec2types.Reservation) []string {
for _, res := range reservations {
for _, i := range res.Instances {
if i.LaunchTime.Before(time.Now().Add(-time.Hour * 2)) {
@ -188,13 +211,50 @@ func checkValidParent(childId string, parent []ec2types.Reservation) bool {
}
parentState := parent[0].Instances[0].State.Name
if parentState == ec2types.InstanceStateNameRunning || parentState == ec2types.InstanceStateNamePending {
if parentState != ec2types.InstanceStateNameTerminated {
return true
}
logrus.Infof("Instance %s has a parent (%s) in state %s, so we'll terminate %s.", childId, *parent[0].Instances[0].InstanceId, parentState, childId)
return false
}
func searchSGAndCleanup(ctx context.Context, a *awscloud.AWS, dryRun bool) error {
securityGroups, err := a.DescribeSecurityGroupsByPrefix(ctx, "SG for i-")
if err != nil {
return err
}
for _, sg := range securityGroups {
if sg.GroupId == nil || sg.GroupName == nil {
logrus.Errorf(
"Security Group needs to have a GroupId (%v) and a GroupName (%v).",
sg.GroupId,
sg.GroupName)
continue
}
reservations, err := a.DescribeInstancesBySecurityGroupID(*sg.GroupId)
if err != nil {
logrus.Errorf("Failed to describe security group %s: %v", *sg.GroupId, err)
continue
}
// If no instance is running/pending, delete the SG
if allTerminated(reservations) {
logrus.Infof("Deleting security group: %s (%s)", *sg.GroupName, *sg.GroupId)
if !dryRun {
err := a.DeleteSecurityGroupById(ctx, sg.GroupId)
if err != nil {
logrus.Errorf("Failed to delete security group %s: %v", *sg.GroupId, err)
}
}
} else {
logrus.Debugf("Security group %s has non terminated instances associated with it.", *sg.GroupId)
}
}
return nil
}
// allTerminated returns true if any instance of the reservations is not terminated
// then it's considered "in use"
func allTerminated(reservations []ec2types.Reservation) bool {

View file

@ -17,7 +17,7 @@ func TestFilterReservations(t *testing.T) {
Instances: []ec2types.Instance{
{
LaunchTime: common.ToPtr(time.Now().Add(-time.Hour * 24)),
InstanceId: common.ToPtr("not filtered"),
InstanceId: common.ToPtr("not filtered 1"),
},
},
},
@ -25,7 +25,7 @@ func TestFilterReservations(t *testing.T) {
Instances: []ec2types.Instance{
{
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 121)),
InstanceId: common.ToPtr("not filtered"),
InstanceId: common.ToPtr("not filtered 2"),
},
},
},
@ -33,7 +33,7 @@ func TestFilterReservations(t *testing.T) {
Instances: []ec2types.Instance{
{
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 119)),
InstanceId: common.ToPtr("filtered"),
InstanceId: common.ToPtr("filtered 1"),
},
},
},
@ -41,12 +41,87 @@ func TestFilterReservations(t *testing.T) {
Instances: []ec2types.Instance{
{
LaunchTime: common.ToPtr(time.Now()),
InstanceId: common.ToPtr("filtered"),
InstanceId: common.ToPtr("filtered 2"),
},
},
},
}
instanceIDs := main.FilterReservations(reservations)
require.Equal(t, []string{"not filtered", "not filtered"}, instanceIDs)
instanceIDs := main.FilterOnTooOld([]string{}, reservations)
require.Equal(t, []string{"not filtered 1", "not filtered 2"}, instanceIDs)
}
func TestCheckValidParent(t *testing.T) {
testInstanceID := "TestInstance"
tests :=
[]struct {
parent []ec2types.Reservation
result bool
}{
// no parent
{
parent: []ec2types.Reservation{},
result: false,
},
// many parents - "valid" to leave as is
{
parent: []ec2types.Reservation{
{}, {},
},
result: true,
},
// no parent instance
{
parent: []ec2types.Reservation{
{Instances: []ec2types.Instance{}},
},
result: false,
},
// many parent instances - "valid" to leave as is
{
parent: []ec2types.Reservation{
{Instances: []ec2types.Instance{{}, {}}},
},
result: true,
},
// pending parent
{
parent: []ec2types.Reservation{
{Instances: []ec2types.Instance{{
InstanceId: &testInstanceID,
State: &ec2types.InstanceState{
Name: ec2types.InstanceStateNamePending,
},
}}},
},
result: true,
},
// running parent
{
parent: []ec2types.Reservation{
{Instances: []ec2types.Instance{{
InstanceId: &testInstanceID,
State: &ec2types.InstanceState{
Name: ec2types.InstanceStateNameRunning,
},
}}},
},
result: true,
},
// terminated parent - not valid instance
{
parent: []ec2types.Reservation{
{Instances: []ec2types.Instance{{
InstanceId: &testInstanceID,
State: &ec2types.InstanceState{
Name: ec2types.InstanceStateNameTerminated,
},
}}},
},
result: false,
},
}
for _, tc := range tests {
require.Equal(t, tc.result, main.CheckValidParent("testChildId", tc.parent))
}
}

View file

@ -1,3 +1,4 @@
package main
var FilterReservations = filterReservations
var FilterOnTooOld = filterOnTooOld
var CheckValidParent = checkValidParent