osbuild-service-maintenance: implement removal of security groups
Security groups of instances that are terminated should be removed. HMS-3632
This commit is contained in:
parent
7ebe266d3c
commit
a96ea533c0
4 changed files with 186 additions and 15 deletions
|
|
@ -24,6 +24,8 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
var a *awscloud.AWS
|
||||
var err error
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
if accessKeyID != "" && accessKey != "" {
|
||||
a, err = awscloud.New(region, accessKeyID, accessKey, "")
|
||||
if err != nil {
|
||||
|
|
@ -83,7 +85,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
continue
|
||||
}
|
||||
|
||||
if err = sem.Acquire(context.Background(), 1); err != nil {
|
||||
if err = sem.Acquire(ctx, 1); err != nil {
|
||||
logrus.Errorf("Error acquiring semaphore: %v", err)
|
||||
continue
|
||||
}
|
||||
|
|
@ -102,6 +104,28 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
wg.Wait()
|
||||
}
|
||||
|
||||
// using err to collect both errors as we want to
|
||||
// continue execution if one cleanup fails
|
||||
err = nil
|
||||
errSecureInstances := terminateOrphanedSecureInstances(a, dryRun)
|
||||
// keep going with other cleanup even on error
|
||||
if errSecureInstances != nil {
|
||||
logrus.Errorf("Error in terminating secure instances: %v, continuing other cleanup.", errSecureInstances)
|
||||
err = errSecureInstances
|
||||
}
|
||||
|
||||
errSecurityGroups := searchSGAndCleanup(ctx, a, dryRun)
|
||||
if errSecurityGroups != nil {
|
||||
logrus.Errorf("Error in cleaning up security groups: %v", errSecurityGroups)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Multiple errors while processing AWSCleanup: %w and %w.", err, errSecurityGroups)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func terminateOrphanedSecureInstances(a *awscloud.AWS, dryRun bool) error {
|
||||
// Terminate leftover secure instances
|
||||
reservations, err := a.DescribeInstancesByTag("parent", "i-*")
|
||||
if err != nil {
|
||||
|
|
@ -123,7 +147,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
}
|
||||
}
|
||||
|
||||
instanceIDs = filterReservations(instanceIDs, reservations)
|
||||
instanceIDs = filterOnTooOld(instanceIDs, reservations)
|
||||
logrus.Infof("Cleaning up executor instances: %v", instanceIDs)
|
||||
if !dryRun {
|
||||
err = a.TerminateInstances(instanceIDs)
|
||||
|
|
@ -133,11 +157,10 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
} else {
|
||||
logrus.Info("Dry run, didn't actually terminate any instances")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func filterReservations(instanceIDs []string, reservations []ec2types.Reservation) []string {
|
||||
func filterOnTooOld(instanceIDs []string, reservations []ec2types.Reservation) []string {
|
||||
for _, res := range reservations {
|
||||
for _, i := range res.Instances {
|
||||
if i.LaunchTime.Before(time.Now().Add(-time.Hour * 2)) {
|
||||
|
|
@ -188,13 +211,50 @@ func checkValidParent(childId string, parent []ec2types.Reservation) bool {
|
|||
}
|
||||
|
||||
parentState := parent[0].Instances[0].State.Name
|
||||
if parentState == ec2types.InstanceStateNameRunning || parentState == ec2types.InstanceStateNamePending {
|
||||
if parentState != ec2types.InstanceStateNameTerminated {
|
||||
return true
|
||||
}
|
||||
logrus.Infof("Instance %s has a parent (%s) in state %s, so we'll terminate %s.", childId, *parent[0].Instances[0].InstanceId, parentState, childId)
|
||||
return false
|
||||
}
|
||||
|
||||
func searchSGAndCleanup(ctx context.Context, a *awscloud.AWS, dryRun bool) error {
|
||||
securityGroups, err := a.DescribeSecurityGroupsByPrefix(ctx, "SG for i-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, sg := range securityGroups {
|
||||
if sg.GroupId == nil || sg.GroupName == nil {
|
||||
logrus.Errorf(
|
||||
"Security Group needs to have a GroupId (%v) and a GroupName (%v).",
|
||||
sg.GroupId,
|
||||
sg.GroupName)
|
||||
continue
|
||||
}
|
||||
reservations, err := a.DescribeInstancesBySecurityGroupID(*sg.GroupId)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to describe security group %s: %v", *sg.GroupId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// If no instance is running/pending, delete the SG
|
||||
if allTerminated(reservations) {
|
||||
logrus.Infof("Deleting security group: %s (%s)", *sg.GroupName, *sg.GroupId)
|
||||
if !dryRun {
|
||||
err := a.DeleteSecurityGroupById(ctx, sg.GroupId)
|
||||
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to delete security group %s: %v", *sg.GroupId, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
logrus.Debugf("Security group %s has non terminated instances associated with it.", *sg.GroupId)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// allTerminated returns true when no instance of the reservations is left in a
// non-terminated state; if any instance is not terminated, the security group
// is considered "in use" and allTerminated returns false.
|
||||
func allTerminated(reservations []ec2types.Reservation) bool {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Hour * 24)),
|
||||
InstanceId: common.ToPtr("not filtered"),
|
||||
InstanceId: common.ToPtr("not filtered 1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -25,7 +25,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 121)),
|
||||
InstanceId: common.ToPtr("not filtered"),
|
||||
InstanceId: common.ToPtr("not filtered 2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33,7 +33,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 119)),
|
||||
InstanceId: common.ToPtr("filtered"),
|
||||
InstanceId: common.ToPtr("filtered 1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -41,12 +41,87 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now()),
|
||||
InstanceId: common.ToPtr("filtered"),
|
||||
InstanceId: common.ToPtr("filtered 2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
instanceIDs := main.FilterReservations(reservations)
|
||||
require.Equal(t, []string{"not filtered", "not filtered"}, instanceIDs)
|
||||
instanceIDs := main.FilterOnTooOld([]string{}, reservations)
|
||||
require.Equal(t, []string{"not filtered 1", "not filtered 2"}, instanceIDs)
|
||||
}
|
||||
|
||||
func TestCheckValidParent(t *testing.T) {
|
||||
testInstanceID := "TestInstance"
|
||||
tests :=
|
||||
[]struct {
|
||||
parent []ec2types.Reservation
|
||||
result bool
|
||||
}{
|
||||
// no parent
|
||||
{
|
||||
parent: []ec2types.Reservation{},
|
||||
result: false,
|
||||
},
|
||||
// many parents - "valid" to leave as is
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{}, {},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// no parent instance
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{}},
|
||||
},
|
||||
result: false,
|
||||
},
|
||||
// many parent instances - "valid" to leave as is
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{}, {}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// pending parent
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNamePending,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// running parent
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNameRunning,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// terminated parent - not valid instance
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNameTerminated,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
require.Equal(t, tc.result, main.CheckValidParent("testChildId", tc.parent))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
package main
|
||||
|
||||
var FilterReservations = filterReservations
|
||||
var FilterOnTooOld = filterOnTooOld
|
||||
var CheckValidParent = checkValidParent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue