osbuild-service-maintenance: implement removal of security groups
Security groups of instances that are terminated should be removed. HMS-3632
This commit is contained in:
parent
7ebe266d3c
commit
a96ea533c0
4 changed files with 186 additions and 15 deletions
|
|
@ -24,6 +24,8 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
var a *awscloud.AWS
|
||||
var err error
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
if accessKeyID != "" && accessKey != "" {
|
||||
a, err = awscloud.New(region, accessKeyID, accessKey, "")
|
||||
if err != nil {
|
||||
|
|
@ -83,7 +85,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
continue
|
||||
}
|
||||
|
||||
if err = sem.Acquire(context.Background(), 1); err != nil {
|
||||
if err = sem.Acquire(ctx, 1); err != nil {
|
||||
logrus.Errorf("Error acquiring semaphore: %v", err)
|
||||
continue
|
||||
}
|
||||
|
|
@ -102,6 +104,28 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
wg.Wait()
|
||||
}
|
||||
|
||||
// using err to collect both errors as we want to
|
||||
// continue execution if one cleanup fails
|
||||
err = nil
|
||||
errSecureInstances := terminateOrphanedSecureInstances(a, dryRun)
|
||||
// keep going with other cleanup even on error
|
||||
if errSecureInstances != nil {
|
||||
logrus.Errorf("Error in terminating secure instances: %v, continuing other cleanup.", errSecureInstances)
|
||||
err = errSecureInstances
|
||||
}
|
||||
|
||||
errSecurityGroups := searchSGAndCleanup(ctx, a, dryRun)
|
||||
if errSecurityGroups != nil {
|
||||
logrus.Errorf("Error in cleaning up security groups: %v", errSecurityGroups)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Multiple errors while processing AWSCleanup: %w and %w.", err, errSecurityGroups)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func terminateOrphanedSecureInstances(a *awscloud.AWS, dryRun bool) error {
|
||||
// Terminate leftover secure instances
|
||||
reservations, err := a.DescribeInstancesByTag("parent", "i-*")
|
||||
if err != nil {
|
||||
|
|
@ -123,7 +147,7 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
}
|
||||
}
|
||||
|
||||
instanceIDs = filterReservations(instanceIDs, reservations)
|
||||
instanceIDs = filterOnTooOld(instanceIDs, reservations)
|
||||
logrus.Infof("Cleaning up executor instances: %v", instanceIDs)
|
||||
if !dryRun {
|
||||
err = a.TerminateInstances(instanceIDs)
|
||||
|
|
@ -133,11 +157,10 @@ func AWSCleanup(maxConcurrentRequests int, dryRun bool, accessKeyID, accessKey s
|
|||
} else {
|
||||
logrus.Info("Dry run, didn't actually terminate any instances")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func filterReservations(instanceIDs []string, reservations []ec2types.Reservation) []string {
|
||||
func filterOnTooOld(instanceIDs []string, reservations []ec2types.Reservation) []string {
|
||||
for _, res := range reservations {
|
||||
for _, i := range res.Instances {
|
||||
if i.LaunchTime.Before(time.Now().Add(-time.Hour * 2)) {
|
||||
|
|
@ -188,13 +211,50 @@ func checkValidParent(childId string, parent []ec2types.Reservation) bool {
|
|||
}
|
||||
|
||||
parentState := parent[0].Instances[0].State.Name
|
||||
if parentState == ec2types.InstanceStateNameRunning || parentState == ec2types.InstanceStateNamePending {
|
||||
if parentState != ec2types.InstanceStateNameTerminated {
|
||||
return true
|
||||
}
|
||||
logrus.Infof("Instance %s has a parent (%s) in state %s, so we'll terminate %s.", childId, *parent[0].Instances[0].InstanceId, parentState, childId)
|
||||
return false
|
||||
}
|
||||
|
||||
func searchSGAndCleanup(ctx context.Context, a *awscloud.AWS, dryRun bool) error {
|
||||
securityGroups, err := a.DescribeSecurityGroupsByPrefix(ctx, "SG for i-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, sg := range securityGroups {
|
||||
if sg.GroupId == nil || sg.GroupName == nil {
|
||||
logrus.Errorf(
|
||||
"Security Group needs to have a GroupId (%v) and a GroupName (%v).",
|
||||
sg.GroupId,
|
||||
sg.GroupName)
|
||||
continue
|
||||
}
|
||||
reservations, err := a.DescribeInstancesBySecurityGroupID(*sg.GroupId)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to describe security group %s: %v", *sg.GroupId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// If no instance is running/pending, delete the SG
|
||||
if allTerminated(reservations) {
|
||||
logrus.Infof("Deleting security group: %s (%s)", *sg.GroupName, *sg.GroupId)
|
||||
if !dryRun {
|
||||
err := a.DeleteSecurityGroupById(ctx, sg.GroupId)
|
||||
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to delete security group %s: %v", *sg.GroupId, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
logrus.Debugf("Security group %s has non terminated instances associated with it.", *sg.GroupId)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// allTerminated returns true if any instance of the reservations is not terminated
|
||||
// then it's considered "in use"
|
||||
func allTerminated(reservations []ec2types.Reservation) bool {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Hour * 24)),
|
||||
InstanceId: common.ToPtr("not filtered"),
|
||||
InstanceId: common.ToPtr("not filtered 1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -25,7 +25,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 121)),
|
||||
InstanceId: common.ToPtr("not filtered"),
|
||||
InstanceId: common.ToPtr("not filtered 2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33,7 +33,7 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now().Add(-time.Minute * 119)),
|
||||
InstanceId: common.ToPtr("filtered"),
|
||||
InstanceId: common.ToPtr("filtered 1"),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -41,12 +41,87 @@ func TestFilterReservations(t *testing.T) {
|
|||
Instances: []ec2types.Instance{
|
||||
{
|
||||
LaunchTime: common.ToPtr(time.Now()),
|
||||
InstanceId: common.ToPtr("filtered"),
|
||||
InstanceId: common.ToPtr("filtered 2"),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
instanceIDs := main.FilterReservations(reservations)
|
||||
require.Equal(t, []string{"not filtered", "not filtered"}, instanceIDs)
|
||||
instanceIDs := main.FilterOnTooOld([]string{}, reservations)
|
||||
require.Equal(t, []string{"not filtered 1", "not filtered 2"}, instanceIDs)
|
||||
}
|
||||
|
||||
func TestCheckValidParent(t *testing.T) {
|
||||
testInstanceID := "TestInstance"
|
||||
tests :=
|
||||
[]struct {
|
||||
parent []ec2types.Reservation
|
||||
result bool
|
||||
}{
|
||||
// no parent
|
||||
{
|
||||
parent: []ec2types.Reservation{},
|
||||
result: false,
|
||||
},
|
||||
// many parents - "valid" to leave as is
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{}, {},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// no parent instance
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{}},
|
||||
},
|
||||
result: false,
|
||||
},
|
||||
// many parent instances - "valid" to leave as is
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{}, {}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// pending parent
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNamePending,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// running parent
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNameRunning,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: true,
|
||||
},
|
||||
// terminated parent - not valid instance
|
||||
{
|
||||
parent: []ec2types.Reservation{
|
||||
{Instances: []ec2types.Instance{{
|
||||
InstanceId: &testInstanceID,
|
||||
State: &ec2types.InstanceState{
|
||||
Name: ec2types.InstanceStateNameTerminated,
|
||||
},
|
||||
}}},
|
||||
},
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
require.Equal(t, tc.result, main.CheckValidParent("testChildId", tc.parent))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
package main
|
||||
|
||||
var FilterReservations = filterReservations
|
||||
var FilterOnTooOld = filterOnTooOld
|
||||
var CheckValidParent = checkValidParent
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package awscloud
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/service/ec2"
|
||||
|
|
@ -66,14 +67,14 @@ func (a *AWS) RemoveSnapshotAndDeregisterImage(image *ec2types.Image) error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reservation, error) {
|
||||
func (a *AWS) describeInstancesByKeyValue(key, value string) ([]ec2types.Reservation, error) {
|
||||
res, err := a.ec2.DescribeInstances(
|
||||
context.Background(),
|
||||
&ec2.DescribeInstancesInput{
|
||||
Filters: []ec2types.Filter{
|
||||
{
|
||||
Name: aws.String(fmt.Sprintf("tag:%s", tagKey)),
|
||||
Values: []string{tagValue},
|
||||
Name: aws.String(key),
|
||||
Values: []string{value},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -84,6 +85,14 @@ func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reserv
|
|||
return res.Reservations, nil
|
||||
}
|
||||
|
||||
func (a *AWS) DescribeInstancesByTag(tagKey, tagValue string) ([]ec2types.Reservation, error) {
|
||||
return a.describeInstancesByKeyValue(fmt.Sprintf("tag:%s", tagKey), tagValue)
|
||||
}
|
||||
|
||||
func (a *AWS) DescribeInstancesBySecurityGroupID(securityGroupID string) ([]ec2types.Reservation, error) {
|
||||
return a.describeInstancesByKeyValue("instance.group-id", securityGroupID)
|
||||
}
|
||||
|
||||
func (a *AWS) DescribeInstancesByInstanceID(instanceID string) ([]ec2types.Reservation, error) {
|
||||
res, err := a.ec2.DescribeInstances(
|
||||
context.Background(),
|
||||
|
|
@ -106,3 +115,29 @@ func (a *AWS) TerminateInstances(instanceIDs []string) error {
|
|||
)
|
||||
return err
|
||||
}
|
||||
|
||||
func (a *AWS) DescribeSecurityGroupsByPrefix(ctx context.Context, prefix string) ([]ec2types.SecurityGroup, error) {
|
||||
var securityGroups []ec2types.SecurityGroup
|
||||
|
||||
sgOutput, err := a.ec2.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{})
|
||||
if err != nil {
|
||||
return securityGroups, fmt.Errorf("failed to describe security groups: %w", err)
|
||||
}
|
||||
|
||||
for _, sg := range sgOutput.SecurityGroups {
|
||||
if sg.GroupName != nil && strings.HasPrefix(*sg.GroupName, prefix) {
|
||||
securityGroups = append(securityGroups, sg)
|
||||
}
|
||||
}
|
||||
return securityGroups, nil
|
||||
}
|
||||
|
||||
func (a *AWS) DeleteSecurityGroupById(ctx context.Context, sgID *string) error {
|
||||
_, err := a.ec2.DeleteSecurityGroup(
|
||||
ctx,
|
||||
&ec2.DeleteSecurityGroupInput{
|
||||
GroupId: sgID,
|
||||
},
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue