cloud/awscloud: rework create fleet retry logic
The current path sometimes launches two instances, which is problematic because the rest of the secure instance code expects exactly one instance. A security group could end up attached to both instances, which would block the worker from launching any more SIs: it tries to delete the old security group first, but that group is still held by one of the surplus SIs which didn't get terminated. Only retry if: (1) the create fleet call returned an "UnfulfillableCapacity" or "InsufficientInstanceCapacity" error code; and (2) no instance was launched anyway. If either of these checks fails, do not try to launch another one, and just fail the job.
This commit is contained in:
parent
661f39cbb9
commit
d5912259a0
3 changed files with 105 additions and 8 deletions
|
|
@@ -4,6 +4,9 @@ import (
|
|||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/service/ec2"
|
||||
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/osbuild/osbuild-composer/internal/cloud/awscloud"
|
||||
|
|
@@ -164,3 +167,64 @@ func TestSICreateFleetFailures(t *testing.T) {
|
|||
require.Equal(t, 4, m.calledFn["DeleteSecurityGroup"])
|
||||
require.Equal(t, 4, m.calledFn["DeleteLaunchTemplate"])
|
||||
}
|
||||
|
||||
func TestDoCreateFleetRetry(t *testing.T) {
|
||||
cfOutput := &ec2.CreateFleetOutput{
|
||||
Errors: []ec2types.CreateFleetError{
|
||||
{
|
||||
ErrorCode: aws.String("UnfulfillableCapacity"),
|
||||
ErrorMessage: aws.String("Msg"),
|
||||
},
|
||||
},
|
||||
}
|
||||
retry, fmtErrs := awscloud.DoCreateFleetRetry(cfOutput)
|
||||
require.True(t, retry)
|
||||
require.Equal(t, []string{"UnfulfillableCapacity: Msg"}, fmtErrs)
|
||||
|
||||
cfOutput = &ec2.CreateFleetOutput{
|
||||
Errors: []ec2types.CreateFleetError{
|
||||
{
|
||||
ErrorCode: aws.String("Bogus"),
|
||||
ErrorMessage: aws.String("Msg"),
|
||||
},
|
||||
{
|
||||
ErrorCode: aws.String("InsufficientInstanceCapacity"),
|
||||
ErrorMessage: aws.String("Msg"),
|
||||
},
|
||||
},
|
||||
}
|
||||
retry, fmtErrs = awscloud.DoCreateFleetRetry(cfOutput)
|
||||
require.True(t, retry)
|
||||
require.Equal(t, []string{"Bogus: Msg", "InsufficientInstanceCapacity: Msg"}, fmtErrs)
|
||||
|
||||
cfOutput = &ec2.CreateFleetOutput{
|
||||
Errors: []ec2types.CreateFleetError{
|
||||
{
|
||||
ErrorCode: aws.String("Bogus"),
|
||||
ErrorMessage: aws.String("Msg"),
|
||||
},
|
||||
},
|
||||
}
|
||||
retry, fmtErrs = awscloud.DoCreateFleetRetry(cfOutput)
|
||||
require.False(t, retry)
|
||||
require.Equal(t, []string{"Bogus: Msg"}, fmtErrs)
|
||||
|
||||
cfOutput = &ec2.CreateFleetOutput{
|
||||
Errors: []ec2types.CreateFleetError{
|
||||
{
|
||||
ErrorCode: aws.String("InsufficientInstanceCapacity"),
|
||||
ErrorMessage: aws.String("Msg"),
|
||||
},
|
||||
},
|
||||
Instances: []ec2types.CreateFleetInstance{
|
||||
{
|
||||
InstanceIds: []string{
|
||||
"instance-id",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
retry, fmtErrs = awscloud.DoCreateFleetRetry(cfOutput)
|
||||
require.False(t, retry)
|
||||
require.Equal(t, []string{"InsufficientInstanceCapacity: Msg", "Already launched instance ([instance-id]), aborting create fleet"}, fmtErrs)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue