cloud/awscloud: retry CreateFleet regardless of the error code

The errors returned by CreateFleet are not entirely clear. It appears to
return `InsufficientInstanceCapacity` in addition to
`UnfulfillableCapacity`. Let's just attempt CreateFleet up to three times
(the initial call plus two retries) regardless of the error code; that
way there's no need to chase error codes which aren't clearly defined.
This commit is contained in:
Sanne Raymaekers 2024-10-15 13:18:56 +02:00
parent 73968236bd
commit 5eb8227bf3
2 changed files with 6 additions and 6 deletions

View file

@ -547,14 +547,14 @@ func (a *AWS) createFleet(input *ec2.CreateFleetInput) (*ec2.CreateFleetOutput,
return nil, fmt.Errorf("Unable to create spot fleet: %w", err)
}
if len(createFleetOutput.Errors) > 0 && *createFleetOutput.Errors[0].ErrorCode == "UnfulfillableCapacity" {
logrus.Warn("Received UnfulfillableCapacity from CreateFleet, retrying CreateFleet with OnDemand instance")
if len(createFleetOutput.Errors) > 0 {
logrus.Warnf("Received error %s from CreateFleet, retrying CreateFleet with OnDemand instance", *createFleetOutput.Errors[0].ErrorCode)
input.SpotOptions = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}
if len(createFleetOutput.Errors) > 0 && *createFleetOutput.Errors[0].ErrorCode == "UnfulfillableCapacity" {
logrus.Warn("Received UnfulfillableCapacity from CreateFleet with OnDemand instance option, retrying across availability zones")
if err == nil && len(createFleetOutput.Errors) > 0 {
logrus.Warnf("Received error %s from CreateFleet with OnDemand instance option, retrying across availability zones", *createFleetOutput.Errors[0].ErrorCode)
input.LaunchTemplateConfigs[0].Overrides = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}
@ -566,7 +566,7 @@ func (a *AWS) createFleet(input *ec2.CreateFleetInput) (*ec2.CreateFleetOutput,
if len(createFleetOutput.Errors) > 0 {
fleetErrs := []string{}
for _, fleetErr := range createFleetOutput.Errors {
fleetErrs = append(fleetErrs, *fleetErr.ErrorMessage)
fleetErrs = append(fleetErrs, fmt.Sprintf("%s: %s", *fleetErr.ErrorCode, *fleetErr.ErrorMessage))
}
return nil, fmt.Errorf("Unable to create fleet: %v", strings.Join(fleetErrs, "; "))
}

View file

@ -142,7 +142,7 @@ func TestSICreateFleetFailures(t *testing.T) {
aws := awscloud.NewForTest(m, &ec2imdsmock{t, "instance-id", "region1"}, nil, nil, nil)
require.NotNil(t, aws)
// unfillable capacity should call create fleet thrice
// create fleet error should call create fleet thrice
m.failFn["CreateFleet"] = nil
si, err := aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname")
require.Error(t, err)