cloud/aws: add a third secure instance fallback across AZs

In case the on-demand option fails as well, retry one more time across
availability zones. This significantly increases the pool of available
instances, but also increases network-related costs, as transferring data
between AZs is not free.
This commit is contained in:
Sanne Raymaekers 2024-10-07 12:49:25 +02:00
parent 78d3b2fde5
commit 905df418aa
2 changed files with 11 additions and 4 deletions

View file

@ -552,8 +552,15 @@ func (a *AWS) createFleet(input *ec2.CreateFleetInput) (*ec2.CreateFleetOutput,
input.SpotOptions = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}
if len(createFleetOutput.Errors) > 0 && *createFleetOutput.Errors[0].ErrorCode == "UnfulfillableCapacity" {
logrus.Warn("Received UnfulfillableCapacity from CreateFleet with OnDemand instance option, retrying across availability zones")
input.LaunchTemplateConfigs[0].Overrides = nil
createFleetOutput, err = a.ec2.CreateFleet(context.Background(), input)
}
if err != nil {
return nil, fmt.Errorf("Unable to create on-demand fleet: %w", err)
return nil, fmt.Errorf("Unable to create fleet, tried on-demand and across AZs: %w", err)
}
if len(createFleetOutput.Errors) > 0 {

View file

@ -142,12 +142,12 @@ func TestSICreateFleetFailures(t *testing.T) {
aws := awscloud.NewForTest(m, &ec2imdsmock{t, "instance-id", "region1"}, nil, nil, nil)
require.NotNil(t, aws)
// unfillable capacity should call create fleet twice
// unfillable capacity should call create fleet thrice
m.failFn["CreateFleet"] = nil
si, err := aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname")
require.Error(t, err)
require.Nil(t, si)
require.Equal(t, 2, m.calledFn["CreateFleet"])
require.Equal(t, 3, m.calledFn["CreateFleet"])
require.Equal(t, 1, m.calledFn["CreateSecurityGroup"])
require.Equal(t, 1, m.calledFn["CreateLaunchTemplate"])
require.Equal(t, 2, m.calledFn["DeleteSecurityGroup"])
@ -158,7 +158,7 @@ func TestSICreateFleetFailures(t *testing.T) {
si, err = aws.RunSecureInstance("iam-profile", "key-name", "cw-group", "hostname")
require.Error(t, err)
require.Nil(t, si)
require.Equal(t, 3, m.calledFn["CreateFleet"])
require.Equal(t, 4, m.calledFn["CreateFleet"])
require.Equal(t, 2, m.calledFn["CreateSecurityGroup"])
require.Equal(t, 2, m.calledFn["CreateLaunchTemplate"])
require.Equal(t, 4, m.calledFn["DeleteSecurityGroup"])