The `api.sh` test currently always defaults to the "<REGION>-a" zone when creating an instance from the built image. The resources in a zone may get exhausted, and the solution is to use a different zone. Currently, even retrying the CI job does not help to mitigate such an error during a CI run. Modify `api.sh` to pick a random GCP zone in the given region when creating a compute instance. Use only GCP zones which are "UP". The `cloud-cleaner` relied on the behavior of `api.sh` to always choose the "<REGION>-a" zone. Guessing the chosen zone in `cloud-cleaner` is not viable, but thankfully the instance name is by default unique for the whole GCP project. Modify `cloud-cleaner` to iterate over all available zones in the used region and try to delete the specific instance in each of them. Make the `ComputeZonesInRegion` method from the `internal/cloud/gcp` package exported and use it in `cloud-cleaner` to get the list of available zones in a region. Signed-off-by: Tomas Hozza <thozza@redhat.com>
172 lines
5.3 KiB
Go
172 lines
5.3 KiB
Go
// +build integration
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"sync"
|
|
|
|
"github.com/Azure/go-autorest/autorest/azure/auth"
|
|
|
|
"github.com/osbuild/osbuild-composer/internal/boot/azuretest"
|
|
"github.com/osbuild/osbuild-composer/internal/cloud/gcp"
|
|
"github.com/osbuild/osbuild-composer/internal/test"
|
|
)
|
|
|
|
func cleanupGCP(testID string, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
|
|
log.Println("[GCP] Running clean up")
|
|
|
|
GCPRegion, ok := os.LookupEnv("GCP_REGION")
|
|
if !ok {
|
|
log.Println("[GCP] Error: 'GCP_REGION' is not set in the environment.")
|
|
return
|
|
}
|
|
GCPBucket, ok := os.LookupEnv("GCP_BUCKET")
|
|
if !ok {
|
|
log.Println("[GCP] Error: 'GCP_BUCKET' is not set in the environment.")
|
|
return
|
|
}
|
|
// max 62 characters
|
|
// Must be a match of regex '[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?|[1-9][0-9]{0,19}'
|
|
// use sha224sum to get predictable testID without invalid characters
|
|
testIDhash := fmt.Sprintf("%x", sha256.Sum224([]byte(testID)))
|
|
|
|
// Resource names to clean up
|
|
GCPInstance := fmt.Sprintf("vm-%s", testIDhash)
|
|
GCPImage := fmt.Sprintf("image-%s", testIDhash)
|
|
|
|
// It does not matter if there was any error. If the credentials file was
|
|
// read successfully then 'creds' should be non-nil, otherwise it will be
|
|
// nil. Both values are acceptable for creating a new "GCP" instance.
|
|
// If 'creds' is nil, then GCP library will try to authenticate using
|
|
// the instance permissions.
|
|
creds, err := gcp.GetCredentialsFromEnv()
|
|
if err != nil {
|
|
log.Printf("[GCP] Error: %v. This may not be an issue.", err)
|
|
}
|
|
|
|
// If this fails, there is no point in continuing
|
|
g, err := gcp.New(creds)
|
|
if err != nil {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
return
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Try to delete potentially running instance
|
|
// api.sh chooses a random GCP Zone from the set Region. Since we
|
|
// don't know which one it is, iterate over all Zones in the Region
|
|
// and try to delete the instance. Unless the instance has set
|
|
// "VmDnsSetting:ZonalOnly", which we don't do, this is safe and the
|
|
// instance name must be unique for the whole GCP project.
|
|
GCPZones, err := g.ComputeZonesInRegion(ctx, GCPRegion)
|
|
if err != nil {
|
|
log.Printf("[GCP] Error: Failed to get available Zones for the '%s' Region: %v", GCPRegion, err)
|
|
return
|
|
}
|
|
for _, GCPZone := range GCPZones {
|
|
log.Printf("[GCP] 🧹 Deleting VM instance %s in %s. "+
|
|
"This should fail if the test succeeded.", GCPInstance, GCPZone)
|
|
err = g.ComputeInstanceDelete(ctx, GCPZone, GCPInstance)
|
|
if err == nil {
|
|
// If an instance with the given name was successfully deleted in one of the Zones, we are done.
|
|
break
|
|
} else {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
}
|
|
}
|
|
|
|
// Try to clean up storage of cache objects after image import job
|
|
log.Println("[GCP] 🧹 Cleaning up cache objects from storage after image " +
|
|
"import. This should fail if the test succeeded.")
|
|
cacheObjects, errs := g.StorageImageImportCleanup(ctx, GCPImage)
|
|
for _, err = range errs {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
}
|
|
for _, cacheObject := range cacheObjects {
|
|
log.Printf("[GCP] 🧹 Deleted image import job file %s", cacheObject)
|
|
}
|
|
|
|
// Try to find the potentially uploaded Storage objects using custom metadata
|
|
objects, err := g.StorageListObjectsByMetadata(ctx, GCPBucket, map[string]string{gcp.MetadataKeyImageName: GCPImage})
|
|
if err != nil {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
}
|
|
for _, obj := range objects {
|
|
if err = g.StorageObjectDelete(ctx, obj.Bucket, obj.Name); err != nil {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
}
|
|
log.Printf("[GCP] 🧹 Deleted object %s/%s related to build of image %s", obj.Bucket, obj.Name, GCPImage)
|
|
}
|
|
|
|
// Try to delete the imported image
|
|
log.Printf("[GCP] 🧹 Deleting image %s. This should fail if the test succeeded.", GCPImage)
|
|
err = g.ComputeImageDelete(ctx, GCPImage)
|
|
if err != nil {
|
|
log.Printf("[GCP] Error: %v", err)
|
|
}
|
|
}
|
|
|
|
func cleanupAzure(testID string, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
|
|
log.Println("[Azure] Running clean up")
|
|
|
|
// Load Azure credentials
|
|
creds, err := azuretest.GetAzureCredentialsFromEnv()
|
|
if err != nil {
|
|
log.Printf("[Azure] Error: %v", err)
|
|
return
|
|
}
|
|
if creds == nil {
|
|
log.Println("[Azure] Error: empty credentials")
|
|
return
|
|
}
|
|
|
|
// Delete the vhd image
|
|
imageName := "image-" + testID + ".vhd"
|
|
log.Println("[Azure] Deleting image. This should fail if the test succeeded.")
|
|
err = azuretest.DeleteImageFromAzure(creds, imageName)
|
|
if err != nil {
|
|
log.Printf("[Azure] Error: %v", err)
|
|
}
|
|
|
|
// Delete all remaining resources (see the full list in the CleanUpBootedVM function)
|
|
log.Println("[Azure] Cleaning up booted VM. This should fail if the test succeeded.")
|
|
parameters := azuretest.NewDeploymentParameters(creds, imageName, testID, "")
|
|
clientCredentialsConfig := auth.NewClientCredentialsConfig(creds.ClientID, creds.ClientSecret, creds.TenantID)
|
|
authorizer, err := clientCredentialsConfig.Authorizer()
|
|
if err != nil {
|
|
log.Printf("[Azure] Error: %v", err)
|
|
return
|
|
}
|
|
|
|
err = azuretest.CleanUpBootedVM(creds, parameters, authorizer, testID)
|
|
if err != nil {
|
|
log.Printf("[Azure] Error: %v", err)
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
log.Println("Running a cloud cleanup")
|
|
|
|
// Get test ID
|
|
testID, err := test.GenerateCIArtifactName("")
|
|
if err != nil {
|
|
log.Fatalf("Failed to get testID: %v", err)
|
|
}
|
|
log.Printf("TEST_ID=%s", testID)
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(2)
|
|
go cleanupAzure(testID, &wg)
|
|
go cleanupGCP(testID, &wg)
|
|
wg.Wait()
|
|
}
|