From a3ac31a4830604cf0d25f91236e092512cddab13 Mon Sep 17 00:00:00 2001 From: Jakub Rusz Date: Mon, 26 Jul 2021 18:14:59 +0200 Subject: [PATCH] ci/tests: scheduled cloud-cleaner for Azure This introduces a script to run cloud-cleaner on a schedule. It currently works with Azure only and therefore needed a change to the cloud-cleaner code as well. Using azure-cli it gets a list of resources from $AZURE_RESOURCE_GROUP and then keeps only resources that are older than $HOURS_BACK hours and are not storage accounts. Then it processes the list further, leaving only a list of unique TEST_IDs to supply to the cloud-cleaner. --- .gitlab-ci.yml | 27 ++++++++++---- cmd/cloud-cleaner/main.go | 33 ++++++++++------ schutzbot/scheduled_cloud_cleaner.sh | 56 ++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 19 deletions(-) create mode 100755 schutzbot/scheduled_cloud_cleaner.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5791198d7..ab18cb2a9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,6 +3,7 @@ stages: - rpmbuild - prepare-rhel-internal - test + - cleanup - finish .terraform: @@ -69,7 +70,7 @@ Prepare-rhel-internal: stage: prepare-rhel-internal extends: .terraform rules: - - if: '$CI_PIPELINE_SOURCE == "schedule"' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $NIGHTLY == "true"' script: - schutzbot/prepare-rhel-internal.sh artifacts: @@ -90,7 +91,7 @@ Base: extends: .terraform rules: - if: '$CI_PIPELINE_SOURCE != "schedule"' - - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/ && $NIGHTLY == "true"' script: - schutzbot/deploy.sh - /usr/libexec/tests/osbuild-composer/base_tests.sh @@ -150,7 +151,7 @@ OSTree: extends: .terraform rules: - if: '$CI_PIPELINE_SOURCE != "schedule"' - - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/ && 
$NIGHTLY == "true"' script: - schutzbot/deploy.sh - /usr/libexec/tests/osbuild-composer/ostree.sh @@ -186,7 +187,7 @@ Integration: extends: .terraform rules: - if: '$CI_PIPELINE_SOURCE != "schedule"' - - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/ && $NIGHTLY == "true"' script: - schutzbot/deploy.sh - /usr/libexec/tests/osbuild-composer/${SCRIPT} @@ -232,7 +233,7 @@ API: extends: .terraform rules: - if: '$CI_PIPELINE_SOURCE != "schedule"' - - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $RUNNER =~ /[\S]+rhel-[8-9]\.[\S]+/ && $NIGHTLY == "true"' script: - schutzbot/deploy.sh - /usr/libexec/tests/osbuild-composer/api.sh ${TARGET} @@ -268,7 +269,7 @@ NIGHTLY_FAIL: tags: - shell rules: - - if: '$CI_PIPELINE_SOURCE == "schedule"' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $NIGHTLY == "true"' when: on_failure script: - schutzbot/slack_notification.sh FAILED ":big-sad:" @@ -278,7 +279,7 @@ NIGHTLY_SUCCESS: tags: - shell rules: - - if: '$CI_PIPELINE_SOURCE == "schedule"' + - if: '$CI_PIPELINE_SOURCE == "schedule" && $NIGHTLY == "true"' script: - schutzbot/slack_notification.sh SUCCESS ":partymeow:" @@ -296,6 +297,18 @@ Installer: - RUNNER: - openstack/rhel-8.5-x86_64 +SCHEDULED_CLOUD_CLEANER: + stage: cleanup + tags: + - terraform + variables: + RUNNER: aws/fedora-33-x86_64 + rules: + - if: '$CI_PIPELINE_SOURCE == "schedule" && $CLEANUP == "true"' + script: + - schutzbot/deploy.sh + - schutzbot/scheduled_cloud_cleaner.sh + finish: stage: finish tags: diff --git a/cmd/cloud-cleaner/main.go b/cmd/cloud-cleaner/main.go index a53c7dafe..07b9b6bed 100644 --- a/cmd/cloud-cleaner/main.go +++ b/cmd/cloud-cleaner/main.go @@ -156,17 +156,26 @@ func cleanupAzure(testID string, wg *sync.WaitGroup) { func main() { log.Println("Running a cloud cleanup") - - // Get test ID - testID, err := 
test.GenerateCIArtifactName("") - if err != nil { - log.Fatalf("Failed to get testID: %v", err) - } - log.Printf("TEST_ID=%s", testID) - var wg sync.WaitGroup - wg.Add(2) - go cleanupAzure(testID, &wg) - go cleanupGCP(testID, &wg) - wg.Wait() + + // The scheduled cloud-cleaner currently supports Azure only. + // For a scheduled cleanup, TEST_ID is taken from the environment and only Azure is cleaned up. + // If TEST_ID is empty, a test ID is generated and both GCP and Azure are cleaned up. + testID := os.Getenv("TEST_ID") + if testID == "" { + testID, err := test.GenerateCIArtifactName("") + if err != nil { + log.Fatalf("Failed to get testID: %v", err) + } + log.Printf("TEST_ID=%s", testID) + wg.Add(2) + go cleanupAzure(testID, &wg) + go cleanupGCP(testID, &wg) + wg.Wait() + } else { + wg.Add(1) + go cleanupAzure(testID, &wg) + wg.Wait() + } + } diff --git a/schutzbot/scheduled_cloud_cleaner.sh b/schutzbot/scheduled_cloud_cleaner.sh new file mode 100755 index 000000000..aab4307d5 --- /dev/null +++ b/schutzbot/scheduled_cloud_cleaner.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Azure cleanup: run cloud-cleaner for every old test run found in $AZURE_RESOURCE_GROUP +if ! 
hash az; then + # this installation method is taken from the official docs: + # https://docs.microsoft.com/cs-cz/cli/azure/install-azure-cli-linux?pivots=dnf + sudo rpm --import https://packages.microsoft.com/keys/microsoft.asc + echo -e "[azure-cli] +name=Azure CLI +baseurl=https://packages.microsoft.com/yumrepos/azure-cli +enabled=1 +gpgcheck=1 +gpgkey=https://packages.microsoft.com/keys/microsoft.asc" | sudo tee /etc/yum.repos.d/azure-cli.repo + + echo "Installing azure-cli" + sudo dnf install -y azure-cli + az version +fi + +az login --service-principal --username "${AZURE_CLIENT_ID}" --password "${AZURE_CLIENT_SECRET}" --tenant "${AZURE_TENANT_ID}" + +# List all resources from AZURE_RESOURCE_GROUP +RESOURCE_LIST=$(az resource list -g "$AZURE_RESOURCE_GROUP") +RESOURCE_COUNT=$( echo "$RESOURCE_LIST" | jq '.[].name' | wc -l) + +# collect resources older than $HOURS_BACK hours, skipping storage accounts +DELETE_TIME=$(date -d "- $HOURS_BACK hours" +%s) +OLD_RESOURCE_LIST_NAMES=() +for i in $(seq 0 $(("$RESOURCE_COUNT"-1))); do + RESOURCE_TIME=$(echo "$RESOURCE_LIST" | jq ".[$i].createdTime" | tr -d '"') + RESOURCE_TYPE=$(echo "$RESOURCE_LIST" | jq ".[$i].type" | tr -d '"') + RESOURCE_TIME_SECONDS=$(date -d "$RESOURCE_TIME" +%s) + if [[ "$RESOURCE_TIME_SECONDS" -lt "$DELETE_TIME" && "$RESOURCE_TYPE" != Microsoft.Storage/storageAccounts ]]; then + OLD_RESOURCE_LIST_NAMES+=("$(echo "$RESOURCE_LIST" | jq ".[$i].name" | sed -e 's/^[^-]*-//' | tr -d '"')") + fi +done + +# Exit early if there are no resources to delete +if [ ${#OLD_RESOURCE_LIST_NAMES[@]} == 0 ]; then + echo "Nothing to delete." + exit 0 +fi + +# Keep only unique names (resource name without its prefix up to the first '-'); each one is used as a TEST_ID +mapfile -t RESOURCE_TO_DELETE_LIST < <(printf "%s\n" "${OLD_RESOURCE_LIST_NAMES[@]}" | sort -u) +echo "${RESOURCE_TO_DELETE_LIST[@]}" + +TO_DELETE_COUNT=${#RESOURCE_TO_DELETE_LIST[@]} +echo "There are resources from $TO_DELETE_COUNT test runs to delete." 
+ +for i in $(seq 0 $(("$TO_DELETE_COUNT"-1))); do + echo "Running cloud-cleaner in Azure for resources with TEST_ID: ${RESOURCE_TO_DELETE_LIST[$i]}" + TEST_ID=${RESOURCE_TO_DELETE_LIST[$i]} /usr/libexec/osbuild-composer-test/cloud-cleaner +done + +echo "Azure cleanup complete!"