Add image boot testing in Azure

This uses an image created and uploaded to Azure using composer-cli,
then uses terraform to spin up a Linux VM from that image, checks
that the machine works, and then cleans everything up.
This commit is contained in:
Jakub Rusz 2021-04-15 16:28:30 +02:00 committed by Alexander Todorov
parent 33d85c82c9
commit 06361267d5
4 changed files with 431 additions and 0 deletions

View file

@ -342,6 +342,7 @@ Requires: python3-lxml
Requires: httpd
Requires: openssl
Requires: podman-plugins
Requires: dnf-plugins-core
%if 0%{?fedora}
# koji and ansible are not in RHEL repositories. Depending on them breaks RHEL
# gating (see OSCI-1541). The test script must enable EPEL and install those

19
schutzbot/Jenkinsfile vendored
View file

@ -286,6 +286,7 @@ pipeline {
AWS_API_TEST_SHARE_ACCOUNT = credentials('aws-credentials-share-account')
AZURE_CLIENT_ID = credentials('azure-client-id')
AZURE_CLIENT_SECRET = credentials('azure-client-secret')
AZURE_CREDS = credentials('azure')
}
steps {
run_tests('integration')
@ -293,6 +294,10 @@ pipeline {
post {
always {
preserve_logs('fedora32-integration')
sh (
label: "Run cloud cleaner just in case something failed",
script: "schutzbot/run_cloud_cleaner.sh"
)
}
}
}
@ -350,6 +355,7 @@ pipeline {
AWS_API_TEST_SHARE_ACCOUNT = credentials('aws-credentials-share-account')
AZURE_CLIENT_ID = credentials('azure-client-id')
AZURE_CLIENT_SECRET = credentials('azure-client-secret')
AZURE_CREDS = credentials('azure')
}
steps {
run_tests('integration')
@ -357,6 +363,10 @@ pipeline {
post {
always {
preserve_logs('fedora33-integration')
sh (
label: "Run cloud cleaner just in case something failed",
script: "schutzbot/run_cloud_cleaner.sh"
)
}
}
}
@ -458,6 +468,7 @@ pipeline {
GCP_API_TEST_SHARE_ACCOUNT = credentials('gcp-credentials-share-account')
AZURE_CLIENT_ID = credentials('azure-client-id')
AZURE_CLIENT_SECRET = credentials('azure-client-secret')
AZURE_CREDS = credentials('azure')
}
steps {
run_tests('integration')
@ -557,6 +568,7 @@ pipeline {
GCP_API_TEST_SHARE_ACCOUNT = credentials('gcp-credentials-share-account')
AZURE_CLIENT_ID = credentials('azure-client-id')
AZURE_CLIENT_SECRET = credentials('azure-client-secret')
AZURE_CREDS = credentials('azure')
}
steps {
run_tests('integration')
@ -665,6 +677,7 @@ pipeline {
GCP_API_TEST_SHARE_ACCOUNT = credentials('gcp-credentials-share-account')
AZURE_CLIENT_ID = credentials('azure-client-id')
AZURE_CLIENT_SECRET = credentials('azure-client-secret')
AZURE_CREDS = credentials('azure')
}
steps {
run_tests('integration')
@ -815,6 +828,12 @@ void run_tests(test_type) {
script: "/usr/libexec/tests/osbuild-composer/aws.sh"
)
// Run the Azure test.
sh (
label: "Integration test: AZURE",
script: "/usr/libexec/tests/osbuild-composer/azure.sh"
)
// Run the API test with AWS.
sh (
label: "Integration test: API (AWS)",

237
test/cases/azure.sh Executable file
View file

@ -0,0 +1,237 @@
#!/bin/bash
# Integration test: build a VHD image with osbuild-composer, upload it to
# Azure, boot a VM from it with terraform, verify the VM via a smoke-test
# file over ssh, then tear everything down.
# Fail fast: abort on errors, unset variables and failed pipeline members.
set -euo pipefail
# Pull in $ID, $VERSION_ID, etc. describing the host distribution.
source /etc/os-release
# Print a message in bright green so it stands out in the CI log.
greenprint () {
    local message="${1}"
    echo -e "\033[1;32m${message}\033[0m"
}
#TODO: Remove this once there is rhel9 support for Azure image type
if [[ $DISTRO_CODE == rhel_90 ]]; then
greenprint "Skipped"
exit 0
fi
# Provision the software under test.
/usr/libexec/osbuild-composer-test/provision.sh
# Terraform needs azure-cli to talk to Azure.
# Only install it when the `az` command is not already available.
if ! hash az; then
# this installation method is taken from the official docs:
# https://docs.microsoft.com/cs-cz/cli/azure/install-azure-cli-linux?pivots=dnf
sudo rpm --import https://packages.microsoft.com/keys/microsoft.asc
echo -e "[azure-cli]
name=Azure CLI
baseurl=https://packages.microsoft.com/yumrepos/azure-cli
enabled=1
gpgcheck=1
gpgkey=https://packages.microsoft.com/keys/microsoft.asc" | sudo tee /etc/yum.repos.d/azure-cli.repo
greenprint "Installing azure-cli"
sudo dnf install -y azure-cli
az version
fi
# We need terraform to provision the vm in azure and then destroy it.
# Map the distro ID (from /etc/os-release) to the path component used by
# the HashiCorp RPM repository, then install terraform from it.
case "$ID" in
    rhel|centos)
        release="RHEL"
        ;;
    fedora)
        release="fedora"
        ;;
    *)
        # Fixed double-negative wording of the original error message.
        echo "Test is not running on Fedora, RHEL or CentOS, terminating!"
        exit 1
        ;;
esac
sudo dnf config-manager --add-repo "https://rpm.releases.hashicorp.com/$release/hashicorp.repo"
sudo dnf install -y terraform
ARCH=$(uname -m)
# TEST_ID uniquifies all Azure resource names for this run; BRANCH_NAME and
# BUILD_ID are presumably provided by Jenkins -- TODO confirm against the job.
TEST_ID="$DISTRO_CODE-$ARCH-$BRANCH_NAME-$BUILD_ID"
IMAGE_KEY=image-${TEST_ID}
# Jenkins sets WORKSPACE to the job workspace, but if this script runs
# outside of Jenkins, we can set up a temporary directory instead.
if [[ ${WORKSPACE:-empty} == empty ]]; then
WORKSPACE=$(mktemp -d)
fi
# Set up temporary files.
TEMPDIR=$(mktemp -d)
# Upload target configuration passed to `composer-cli compose start`.
AZURE_CONFIG=${TEMPDIR}/azure.toml
# Blueprint describing the image content.
BLUEPRINT_FILE=${TEMPDIR}/blueprint.toml
# JSON output of compose start/info, parsed with jq below.
COMPOSE_START=${TEMPDIR}/compose-start-${IMAGE_KEY}.json
COMPOSE_INFO=${TEMPDIR}/compose-info-${IMAGE_KEY}.json
# Check for the smoke test file on the Azure instance that we start.
# Prints "1" when /etc/smoke-test.txt on host $1 contains "smoke-test",
# "0" otherwise.
smoke_test_check () {
    local remote_contents
    remote_contents=$(sudo ssh -i key.rsa redhat@"${1}" -o StrictHostKeyChecking=no 'cat /etc/smoke-test.txt')
    if [[ $remote_contents == smoke-test ]]; then
        echo 1
    else
        echo 0
    fi
}
# Get the compose log.
# Fetches the osbuild log for compose $1 into the workspace for archiving.
get_compose_log () {
    COMPOSE_ID=$1
    LOG_FILE=${WORKSPACE}/osbuild-${ID}-${VERSION_ID}-azure.log
    # Download the logs straight into the log file.
    sudo composer-cli compose log "$COMPOSE_ID" > "$LOG_FILE"
}
# Get the compose metadata.
# Downloads the metadata tarball for compose $1 into the CWD, extracts it,
# and stores the pretty-printed JSON in the workspace for archiving.
get_compose_metadata () {
    COMPOSE_ID=$1
    METADATA_FILE=${WORKSPACE}/osbuild-${ID}-${VERSION_ID}-azure.json
    # Download the metadata (writes a *-metadata.tar into the CWD).
    sudo composer-cli compose metadata "$COMPOSE_ID" > /dev/null
    # Find the tarball and extract it.
    TARBALL=$(basename "$(find . -maxdepth 1 -type f -name "*-metadata.tar")")
    tar -xf "$TARBALL"
    rm -f "$TARBALL"
    # Move the JSON file into place (jq reads the file directly; the
    # original piped it through a useless `cat`).
    jq -M '.' "${COMPOSE_ID}.json" | tee "$METADATA_FILE" > /dev/null
}
# Export Azure credentials
# $AZURE_CREDS is a file of KEY=VALUE lines (injected by CI); read it on
# fd 4 and export each line as an environment variable.
exec 4<"$AZURE_CREDS"
readarray -t -u 4 vars
for line in "${vars[@]}"; do export "${line?}"; done
exec 4<&-
# Write an Azure TOML file
# This tells osbuild-composer where to upload the resulting VHD.
tee "$AZURE_CONFIG" > /dev/null << EOF
provider = "azure"
[settings]
storageAccount = "${AZURE_STORAGE_ACCOUNT}"
storageAccessKey = "${AZURE_STORAGE_ACCESS_KEY}"
container = "${AZURE_CONTAINER_NAME}"
EOF
# Write a basic blueprint for our image.
# cloud-init is needed so the terraform-supplied user-data (smoke-test
# file, ssh setup) is applied on first boot.
tee "$BLUEPRINT_FILE" > /dev/null << EOF
name = "bash"
description = "A base system with bash"
version = "0.0.1"
[[packages]]
name = "bash"
[[packages]]
name = "cloud-init"
[customizations.services]
enabled = ["sshd", "cloud-init", "cloud-init-local", "cloud-config", "cloud-final"]
EOF
# Prepare the blueprint for the compose.
greenprint "📋 Preparing blueprint"
sudo composer-cli blueprints push "$BLUEPRINT_FILE"
sudo composer-cli blueprints depsolve bash
# Get worker unit file so we can watch the journal.
WORKER_UNIT=$(sudo systemctl list-units | grep -o -E "osbuild.*worker.*\.service")
# Stream the worker journal in the background so build progress shows up
# in the test log; the PID is used to stop it once the compose finishes.
sudo journalctl -af -n 1 -u "${WORKER_UNIT}" &
WORKER_JOURNAL_PID=$!
# Start the compose and upload to Azure.
greenprint "🚀 Starting compose"
sudo composer-cli --json compose start bash vhd "$IMAGE_KEY" "$AZURE_CONFIG" | tee "$COMPOSE_START"
COMPOSE_ID=$(jq -r '.build_id' "$COMPOSE_START")
# Wait for the compose to finish.
greenprint "⏱ Waiting for compose to finish: ${COMPOSE_ID}"
# Poll compose info until the queue status leaves RUNNING/WAITING
# (i.e. it becomes FINISHED or FAILED).
while true; do
sudo composer-cli --json compose info "${COMPOSE_ID}" | tee "$COMPOSE_INFO" > /dev/null
COMPOSE_STATUS=$(jq -r '.queue_status' "$COMPOSE_INFO")
# Is the compose finished?
if [[ $COMPOSE_STATUS != RUNNING ]] && [[ $COMPOSE_STATUS != WAITING ]]; then
break
fi
# Wait 30 seconds and try again.
sleep 30
done
# Capture the compose logs from osbuild.
greenprint "💬 Getting compose log and metadata"
get_compose_log "$COMPOSE_ID"
get_compose_metadata "$COMPOSE_ID"
# Did the compose finish with success?
if [[ $COMPOSE_STATUS != FINISHED ]]; then
echo "Something went wrong with the compose. 😢"
exit 1
fi
# Stop watching the worker journal.
sudo pkill -P ${WORKER_JOURNAL_PID}
# Set up necessary variables for terraform (consumed as var.* in main.tf).
export TF_VAR_RESOURCE_GROUP="$AZURE_RESOURCE_GROUP"
export TF_VAR_STORAGE_ACCOUNT="$AZURE_STORAGE_ACCOUNT"
export TF_VAR_CONTAINER_NAME="$AZURE_CONTAINER_NAME"
export TF_VAR_BLOB_NAME="$IMAGE_KEY".vhd
export TF_VAR_TEST_ID="$TEST_ID"
# URL of the page blob that composer uploaded; used for `terraform import`.
export BLOB_URL="https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZURE_CONTAINER_NAME/$IMAGE_KEY.vhd"
# Credentials consumed by the azurerm terraform provider. (The original
# appended `> /dev/null` to these exports; `export` prints nothing, so the
# no-op redirections were dropped.)
export ARM_CLIENT_ID="$AZURE_CLIENT_ID"
export ARM_CLIENT_SECRET="$AZURE_CLIENT_SECRET"
export ARM_SUBSCRIPTION_ID="$AZURE_SUBSCRIPTION_ID"
export ARM_TENANT_ID="$AZURE_TENANT_ID"
# Copy terraform main file and cloud-init to current working directory
cp /usr/share/tests/osbuild-composer/azure/main.tf .
cp /usr/share/tests/osbuild-composer/cloud-init/user-data .
# Initialize terraform
terraform init
# Import the uploaded page blob to terraform so it manages (and later
# destroys) it instead of trying to create it.
terraform import azurerm_storage_blob.testBlob "$BLOB_URL"
# Apply the configuration
terraform apply -auto-approve
# Grab the VM's public IP and the generated ssh private key for the check.
PUBLIC_IP=$(terraform output -raw public_IP)
terraform output -raw tls_private_key > key.rsa
chmod 400 key.rsa
# Check for our smoke test file.
greenprint "🛃 Checking for smoke test file"
RESULTS=0
for _ in {0..10}; do
    # A failed ssh inside smoke_test_check would make the command
    # substitution return non-zero and, under `set -e`, abort the whole
    # script before the loop could retry. Fall back to 0 instead so the
    # retry actually happens while the VM is still booting.
    RESULTS="$(smoke_test_check "$PUBLIC_IP")" || RESULTS=0
    if [[ $RESULTS == 1 ]]; then
        echo "Smoke test passed! 🥳"
        break
    fi
    echo "Machine is not ready yet, retrying connection."
    sleep 5
done
# Clean up resources in Azure
terraform destroy -auto-approve
# Also delete the compose so we don't run out of disk space
sudo composer-cli compose delete "${COMPOSE_ID}" > /dev/null
# Use the result of the smoke test to determine if we passed or failed.
# (The original `elif [[ $RESULTS != 1 ]]` was a redundant negation and the
# trailing `exit 0` was unreachable; a plain else covers both.)
if [[ $RESULTS == 1 ]]; then
    greenprint "💚 Success"
    exit 0
else
    greenprint "❌ Failed"
    exit 1
fi

174
test/data/azure/main.tf Normal file
View file

@ -0,0 +1,174 @@
# azurerm version is hardcoded to prevent potential issues with new versions
terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "=2.56.0"
}
}
}
# Configure the Microsoft Azure Provider
# Authentication comes from the ARM_* environment variables exported by the
# driving test script.
provider "azurerm" {
features {}
}
# Set necessary variables
# Each variable is supplied via a TF_VAR_* environment variable by the
# driving test script (test/cases/azure.sh).
variable "RESOURCE_GROUP" {
type = string
}
variable "STORAGE_ACCOUNT" {
type = string
}
variable "CONTAINER_NAME" {
type = string
}
# Name of the VHD page blob uploaded by osbuild-composer.
variable "BLOB_NAME" {
type = string
}
# Unique suffix used to name every resource created for this test run.
variable "TEST_ID" {
type = string
}
# Use existing resource group
data "azurerm_resource_group" "testResourceGroup" {
name = var.RESOURCE_GROUP
}
# Use existing storage blob
# This blob is not created by terraform: the test script brings it under
# management with `terraform import azurerm_storage_blob.testBlob`.
resource "azurerm_storage_blob" "testBlob" {
name = var.BLOB_NAME
storage_account_name = var.STORAGE_ACCOUNT
storage_container_name = var.CONTAINER_NAME
type = "Page"
# The following is a workaround related to https://github.com/terraform-providers/terraform-provider-azurerm/issues/8392
lifecycle {
ignore_changes = [content_md5, source, parallelism, size]
}
}
# Create vm image
# Wraps the uploaded VHD blob as a managed image to boot the VM from.
resource "azurerm_image" "testimage" {
name = join("-", ["image", var.TEST_ID])
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
os_disk {
os_type = "Linux"
os_state = "Generalized"
blob_uri = azurerm_storage_blob.testBlob.url
size_gb = 20
}
}
# Create virtual network
resource "azurerm_virtual_network" "testterraformnetwork" {
name = join("-", ["vnet", var.TEST_ID])
address_space = ["10.0.0.0/16"]
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
}
# Create subnet
resource "azurerm_subnet" "testterraformsubnet" {
name = join("-", ["snet", var.TEST_ID])
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
virtual_network_name = azurerm_virtual_network.testterraformnetwork.name
address_prefixes = ["10.0.1.0/24"]
}
# Create public IPs
# Dynamic allocation: the address is only assigned once the IP is attached
# to a running NIC/VM.
resource "azurerm_public_ip" "testterraformpublicip" {
name = join("-", ["ip", var.TEST_ID])
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
allocation_method = "Dynamic"
}
# Create Network Security Group and rule
# Only inbound ssh (TCP/22) is opened; the smoke test connects over ssh.
resource "azurerm_network_security_group" "testterraformnsg" {
name = join("-", ["nsg", var.TEST_ID])
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
security_rule {
name = "SSH"
priority = 1001
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefix = "*"
destination_address_prefix = "*"
}
}
# Create network interface
resource "azurerm_network_interface" "testterraformnic" {
name = join("-", ["iface", var.TEST_ID])
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
ip_configuration {
name = "testNicConfiguration"
subnet_id = azurerm_subnet.testterraformsubnet.id
private_ip_address_allocation = "Dynamic"
public_ip_address_id = azurerm_public_ip.testterraformpublicip.id
}
}
# Connect the security group to the network interface
resource "azurerm_network_interface_security_group_association" "test" {
network_interface_id = azurerm_network_interface.testterraformnic.id
network_security_group_id = azurerm_network_security_group.testterraformnsg.id
}
# Create (and display) an SSH key
# NOTE(review): tls_private_key comes from the hashicorp/tls provider, which
# is not pinned in required_providers above -- presumably resolved implicitly;
# verify this still works on newer terraform versions.
resource "tls_private_key" "test_ssh" {
algorithm = "RSA"
rsa_bits = 4096
}
# The test script saves this output to key.rsa and uses it to ssh in.
output "tls_private_key" {
value = tls_private_key.test_ssh.private_key_pem
sensitive = true
}
# Create virtual machine
resource "azurerm_linux_virtual_machine" "testterraformvm" {
name = join("-", ["vm", var.TEST_ID])
location = data.azurerm_resource_group.testResourceGroup.location
resource_group_name = data.azurerm_resource_group.testResourceGroup.name
network_interface_ids = [azurerm_network_interface.testterraformnic.id]
size = "Standard_B1s"
# cloud-init user-data copied next to this file by the test script; it is
# what creates /etc/smoke-test.txt checked over ssh.
custom_data = filebase64("${path.module}/user-data")
os_disk {
name = join("-", ["disk", var.TEST_ID])
caching = "ReadWrite"
storage_account_type = "Standard_LRS"
}
# Boot from the managed image built out of the uploaded VHD.
source_image_id = azurerm_image.testimage.id
computer_name = "testvm"
admin_username = "redhat"
disable_password_authentication = true
admin_ssh_key {
username = "redhat"
public_key = tls_private_key.test_ssh.public_key_openssh
}
}
# Public address the test script sshes to for the smoke test.
output "public_IP" {
value = azurerm_linux_virtual_machine.testterraformvm.public_ip_address
}