From 54058eed2a6895de2708d90485cf4e9daea3414a Mon Sep 17 00:00:00 2001 From: Adrian Stobbe Date: Mon, 4 Nov 2024 08:59:16 +0100 Subject: [PATCH] terraform: fix security rule reconciliation on Azure (#3454) * fix security rule reconciliation on azure * fix simulated patch version upgrade --- .github/workflows/e2e-upgrade.yml | 93 ++++++++------------------ docs/docs/reference/migration.md | 73 ++++++++++++++------ e2e/internal/upgrade/BUILD.bazel | 1 + e2e/internal/upgrade/upgrade.go | 4 +- e2e/internal/upgrade/upgrade_test.go | 20 +++--- terraform/infrastructure/azure/main.tf | 27 +------- 6 files changed, 97 insertions(+), 121 deletions(-) diff --git a/.github/workflows/e2e-upgrade.yml b/.github/workflows/e2e-upgrade.yml index 986bd87264..ff5082848d 100644 --- a/.github/workflows/e2e-upgrade.yml +++ b/.github/workflows/e2e-upgrade.yml @@ -132,57 +132,6 @@ jobs: echo "cloudProvider=${cloudProvider}" | tee -a "$GITHUB_OUTPUT" - build-target-cli: - name: Build upgrade target version CLI - runs-on: ubuntu-24.04 - permissions: - id-token: write - checks: write - contents: read - packages: write - steps: - - name: Checkout - if: inputs.gitRef == 'head' - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - fetch-depth: 0 - ref: ${{ !github.event.pull_request.head.repo.fork && github.head_ref || '' }} - - - name: Checkout ref - if: inputs.gitRef != 'head' - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - with: - fetch-depth: 0 - ref: ${{ inputs.gitRef }} - - - name: Setup Bazel & Nix - uses: ./.github/actions/setup_bazel_nix - - - name: Log in to the Container registry - uses: ./.github/actions/container_registry_login - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Simulate patch upgrade - if: inputs.simulatedTargetVersion != '' - run: | - echo ${{ inputs.simulatedTargetVersion }} > version.txt - - - name: Build CLI - uses: ./.github/actions/build_cli - with: - enterpriseCLI: true - outputPath: "build/constellation" - push: true - - - name: Upload CLI binary - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 - with: - name: constellation-upgrade-${{ inputs.attestationVariant }} - path: build/constellation - create-cluster: name: Create upgrade origin version cluster runs-on: ubuntu-24.04 @@ -279,7 +228,6 @@ jobs: packages: write needs: - generate-input-parameters - - build-target-cli - create-cluster steps: - name: Checkout @@ -299,6 +247,32 @@ jobs: - name: Setup Bazel & Nix uses: ./.github/actions/setup_bazel_nix + - name: Log in to the Container registry + uses: ./.github/actions/container_registry_login + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # applying the version manipulation here so that the upgrade test tool is also on the simulated target version + - name: Simulate patch upgrade + if: inputs.simulatedTargetVersion != '' + run: | + echo ${{ inputs.simulatedTargetVersion }} > version.txt + + - name: Build CLI + uses: ./.github/actions/build_cli + with: + enterpriseCLI: true + outputPath: "build/constellation" + push: true + + - name: Upload CLI binary # is needed for the cleanup step + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 + with: + name: constellation-upgrade-${{ inputs.attestationVariant }} + path: build/constellation + - name: Login to AWS uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 with: @@ -335,11 +309,6 @@ jobs: with: azure_credentials: ${{ secrets.AZURE_E2E_IAM_CREDENTIALS }} - - name: Download CLI - uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 - with: - name: constellation-upgrade-${{ inputs.attestationVariant }} - path: build - name: Download Working Directory (Pre-test) uses: ./.github/actions/artifact_download @@ -404,15 +373,9 @@ jobs: echo "K8s target: $KUBERNETES" echo "Microservice target: $MICROSERVICES" - if [[ -n ${MICROSERVICES} ]]; then - MICROSERVICES_FLAG="--target-microservices=$MICROSERVICES" - fi - if [[ -n ${KUBERNETES} ]]; then - KUBERNETES_FLAG="--target-kubernetes=$KUBERNETES" - fi - sudo sh -c 'echo "127.0.0.1 license.confidential.cloud" >> /etc/hosts' - bazel run --test_timeout=14400 //e2e/internal/upgrade:upgrade_test -- --want-worker "$WORKERNODES" --want-control "$CONTROLNODES" --target-image "$IMAGE" "$KUBERNETES_FLAG" "$MICROSERVICES_FLAG" + CLI=$(realpath ./build/constellation) + bazel run --test_timeout=14400 //e2e/internal/upgrade:upgrade_test -- --want-worker "$WORKERNODES" --want-control "$CONTROLNODES" --target-image "$IMAGE" --target-kubernetes "$KUBERNETES" --target-microservices "$MICROSERVICES" --cli "$CLI" - name: Remove Terraform plugin cache if: always() diff --git a/docs/docs/reference/migration.md b/docs/docs/reference/migration.md index 49cbde7021..0252c409f4 100644 --- a/docs/docs/reference/migration.md +++ b/docs/docs/reference/migration.md @@ -3,51 +3,81 @@ This document describes breaking changes and migrations between Constellation releases. Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to automatically update an old config file to a new format. +## Migrations to v2.19.1 + +### Azure + +* During the upgrade, security rules are migrated and the old ones need to be cleaned up manually by the user. The below script shows how to delete them through the Azure CLI: + +```bash +#!/usr/bin/env bash +name="" # the name provided in the config +uid="" # the cluster id can be retrieved via `yq '.infrastructure.uid' constellation-state.yaml` +resource_group="" # the RG can be retrieved via `yq '.provider.azure.resourceGroup' constellation-conf.yaml` + +rules=( + "kubernetes" + "bootstrapper" + "verify" + "recovery" + "join" + "debugd" + "konnectivity" +) + +for rule in "${rules[@]}"; do + echo "Deleting rule: ${rule}" + az network nsg rule delete \ + --resource-group "${resource_group}" \ + --nsg-name "${name}-${uid}" \ + --name "${rule}" +done + +echo "All specified rules have been deleted." +``` ## Migrations to v2.19.0 ### Azure -* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, the target +* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, the target load balancer in which the `cloud-controller-manager` creates load balancing rules was changed. Instead of using the load balancer created and maintained by the CLI's Terraform code, the `cloud-controller-manager` now creates its own load balancer in Azure. If your Constellation has services of type `LoadBalancer`, please remove them before the upgrade and re-apply them - afterward. - + afterward. ## Migrating from Azure's service principal authentication to managed identity authentication (during the upgrade to Constellation v2.8.0) -- The `provider.azure.appClientID` and `provider.azure.appClientSecret` fields are no longer supported and should be removed. -- To keep using an existing UAMI, add the `Owner` permission with the scope of your `resourceGroup`. -- Otherwise, simply [create new Constellation IAM credentials](../workflows/config.md#creating-an-iam-configuration) and use the created UAMI. -- To migrate the authentication for an existing cluster on Azure to an UAMI with the necessary permissions: +* The `provider.azure.appClientID` and `provider.azure.appClientSecret` fields are no longer supported and should be removed. +* To keep using an existing UAMI, add the `Owner` permission with the scope of your `resourceGroup`. +* Otherwise, simply [create new Constellation IAM credentials](../workflows/config.md#creating-an-iam-configuration) and use the created UAMI. +* To migrate the authentication for an existing cluster on Azure to an UAMI with the necessary permissions: 1. Remove the `aadClientId` and `aadClientSecret` from the azureconfig secret. 2. Set `useManagedIdentityExtension` to `true` and use the `userAssignedIdentity` from the Constellation config for the value of `userAssignedIdentityID`. 3. Restart the CSI driver, cloud controller manager, cluster autoscaler, and Constellation operator pods. - ## Migrating from CLI versions before 2.10 -- AWS cluster upgrades require additional IAM permissions for the newly introduced `aws-load-balancer-controller`. Please upgrade your IAM roles using `iam upgrade apply`. This will show necessary changes and apply them, if desired. -- The global `nodeGroups` field was added. -- The fields `instanceType`, `stateDiskSizeGB`, and `stateDiskType` for each cloud provider are now part of the configuration of individual node groups. -- The `constellation create` command no longer uses the flags `--control-plane-count` and `--worker-count`. Instead, the initial node count is configured per node group in the `nodeGroups` field. +* AWS cluster upgrades require additional IAM permissions for the newly introduced `aws-load-balancer-controller`. Please upgrade your IAM roles using `iam upgrade apply`. This will show necessary changes and apply them, if desired. +* The global `nodeGroups` field was added. +* The fields `instanceType`, `stateDiskSizeGB`, and `stateDiskType` for each cloud provider are now part of the configuration of individual node groups. +* The `constellation create` command no longer uses the flags `--control-plane-count` and `--worker-count`. Instead, the initial node count is configured per node group in the `nodeGroups` field. ## Migrating from CLI versions before 2.9 -- The `provider.azure.appClientID` and `provider.azure.clientSecretValue` fields were removed to enforce migration to managed identity authentication +* The `provider.azure.appClientID` and `provider.azure.clientSecretValue` fields were removed to enforce migration to managed identity authentication ## Migrating from CLI versions before 2.8 -- The `measurements` field for each cloud service provider was replaced with a global `attestation` field. -- The `confidentialVM`, `idKeyDigest`, and `enforceIdKeyDigest` fields for the Azure cloud service provider were removed in favor of using the global `attestation` field. -- The optional global field `attestationVariant` was replaced by the now required `attestation` field. +* The `measurements` field for each cloud service provider was replaced with a global `attestation` field. +* The `confidentialVM`, `idKeyDigest`, and `enforceIdKeyDigest` fields for the Azure cloud service provider were removed in favor of using the global `attestation` field. +* The optional global field `attestationVariant` was replaced by the now required `attestation` field. ## Migrating from CLI versions before 2.3 -- The `sshUsers` field was deprecated in v2.2 and has been removed from the configuration in v2.3. +* The `sshUsers` field was deprecated in v2.2 and has been removed from the configuration in v2.3. As an alternative for SSH, check the workflow section [Connect to nodes](../workflows/troubleshooting.md#node-shell-access). -- The `image` field for each cloud service provider has been replaced with a global `image` field. Use the following mapping to migrate your configuration: +* The `image` field for each cloud service provider has been replaced with a global `image` field. Use the following mapping to migrate your configuration:
Show all @@ -77,10 +107,11 @@ Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to a | GCP | `projects/constellation-images/global/images/constellation-v2-2-0` | `v2.2.0` | | GCP | `projects/constellation-images/global/images/constellation-v2-1-0` | `v2.1.0` | | GCP | `projects/constellation-images/global/images/constellation-v2-0-0` | `v2.0.0` | +
-- The `enforcedMeasurements` field has been removed and merged with the `measurements` field. - - To migrate your config containing a new image (`v2.3` or greater), remove the old `measurements` and `enforcedMeasurements` entries from your config and run `constellation fetch-measurements` - - To migrate your config containing an image older than `v2.3`, remove the `enforcedMeasurements` entry and replace the entries in `measurements` as shown in the example below: +* The `enforcedMeasurements` field has been removed and merged with the `measurements` field. + * To migrate your config containing a new image (`v2.3` or greater), remove the old `measurements` and `enforcedMeasurements` entries from your config and run `constellation fetch-measurements` + * To migrate your config containing an image older than `v2.3`, remove the `enforcedMeasurements` entry and replace the entries in `measurements` as shown in the example below: ```diff measurements: diff --git a/e2e/internal/upgrade/BUILD.bazel b/e2e/internal/upgrade/BUILD.bazel index 6e368e94fb..8acfc7e94a 100644 --- a/e2e/internal/upgrade/BUILD.bazel +++ b/e2e/internal/upgrade/BUILD.bazel @@ -47,6 +47,7 @@ go_test( "//e2e/internal/kubectl", "//internal/constants", "//internal/versions", + "@com_github_stretchr_testify//assert", "@com_github_stretchr_testify//require", "@io_k8s_api//core/v1:core", "@io_k8s_apimachinery//pkg/apis/meta/v1:meta", diff --git a/e2e/internal/upgrade/upgrade.go b/e2e/internal/upgrade/upgrade.go index 2dd283ef85..fd24832593 100644 --- a/e2e/internal/upgrade/upgrade.go +++ b/e2e/internal/upgrade/upgrade.go @@ -301,10 +301,10 @@ func getCLIPath(cliPathFlag string) (string, error) { pathCLI := os.Getenv("PATH_CLI") var relCLIPath string switch { - case pathCLI != "": - relCLIPath = pathCLI case cliPathFlag != "": relCLIPath = cliPathFlag + case pathCLI != "": + relCLIPath = pathCLI default: return "", errors.New("neither 'PATH_CLI' nor 'cli' flag set") } diff --git a/e2e/internal/upgrade/upgrade_test.go b/e2e/internal/upgrade/upgrade_test.go index 4206348f25..be47bb197c 100644 --- a/e2e/internal/upgrade/upgrade_test.go +++ b/e2e/internal/upgrade/upgrade_test.go @@ -23,6 +23,7 @@ import ( "github.com/edgelesssys/constellation/v2/e2e/internal/kubectl" "github.com/edgelesssys/constellation/v2/internal/constants" "github.com/edgelesssys/constellation/v2/internal/versions" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" coreV1 "k8s.io/api/core/v1" metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -81,7 +82,8 @@ func TestUpgrade(t *testing.T) { log.Println(string(data)) log.Println("Checking upgrade.") - runUpgradeCheck(require, cli, *targetKubernetes) + assert := assert.New(t) // use assert because this part is more brittle and should not fail the entire test + runUpgradeCheck(assert, cli, *targetKubernetes) log.Println("Triggering upgrade.") runUpgradeApply(require, cli) @@ -170,25 +172,25 @@ func testNodesEventuallyAvailable(t *testing.T, k *kubernetes.Clientset, wantCon // runUpgradeCheck executes 'upgrade check' and does basic checks on the output. // We can not check images upgrades because we might use unpublished images. CLI uses public CDN to check for available images. -func runUpgradeCheck(require *require.Assertions, cli, targetKubernetes string) { +func runUpgradeCheck(assert *assert.Assertions, cli, targetKubernetes string) { cmd := exec.CommandContext(context.Background(), cli, "upgrade", "check", "--debug") stdout, stderr, err := runCommandWithSeparateOutputs(cmd) - require.NoError(err, "Stdout: %s\nStderr: %s", string(stdout), string(stderr)) + assert.NoError(err, "Stdout: %s\nStderr: %s", string(stdout), string(stderr)) - require.Contains(string(stdout), "The following updates are available with this CLI:") - require.Contains(string(stdout), "Kubernetes:") + assert.Contains(string(stdout), "The following updates are available with this CLI:") + assert.Contains(string(stdout), "Kubernetes:") log.Printf("targetKubernetes: %s\n", targetKubernetes) if targetKubernetes == "" { log.Printf("true\n") - require.True(containsAny(string(stdout), versions.SupportedK8sVersions())) + assert.True(containsAny(string(stdout), versions.SupportedK8sVersions())) } else { log.Printf("false. targetKubernetes: %s\n", targetKubernetes) - require.Contains(string(stdout), targetKubernetes, fmt.Sprintf("Expected Kubernetes version %s in output.", targetKubernetes)) + assert.Contains(string(stdout), targetKubernetes, fmt.Sprintf("Expected Kubernetes version %s in output.", targetKubernetes)) } - require.Contains(string(stdout), "Services:") - require.Contains(string(stdout), fmt.Sprintf("--> %s", constants.BinaryVersion().String())) + assert.Contains(string(stdout), "Services:") + assert.Contains(string(stdout), fmt.Sprintf("--> %s", constants.BinaryVersion().String())) log.Println(string(stdout)) } diff --git a/terraform/infrastructure/azure/main.tf b/terraform/infrastructure/azure/main.tf index 147197ab3f..b670b29898 100644 --- a/terraform/infrastructure/azure/main.tf +++ b/terraform/infrastructure/azure/main.tf @@ -227,36 +227,15 @@ resource "azurerm_network_security_group" "security_group" { location = var.location resource_group_name = var.resource_group tags = local.tags - - dynamic "security_rule" { - # we keep this rule for one last release since the azurerm provider does not - # support moving security rules that are inlined (like this) to the external resource one. - # Even worse, just defining the azurerm_network_security_group without the - # "security_rule" block will NOT remove all the rules but do nothing. - # TODO(@3u13r): remove the "security_rule" block in the next release after this code has landed. - # So either after 2.19 or after 2.18.X if cherry-picked release. - for_each = [{ name = "konnectivity", priority = 1000, port = 8132 }] - content { - name = security_rule.value.name - priority = security_rule.value.priority - direction = "Inbound" - access = "Allow" - protocol = "Tcp" - source_port_range = "*" - destination_port_range = security_rule.value.port - source_address_prefix = "*" - destination_address_prefix = "*" - } - } } resource "azurerm_network_security_rule" "nsg_rule" { for_each = { for o in local.ports : o.name => o } - - name = each.value.name - priority = each.value.priority + # TODO(elchead): v2.20.0: remove name suffix and priority offset. Might need to add create_before_destroy to the NSG rule. + name = "${each.value.name}-new" + priority = each.value.priority + 10 # offset to not overlap with old rules direction = "Inbound" access = "Allow" protocol = "Tcp"