terraform: fix security rule reconciliation on Azure (#3454)
* fix security rule reconciliation on azure
* fix simulated patch version upgrade
elchead authored Nov 4, 2024
1 parent aa7d47e commit 54058ee
Showing 6 changed files with 97 additions and 121 deletions.
93 changes: 28 additions & 65 deletions .github/workflows/e2e-upgrade.yml
@@ -132,57 +132,6 @@ jobs:
echo "cloudProvider=${cloudProvider}" | tee -a "$GITHUB_OUTPUT"
build-target-cli:
name: Build upgrade target version CLI
runs-on: ubuntu-24.04
permissions:
id-token: write
checks: write
contents: read
packages: write
steps:
- name: Checkout
if: inputs.gitRef == 'head'
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
ref: ${{ !github.event.pull_request.head.repo.fork && github.head_ref || '' }}

- name: Checkout ref
if: inputs.gitRef != 'head'
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
with:
fetch-depth: 0
ref: ${{ inputs.gitRef }}

- name: Setup Bazel & Nix
uses: ./.github/actions/setup_bazel_nix

- name: Log in to the Container registry
uses: ./.github/actions/container_registry_login
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Simulate patch upgrade
if: inputs.simulatedTargetVersion != ''
run: |
echo ${{ inputs.simulatedTargetVersion }} > version.txt
- name: Build CLI
uses: ./.github/actions/build_cli
with:
enterpriseCLI: true
outputPath: "build/constellation"
push: true

- name: Upload CLI binary
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
with:
name: constellation-upgrade-${{ inputs.attestationVariant }}
path: build/constellation

create-cluster:
name: Create upgrade origin version cluster
runs-on: ubuntu-24.04
@@ -279,7 +228,6 @@ jobs:
packages: write
needs:
- generate-input-parameters
- build-target-cli
- create-cluster
steps:
- name: Checkout
@@ -299,6 +247,32 @@
- name: Setup Bazel & Nix
uses: ./.github/actions/setup_bazel_nix

- name: Log in to the Container registry
uses: ./.github/actions/container_registry_login
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Apply the version manipulation here so that the upgrade test tool is also on the simulated target version
- name: Simulate patch upgrade
if: inputs.simulatedTargetVersion != ''
run: |
echo ${{ inputs.simulatedTargetVersion }} > version.txt
- name: Build CLI
uses: ./.github/actions/build_cli
with:
enterpriseCLI: true
outputPath: "build/constellation"
push: true

- name: Upload CLI binary # is needed for the cleanup step
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
with:
name: constellation-upgrade-${{ inputs.attestationVariant }}
path: build/constellation

- name: Login to AWS
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
@@ -335,11 +309,6 @@
with:
azure_credentials: ${{ secrets.AZURE_E2E_IAM_CREDENTIALS }}

- name: Download CLI
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: constellation-upgrade-${{ inputs.attestationVariant }}
path: build

- name: Download Working Directory (Pre-test)
uses: ./.github/actions/artifact_download
@@ -404,15 +373,9 @@
echo "K8s target: $KUBERNETES"
echo "Microservice target: $MICROSERVICES"
if [[ -n ${MICROSERVICES} ]]; then
MICROSERVICES_FLAG="--target-microservices=$MICROSERVICES"
fi
if [[ -n ${KUBERNETES} ]]; then
KUBERNETES_FLAG="--target-kubernetes=$KUBERNETES"
fi
sudo sh -c 'echo "127.0.0.1 license.confidential.cloud" >> /etc/hosts'
bazel run --test_timeout=14400 //e2e/internal/upgrade:upgrade_test -- --want-worker "$WORKERNODES" --want-control "$CONTROLNODES" --target-image "$IMAGE" "$KUBERNETES_FLAG" "$MICROSERVICES_FLAG"
CLI=$(realpath ./build/constellation)
bazel run --test_timeout=14400 //e2e/internal/upgrade:upgrade_test -- --want-worker "$WORKERNODES" --want-control "$CONTROLNODES" --target-image "$IMAGE" --target-kubernetes "$KUBERNETES" --target-microservices "$MICROSERVICES" --cli "$CLI"
- name: Remove Terraform plugin cache
if: always()
73 changes: 52 additions & 21 deletions docs/docs/reference/migration.md
@@ -3,51 +3,81 @@
This document describes breaking changes and migrations between Constellation releases.
Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to automatically update an old config file to a new format.

## Migrations to v2.19.1

### Azure

* During the upgrade, security rules are migrated, and the old ones must be cleaned up manually. The script below shows how to delete them through the Azure CLI:

```bash
#!/usr/bin/env bash
name="<insert>" # the name provided in the config
uid="<insert>" # the cluster id can be retrieved via `yq '.infrastructure.uid' constellation-state.yaml`
resource_group="<insert>" # the RG can be retrieved via `yq '.provider.azure.resourceGroup' constellation-conf.yaml`

rules=(
"kubernetes"
"bootstrapper"
"verify"
"recovery"
"join"
"debugd"
"konnectivity"
)

for rule in "${rules[@]}"; do
echo "Deleting rule: ${rule}"
az network nsg rule delete \
--resource-group "${resource_group}" \
--nsg-name "${name}-${uid}" \
--name "${rule}"
done

echo "All specified rules have been deleted."
```

## Migrations to v2.19.0

### Azure

* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, the target
load balancer in which the `cloud-controller-manager` creates load balancing rules was changed. Instead of using the load balancer
created and maintained by the CLI's Terraform code, the `cloud-controller-manager` now creates its own load balancer in Azure.
If your Constellation has services of type `LoadBalancer`, please remove them before the upgrade and re-apply them
afterward.

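A minimal sketch of one way to back up, remove, and later re-apply these services with `kubectl` and `jq`. The backup directory and the pruning of server-populated fields are illustrative assumptions, not an official procedure:

```bash
#!/usr/bin/env bash
# Sketch only: back up all services of type LoadBalancer, delete them before
# the upgrade, and re-apply them after the upgrade has completed.
backup_dir="lb-services-backup"
mkdir -p "${backup_dir}"

kubectl get services --all-namespaces -o json \
  | jq -r '.items[] | select(.spec.type == "LoadBalancer") | "\(.metadata.namespace) \(.metadata.name)"' \
  | while read -r namespace name; do
      # Strip server-populated fields so the manifest can be re-applied cleanly.
      kubectl get service "${name}" -n "${namespace}" -o json \
        | jq 'del(.status, .metadata.resourceVersion, .metadata.uid, .metadata.creationTimestamp, .spec.clusterIP, .spec.clusterIPs)' \
        > "${backup_dir}/${namespace}-${name}.json"
      kubectl delete service "${name}" -n "${namespace}"
    done

# After the upgrade has completed, restore the services:
# for f in "${backup_dir}"/*.json; do kubectl apply -f "$f"; done
```
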
## Migrating from Azure's service principal authentication to managed identity authentication (during the upgrade to Constellation v2.8.0)

* The `provider.azure.appClientID` and `provider.azure.appClientSecret` fields are no longer supported and should be removed.
* To keep using an existing UAMI, add the `Owner` permission with the scope of your `resourceGroup`.
* Otherwise, simply [create new Constellation IAM credentials](../workflows/config.md#creating-an-iam-configuration) and use the created UAMI.
* To migrate the authentication for an existing cluster on Azure to a UAMI with the necessary permissions (see the sketch after this list):
1. Remove the `aadClientId` and `aadClientSecret` from the azureconfig secret.
2. Set `useManagedIdentityExtension` to `true` and use the `userAssignedIdentity` from the Constellation config for the value of `userAssignedIdentityID`.
3. Restart the CSI driver, cloud controller manager, cluster autoscaler, and Constellation operator pods.
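A sketch of these steps with `kubectl` and `jq`. The namespace, the `azure.json` key of the `azureconfig` secret, and the restart mechanism are assumptions — verify them against your cluster before running anything:

```bash
#!/usr/bin/env bash
namespace="kube-system"  # assumed location of the azureconfig secret
uami_id="<insert>"       # userAssignedIdentity from the Constellation config

# 1. Extract the current cloud config from the azureconfig secret.
kubectl get secret azureconfig -n "${namespace}" \
  -o jsonpath='{.data.azure\.json}' | base64 -d > azure.json

# 2. Remove the service principal credentials and enable managed identity.
jq --arg id "${uami_id}" \
  'del(.aadClientId, .aadClientSecret)
   | .useManagedIdentityExtension = true
   | .userAssignedIdentityID = $id' azure.json > azure-new.json

# 3. Write the updated config back into the secret.
kubectl create secret generic azureconfig -n "${namespace}" \
  --from-file=azure.json=azure-new.json \
  --dry-run=client -o yaml | kubectl apply -f -

# 4. Restart the CSI driver, cloud controller manager, cluster autoscaler, and
#    Constellation operator pods, e.g. with `kubectl rollout restart` on the
#    corresponding deployments (names depend on your installation).
```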


## Migrating from CLI versions before 2.10

* AWS cluster upgrades require additional IAM permissions for the newly introduced `aws-load-balancer-controller`. Please upgrade your IAM roles using `iam upgrade apply`. This will show the necessary changes and apply them, if desired (see the example after this list).
* The global `nodeGroups` field was added.
* The fields `instanceType`, `stateDiskSizeGB`, and `stateDiskType` for each cloud provider are now part of the configuration of individual node groups.
* The `constellation create` command no longer uses the flags `--control-plane-count` and `--worker-count`. Instead, the initial node count is configured per node group in the `nodeGroups` field.
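For example, from the workspace that contains `constellation-conf.yaml` (the `initialCount` field name mentioned in the comment is an assumption — check your generated config):

```bash
# Show and, after confirmation, apply the additional IAM permissions required
# by the aws-load-balancer-controller.
constellation iam upgrade apply

# Initial node counts are now set per node group (e.g. an initialCount field)
# instead of via --control-plane-count/--worker-count; inspect them with yq:
yq '.nodeGroups' constellation-conf.yaml
```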

## Migrating from CLI versions before 2.9

* The `provider.azure.appClientID` and `provider.azure.clientSecretValue` fields were removed to enforce migration to managed identity authentication

## Migrating from CLI versions before 2.8

* The `measurements` field for each cloud service provider was replaced with a global `attestation` field.
* The `confidentialVM`, `idKeyDigest`, and `enforceIdKeyDigest` fields for the Azure cloud service provider were removed in favor of using the global `attestation` field.
* The optional global field `attestationVariant` was replaced by the now required `attestation` field.

## Migrating from CLI versions before 2.3

* The `sshUsers` field was deprecated in v2.2 and has been removed from the configuration in v2.3.
As an alternative for SSH, check the workflow section [Connect to nodes](../workflows/troubleshooting.md#node-shell-access).
* The `image` field for each cloud service provider has been replaced with a global `image` field. Use the following mapping to migrate your configuration:
<details>
<summary>Show all</summary>

@@ -77,10 +107,11 @@ Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to a
| GCP | `projects/constellation-images/global/images/constellation-v2-2-0` | `v2.2.0` |
| GCP | `projects/constellation-images/global/images/constellation-v2-1-0` | `v2.1.0` |
| GCP | `projects/constellation-images/global/images/constellation-v2-0-0` | `v2.0.0` |

</details>
* The `enforcedMeasurements` field has been removed and merged with the `measurements` field.
* To migrate your config containing a new image (`v2.3` or greater), remove the old `measurements` and `enforcedMeasurements` entries from your config and run `constellation fetch-measurements`.
* To migrate your config containing an image older than `v2.3`, remove the `enforcedMeasurements` entry and replace the entries in `measurements` as shown in the example below:

```diff
measurements:
1 change: 1 addition & 0 deletions e2e/internal/upgrade/BUILD.bazel
@@ -47,6 +47,7 @@ go_test(
"//e2e/internal/kubectl",
"//internal/constants",
"//internal/versions",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
"@io_k8s_api//core/v1:core",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
4 changes: 2 additions & 2 deletions e2e/internal/upgrade/upgrade.go
@@ -301,10 +301,10 @@ func getCLIPath(cliPathFlag string) (string, error) {
pathCLI := os.Getenv("PATH_CLI")
var relCLIPath string
switch {
case pathCLI != "":
relCLIPath = pathCLI
case cliPathFlag != "":
relCLIPath = cliPathFlag
case pathCLI != "":
relCLIPath = pathCLI
default:
return "", errors.New("neither 'PATH_CLI' nor 'cli' flag set")
}
20 changes: 11 additions & 9 deletions e2e/internal/upgrade/upgrade_test.go
@@ -23,6 +23,7 @@ import (
"github.com/edgelesssys/constellation/v2/e2e/internal/kubectl"
"github.com/edgelesssys/constellation/v2/internal/constants"
"github.com/edgelesssys/constellation/v2/internal/versions"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
coreV1 "k8s.io/api/core/v1"
metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -81,7 +82,8 @@ func TestUpgrade(t *testing.T) {
log.Println(string(data))

log.Println("Checking upgrade.")
runUpgradeCheck(require, cli, *targetKubernetes)
assert := assert.New(t) // use assert because this part is more brittle and should not fail the entire test
runUpgradeCheck(assert, cli, *targetKubernetes)

log.Println("Triggering upgrade.")
runUpgradeApply(require, cli)
@@ -170,25 +172,25 @@ func testNodesEventuallyAvailable(t *testing.T, k *kubernetes.Clientset, wantCon

// runUpgradeCheck executes 'upgrade check' and does basic checks on the output.
// We cannot check image upgrades because we might use unpublished images. The CLI uses the public CDN to check for available images.
func runUpgradeCheck(require *require.Assertions, cli, targetKubernetes string) {
func runUpgradeCheck(assert *assert.Assertions, cli, targetKubernetes string) {
cmd := exec.CommandContext(context.Background(), cli, "upgrade", "check", "--debug")
stdout, stderr, err := runCommandWithSeparateOutputs(cmd)
require.NoError(err, "Stdout: %s\nStderr: %s", string(stdout), string(stderr))
assert.NoError(err, "Stdout: %s\nStderr: %s", string(stdout), string(stderr))

require.Contains(string(stdout), "The following updates are available with this CLI:")
require.Contains(string(stdout), "Kubernetes:")
assert.Contains(string(stdout), "The following updates are available with this CLI:")
assert.Contains(string(stdout), "Kubernetes:")
log.Printf("targetKubernetes: %s\n", targetKubernetes)

if targetKubernetes == "" {
log.Printf("true\n")
require.True(containsAny(string(stdout), versions.SupportedK8sVersions()))
assert.True(containsAny(string(stdout), versions.SupportedK8sVersions()))
} else {
log.Printf("false. targetKubernetes: %s\n", targetKubernetes)
require.Contains(string(stdout), targetKubernetes, fmt.Sprintf("Expected Kubernetes version %s in output.", targetKubernetes))
assert.Contains(string(stdout), targetKubernetes, fmt.Sprintf("Expected Kubernetes version %s in output.", targetKubernetes))
}

require.Contains(string(stdout), "Services:")
require.Contains(string(stdout), fmt.Sprintf("--> %s", constants.BinaryVersion().String()))
assert.Contains(string(stdout), "Services:")
assert.Contains(string(stdout), fmt.Sprintf("--> %s", constants.BinaryVersion().String()))

log.Println(string(stdout))
}
27 changes: 3 additions & 24 deletions terraform/infrastructure/azure/main.tf
@@ -227,36 +227,15 @@ resource "azurerm_network_security_group" "security_group" {
location = var.location
resource_group_name = var.resource_group
tags = local.tags

dynamic "security_rule" {
# we keep this rule for one last release since the azurerm provider does not
# support moving security rules that are inlined (like this) to the external resource one.
# Even worse, just defining the azurerm_network_security_group without the
# "security_rule" block will NOT remove all the rules but do nothing.
# TODO(@3u13r): remove the "security_rule" block in the next release after this code has landed.
# So either after 2.19 or after 2.18.X if cherry-picked release.
for_each = [{ name = "konnectivity", priority = 1000, port = 8132 }]
content {
name = security_rule.value.name
priority = security_rule.value.priority
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = security_rule.value.port
source_address_prefix = "*"
destination_address_prefix = "*"
}
}
}

resource "azurerm_network_security_rule" "nsg_rule" {
for_each = {
for o in local.ports : o.name => o
}

name = each.value.name
priority = each.value.priority
# TODO(elchead): v2.20.0: remove name suffix and priority offset. Might need to add create_before_destroy to the NSG rule.
name = "${each.value.name}-new"
priority = each.value.priority + 10 # offset to not overlap with old rules
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
