From 4037b10ac88f422c8f5e854f519c218b543259c4 Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Tue, 16 Jul 2024 13:27:42 +0200 Subject: [PATCH 1/6] k8s: use separate lb for K8s services on azure --- internal/constellation/helm/overrides.go | 2 +- terraform/infrastructure/azure/main.tf | 34 ++++++++++++++++--- .../azure/modules/scale_set/main.tf | 1 + 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/internal/constellation/helm/overrides.go b/internal/constellation/helm/overrides.go index deb515909b..fdadaac887 100644 --- a/internal/constellation/helm/overrides.go +++ b/internal/constellation/helm/overrides.go @@ -243,7 +243,7 @@ func getCCMConfig(azureState state.Azure, serviceAccURI string) ([]byte, error) ResourceGroup: azureState.ResourceGroup, LoadBalancerSku: "standard", SecurityGroupName: azureState.NetworkSecurityGroupName, - LoadBalancerName: azureState.LoadBalancerName, + LoadBalancerName: "kubernetes-lb", UseInstanceMetadata: true, VMType: "vmss", Location: creds.Location, diff --git a/terraform/infrastructure/azure/main.tf b/terraform/infrastructure/azure/main.tf index 7f82145404..46307695b5 100644 --- a/terraform/infrastructure/azure/main.tf +++ b/terraform/infrastructure/azure/main.tf @@ -37,7 +37,6 @@ locals { { name = "kubernetes", port = "6443", health_check_protocol = "Https", path = "/readyz", priority = 100 }, { name = "bootstrapper", port = "9000", health_check_protocol = "Tcp", path = null, priority = 101 }, { name = "verify", port = "30081", health_check_protocol = "Tcp", path = null, priority = 102 }, - { name = "konnectivity", port = "8132", health_check_protocol = "Tcp", path = null, priority = 103 }, { name = "recovery", port = "9999", health_check_protocol = "Tcp", path = null, priority = 104 }, { name = "join", port = "30090", health_check_protocol = "Tcp", path = null, priority = 105 }, var.debug ? 
[{ name = "debugd", port = "4000", health_check_protocol = "Tcp", path = null, priority = 106 }] : [], @@ -223,10 +222,13 @@ resource "azurerm_network_security_group" "security_group" { tags = local.tags dynamic "security_rule" { - for_each = concat( - local.ports, - [{ name = "nodeports", port = local.ports_node_range, priority = 200 }] - ) + # We keep this inlined rule for one last release, since the azurerm provider does not + # support moving inlined security rules (like this one) to standalone azurerm_network_security_rule resources. + # Even worse, defining the azurerm_network_security_group without any + # "security_rule" block does NOT remove the existing rules; it simply leaves them untouched. + # TODO(@3u13r): remove the "security_rule" block in the next release after this code has landed, + # i.e., after 2.19, or after 2.18.X if this change is cherry-picked into a patch release. + for_each = [{ name = "konnectivity", priority = 1000, port = 8132 }] content { name = security_rule.value.name priority = security_rule.value.priority @@ -241,6 +243,28 @@ resource "azurerm_network_security_group" "security_group" { } } +resource "azurerm_network_security_rule" "nsg_rule" { + for_each = { + for o in concat( + local.ports, + [{ name = "nodeports", port = local.ports_node_range, priority = 200 }] + ) + : o.name => o + } + + name = each.value.name + priority = each.value.priority + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = each.value.port + source_address_prefix = "*" + destination_address_prefix = "*" + resource_group_name = var.resource_group + network_security_group_name = azurerm_network_security_group.security_group.name +} + module "scale_set_group" { source = "./modules/scale_set" for_each = var.node_groups diff --git a/terraform/infrastructure/azure/modules/scale_set/main.tf b/terraform/infrastructure/azure/modules/scale_set/main.tf index 99073ef46e..1573b6a5a5 100644 --- a/terraform/infrastructure/azure/modules/scale_set/main.tf +++ 
b/terraform/infrastructure/azure/modules/scale_set/main.tf @@ -122,6 +122,7 @@ resource "azurerm_linux_virtual_machine_scale_set" "scale_set" { instances, # required. autoscaling modifies the instance count externally source_image_id, # required. update procedure modifies the image id externally source_image_reference, # required. update procedure modifies the image reference externally + network_interface[0].ip_configuration[0].load_balancer_backend_address_pool_ids ] } } From f4b0badfd0084664731627821f525a4ad0242071 Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Mon, 22 Jul 2024 02:15:11 +0200 Subject: [PATCH 2/6] terraform: introduce local revision variable and data resource --- terraform/infrastructure/aws/main.tf | 7 +++++++ terraform/infrastructure/azure/main.tf | 7 +++++++ terraform/infrastructure/gcp/main.tf | 7 +++++++ terraform/infrastructure/openstack/main.tf | 7 +++++++ terraform/infrastructure/qemu/main.tf | 7 +++++++ 5 files changed, 35 insertions(+) diff --git a/terraform/infrastructure/aws/main.tf b/terraform/infrastructure/aws/main.tf index b3bb9d2984..28bcd09a13 100644 --- a/terraform/infrastructure/aws/main.tf +++ b/terraform/infrastructure/aws/main.tf @@ -55,6 +55,13 @@ locals { in_cluster_endpoint = aws_lb.front_end.dns_name out_of_cluster_endpoint = var.internal_load_balancer && var.debug ? 
module.jump_host[0].ip : local.in_cluster_endpoint + revision = 1 +} + +# A way to force replacement of resources if the provider does not want to replace them +# see: https://developer.hashicorp.com/terraform/language/resources/terraform-data#example-usage-data-for-replace_triggered_by +resource "terraform_data" "replacement" { + input = local.revision } resource "random_id" "uid" { diff --git a/terraform/infrastructure/azure/main.tf b/terraform/infrastructure/azure/main.tf index 46307695b5..b217821041 100644 --- a/terraform/infrastructure/azure/main.tf +++ b/terraform/infrastructure/azure/main.tf @@ -52,6 +52,13 @@ locals { in_cluster_endpoint = var.internal_load_balancer ? azurerm_lb.loadbalancer.frontend_ip_configuration[0].private_ip_address : azurerm_public_ip.loadbalancer_ip[0].ip_address out_of_cluster_endpoint = var.debug && var.internal_load_balancer ? module.jump_host[0].ip : local.in_cluster_endpoint + revision = 1 +} + +# A way to force replacement of resources if the provider does not want to replace them +# see: https://developer.hashicorp.com/terraform/language/resources/terraform-data#example-usage-data-for-replace_triggered_by +resource "terraform_data" "replacement" { + input = local.revision } resource "random_id" "uid" { diff --git a/terraform/infrastructure/gcp/main.tf b/terraform/infrastructure/gcp/main.tf index 83fb9c182b..7224216081 100644 --- a/terraform/infrastructure/gcp/main.tf +++ b/terraform/infrastructure/gcp/main.tf @@ -60,6 +60,13 @@ locals { ] in_cluster_endpoint = var.internal_load_balancer ? google_compute_address.loadbalancer_ip_internal[0].address : google_compute_global_address.loadbalancer_ip[0].address out_of_cluster_endpoint = var.debug && var.internal_load_balancer ? 
module.jump_host[0].ip : local.in_cluster_endpoint + revision = 1 +} + +# A way to force replacement of resources if the provider does not want to replace them +# see: https://developer.hashicorp.com/terraform/language/resources/terraform-data#example-usage-data-for-replace_triggered_by +resource "terraform_data" "replacement" { + input = local.revision } resource "random_id" "uid" { diff --git a/terraform/infrastructure/openstack/main.tf b/terraform/infrastructure/openstack/main.tf index e571977a02..3116b4f9e6 100644 --- a/terraform/infrastructure/openstack/main.tf +++ b/terraform/infrastructure/openstack/main.tf @@ -59,6 +59,13 @@ locals { cloudsyaml_path = length(var.openstack_clouds_yaml_path) > 0 ? var.openstack_clouds_yaml_path : "~/.config/openstack/clouds.yaml" cloudsyaml = yamldecode(file(pathexpand(local.cloudsyaml_path))) cloudyaml = local.cloudsyaml.clouds[var.cloud] + revision = 1 +} + +# A way to force replacement of resources if the provider does not want to replace them +# see: https://developer.hashicorp.com/terraform/language/resources/terraform-data#example-usage-data-for-replace_triggered_by +resource "terraform_data" "replacement" { + input = local.revision } resource "random_id" "uid" { diff --git a/terraform/infrastructure/qemu/main.tf b/terraform/infrastructure/qemu/main.tf index 62ec2a013e..52b3138dcc 100644 --- a/terraform/infrastructure/qemu/main.tf +++ b/terraform/infrastructure/qemu/main.tf @@ -23,6 +23,13 @@ locals { cidr_vpc_subnet_nodes = "10.42.0.0/22" cidr_vpc_subnet_control_planes = "10.42.1.0/24" cidr_vpc_subnet_worker = "10.42.2.0/24" + revision = 1 +} + +# A way to force replacement of resources if the provider does not want to replace them +# see: https://developer.hashicorp.com/terraform/language/resources/terraform-data#example-usage-data-for-replace_triggered_by +resource "terraform_data" "replacement" { + input = local.revision } resource "random_password" "init_secret" { From 492e28e0d80e3c93c848627dddbbd222cae9e9a2 Mon 
Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Mon, 7 Oct 2024 15:27:23 +0200 Subject: [PATCH 3/6] terraform: azure: don't expose full nodeport range --- terraform/infrastructure/azure/main.tf | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/terraform/infrastructure/azure/main.tf b/terraform/infrastructure/azure/main.tf index b217821041..147197ab3f 100644 --- a/terraform/infrastructure/azure/main.tf +++ b/terraform/infrastructure/azure/main.tf @@ -252,11 +252,7 @@ resource "azurerm_network_security_group" "security_group" { resource "azurerm_network_security_rule" "nsg_rule" { for_each = { - for o in concat( - local.ports, - [{ name = "nodeports", port = local.ports_node_range, priority = 200 }] - ) - : o.name => o + for o in local.ports : o.name => o } name = each.value.name @@ -299,12 +295,6 @@ module "scale_set_group" { subnet_id = azurerm_subnet.node_subnet.id backend_address_pool_ids = each.value.role == "control-plane" ? [module.loadbalancer_backend_control_plane.backendpool_id] : [] marketplace_image = var.marketplace_image - - # We still depend on the backends, since we are not sure if the VMs inside the VMSS have been - # "updated" to the new version (note: this is the update in Azure which "refreshes" the NICs and not - # our Constellation update). - # TODO(@3u13r): Remove this dependency after v2.18.0 has been released. 
- depends_on = [module.loadbalancer_backend_worker, azurerm_lb_backend_address_pool.all] } module "jump_host" { From abd03b273cbdaa6d972c95e992cce6dd2732f47a Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Mon, 7 Oct 2024 17:02:48 +0200 Subject: [PATCH 4/6] docs: add Azure load balancer migration --- docs/docs/reference/migration.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/docs/reference/migration.md b/docs/docs/reference/migration.md index 36680eef6b..9fa68768dc 100644 --- a/docs/docs/reference/migration.md +++ b/docs/docs/reference/migration.md @@ -3,7 +3,19 @@ This document describes breaking changes and migrations between Constellation releases. Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to automatically update an old config file to a new format. -## Migrating from Azure's service principal authentication to managed identity authentication + +## Migrations to v2.19.0 + +### Azure + +* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, we changed the target + load balancer in which the `cloud-controller-maanger` create the load balancing rules. Instead of using the load balancer we + create and maintain in the CLI's Terraform code, the `cloud-controller-mananger` now creates its own load balancer in Azure. + If inside your Constellation there are services of type `LoadBalancer`, please remove them before the upgrade and re-apply them + afterwards. + + +## Migrating from Azure's service principal authentication to managed identity authentication (during the upgrade to Constellation v2.8.0) - The `provider.azure.appClientID` and `provider.azure.appClientSecret` fields are no longer supported and should be removed. - To keep using an existing UAMI, add the `Owner` permission with the scope of your `resourceGroup`. 
From c2c6d995b5370e4aed12f2719222907fa39f7a7b Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Tue, 8 Oct 2024 09:41:29 +0200 Subject: [PATCH 5/6] fixup! docs: add Azure load balancer migration --- docs/docs/reference/migration.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/docs/reference/migration.md b/docs/docs/reference/migration.md index 9fa68768dc..b680f2b7bd 100644 --- a/docs/docs/reference/migration.md +++ b/docs/docs/reference/migration.md @@ -8,11 +8,11 @@ Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to a ### Azure -* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, we changed the target - load balancer in which the `cloud-controller-maanger` create the load balancing rules. Instead of using the load balancer we - create and maintain in the CLI's Terraform code, the `cloud-controller-mananger` now creates its own load balancer in Azure. - If inside your Constellation there are services of type `LoadBalancer`, please remove them before the upgrade and re-apply them - afterwards. +* To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, the target + load balancer in which the `cloud-controller-maanger` created the load balancing rules was changed. Instead of using the load balancer, + created and maintained by the CLI's Terraform code, the `cloud-controller-mananger` now creates its own load balancer in Azure. + If inside your Constellation there are services of type `LoadBalancer`, please remove them before the upgrade and re-apply them + afterward. ## Migrating from Azure's service principal authentication to managed identity authentication (during the upgrade to Constellation v2.8.0) From cf686d6e32c536ceb6fa2b7b30fb4ef1f2a13d8e Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Wed, 9 Oct 2024 11:02:26 +0200 Subject: [PATCH 6/6] fixup! fixup! 
docs: add Azure load balancer migration --- docs/docs/reference/migration.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/reference/migration.md b/docs/docs/reference/migration.md index b680f2b7bd..49cbde7021 100644 --- a/docs/docs/reference/migration.md +++ b/docs/docs/reference/migration.md @@ -9,9 +9,9 @@ Use [`constellation config migrate`](./cli.md#constellation-config-migrate) to a ### Azure * To allow seamless upgrades on Azure when Kubernetes services of type `LoadBalancer` are deployed, the target - load balancer in which the `cloud-controller-maanger` created the load balancing rules was changed. Instead of using the load balancer, - created and maintained by the CLI's Terraform code, the `cloud-controller-mananger` now creates its own load balancer in Azure. - If inside your Constellation there are services of type `LoadBalancer`, please remove them before the upgrade and re-apply them + load balancer in which the `cloud-controller-manager` creates load balancing rules was changed. Instead of using the load balancer + created and maintained by the CLI's Terraform code, the `cloud-controller-manager` now creates its own load balancer in Azure. + If your Constellation has services of type `LoadBalancer`, please remove them before the upgrade and re-apply them afterward.