From 777a971806667311b7d12a69cff18be167279c7a Mon Sep 17 00:00:00 2001 From: Lucian Ilie Date: Thu, 22 Jun 2023 12:19:44 +0300 Subject: [PATCH 1/2] Add ConcurrentBrokerRestartCountPerRack to RollingUpgradeConfig --- api/v1beta1/kafkacluster_types.go | 12 ++++++++++++ charts/kafka-operator/templates/crds.yaml | 17 +++++++++++++++++ .../kafka.banzaicloud.io_kafkaclusters.yaml | 17 +++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index a44d358bc..09c32a502 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -143,6 +143,18 @@ type RollingUpgradeConfig struct { // distinct broker replicas with either offline replicas or out of sync replicas and the number of alerts triggered by // alerts with 'rollingupgrade' FailureThreshold int `json:"failureThreshold"` + + // ConcurrentBrokerRestartCountPerRack controls how many brokers can be restarted in parallel during a rolling upgrade. If + // it is set to a value greater than 1, the operator will restart up to that number of brokers in parallel, if the + // brokers are within the same rack (as specified by "broker.rack" in broker read-only configs). Since using Kafka broker + // racks spreads out the replicas, we know that restarting multiple brokers in the same rack will not cause more than + // 1/Nth of the replicas of a topic-partition to be unavailable at the same time, where N is the number of racks used. + // This is a safe way to speed up the rolling upgrade. Note that for the rack distribution explained above, Cruise Control + // requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` to be configured. Default value is 1. 
+ // +kubebuilder:validation:Minimum=1 + // +kubebuilder:default=1 + // +optional + ConcurrentBrokerRestartCountPerRack int `json:"concurrentBrokerRestartCountPerRack,omitempty"` } // DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator diff --git a/charts/kafka-operator/templates/crds.yaml b/charts/kafka-operator/templates/crds.yaml index 9368b8117..f554129a7 100644 --- a/charts/kafka-operator/templates/crds.yaml +++ b/charts/kafka-operator/templates/crds.yaml @@ -21689,6 +21689,23 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartCountPerRack: + default: 1 + description: ConcurrentBrokerRestartCountPerRack controls how + many brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that number of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. Default value is 1. + minimum: 1 + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. 
Once the number of failures diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index b09d32ff4..8ed2611c4 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -21526,6 +21526,23 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartCountPerRack: + default: 1 + description: ConcurrentBrokerRestartCountPerRack controls how + many brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that number of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. Default value is 1. + minimum: 1 + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. Once the number of failures From f01c008259e193318dc91a8c5c75f505816f039d Mon Sep 17 00:00:00 2001 From: Lucian Ilie Date: Thu, 27 Jul 2023 18:17:58 +0300 Subject: [PATCH 2/2] empty commit to retrigger tests