From dab5a01f14b4ceebd86e71ff2ae902970db8ac06 Mon Sep 17 00:00:00 2001 From: Lucian Ilie Date: Thu, 22 Jun 2023 12:19:44 +0300 Subject: [PATCH] Add ConcurrentBrokerRestartCountPerRack to RollingUpgradeConfig --- api/v1beta1/kafkacluster_types.go | 12 ++++++++++++ charts/kafka-operator/templates/crds.yaml | 17 +++++++++++++++++ .../kafka.banzaicloud.io_kafkaclusters.yaml | 17 +++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index a44d358bc6..e62eccf909 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -143,6 +143,18 @@ type RollingUpgradeConfig struct { // distinct broker replicas with either offline replicas or out of sync replicas and the number of alerts triggered by // alerts with 'rollingupgrade' FailureThreshold int `json:"failureThreshold"` + + // ConcurrentBrokerRestartCountPerRack controls how many brokers can be restarted in parallel during a rolling upgrade. If + // it is set to a value greater than 1, the operator will restart up to that number of brokers in parallel, if the + // brokers are within the same rack (as specified by "broker.rack" in broker read-only configs). Since using Kafka broker + // racks spreads out the replicas, we know that restarting multiple brokers in the same rack will not cause more than + // 1/Nth of the replicas of a topic-partition to be unavailable at the same time, where N is the number of racks used. + // This is a safe way to speed up the rolling upgrade. Note that for the rack distribution explained above, Cruise Control + // requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` to be configured. Default value is 1. 
+ // +kubebuilder:validation:Minimum=1 + // +kubebuilder:default=1 + // +optional + ConcurrentBrokerRestartCountPerRack int `json:"concurrentBrokerRestartsAllowed,omitempty"` } // DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator diff --git a/charts/kafka-operator/templates/crds.yaml b/charts/kafka-operator/templates/crds.yaml index 9368b81178..aa2b6cb8a7 100644 --- a/charts/kafka-operator/templates/crds.yaml +++ b/charts/kafka-operator/templates/crds.yaml @@ -21689,6 +21689,23 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartsAllowed: + default: 1 + description: ConcurrentBrokerRestartCountPerRack controls how + many brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that number of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. Default value is 1. + minimum: 1 + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. 
Once the number of failures diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index b09d32ff4c..4512b33848 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -21526,6 +21526,23 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartsAllowed: + default: 1 + description: ConcurrentBrokerRestartCountPerRack controls how + many brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that number of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. Default value is 1. + minimum: 1 + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. Once the number of failures