diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index 1badd3aa76..cfef6325c9 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -137,6 +137,16 @@ type RollingUpgradeConfig struct { // distinct broker replicas with either offline replicas or out of sync replicas and the number of alerts triggered by // alerts with 'rollingupgrade' FailureThreshold int `json:"failureThreshold"` + + // ConcurrentBrokerRestartsAllowed controls how many brokers can be restarted in parallel during a rolling upgrade. If + // it is set to a value greater than 1, the operator will restart up to that amount of brokers in parallel, if the + // brokers are within the same rack (as specified by "broker.rack" in broker read-only configs). Since using Kafka broker + // racks spreads out the replicas, we know that restarting multiple brokers in the same rack will not cause more than + // 1/Nth of the replicas of a topic-partition to be unavailable at the same time, where N is the number of racks used. + // This is a safe way to speed up the rolling upgrade. Note that for the rack distribution explained above, Cruise Control + // requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` to be configured. + // +optional + ConcurrentBrokerRestartsAllowed int `json:"concurrentBrokerRestartsAllowed,omitempty"` } // DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator diff --git a/charts/kafka-operator/templates/crds.yaml b/charts/kafka-operator/templates/crds.yaml index 9368b81178..9938579986 100644 --- a/charts/kafka-operator/templates/crds.yaml +++ b/charts/kafka-operator/templates/crds.yaml @@ -21689,6 +21689,21 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartsAllowed: + description: ConcurrentBrokerRestartsAllowed controls how many + brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that amount of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. Once the number of failures diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index b09d32ff4c..dd3e2b6d0a 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -21526,6 +21526,21 @@ spec: description: RollingUpgradeConfig defines the desired config of the RollingUpgrade properties: + concurrentBrokerRestartsAllowed: + description: ConcurrentBrokerRestartsAllowed controls how many + brokers can be restarted in parallel during a rolling upgrade. + If it is set to a value greater than 1, the operator will restart + up to that amount of brokers in parallel, if the brokers are + within the same rack (as specified by "broker.rack" in broker + read-only configs). Since using Kafka broker racks spreads out + the replicas, we know that restarting multiple brokers in the + same rack will not cause more than 1/Nth of the replicas of + a topic-partition to be unavailable at the same time, where + N is the number of racks used. This is a safe way to speed up + the rolling upgrade. Note that for the rack distribution explained + above, Cruise Control requires `com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal` + to be configured. + type: integer failureThreshold: description: FailureThreshold controls how many failures the cluster can tolerate during a rolling upgrade. Once the number of failures