From 10b5aa3a8f4e92b3d9256c3d770b2fde7cfda8ce Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 9 Oct 2024 11:16:54 +0800 Subject: [PATCH] [PD] add option to wait a certain time before start pd (#5696) (#5756) Signed-off-by: ti-chi-bot Co-authored-by: Xiaomou Co-authored-by: csuzhangxc --- docs/api-references/docs.md | 12 ++++++++++++ manifests/crd.yaml | 3 +++ manifests/crd/v1/pingcap.com_tidbclusters.yaml | 3 +++ manifests/crd/v1beta1/pingcap.com_tidbclusters.yaml | 2 ++ manifests/crd_v1beta1.yaml | 2 ++ pkg/apis/pingcap/v1alpha1/openapi_generated.go | 7 +++++++ pkg/apis/pingcap/v1alpha1/tidbcluster.go | 8 ++++++++ pkg/apis/pingcap/v1alpha1/types.go | 5 +++++ pkg/manager/member/startscript/v2/pd_start_script.go | 5 +++++ .../pd_start_script_with_wait_for_ip_match_test.go | 12 ++++++++++++ 10 files changed, 59 insertions(+) diff --git a/docs/api-references/docs.md b/docs/api-references/docs.md index b22cd92604..653d02d39d 100644 --- a/docs/api-references/docs.md +++ b/docs/api-references/docs.md @@ -11867,6 +11867,18 @@ int +initWaitTime
+ +int + + + +

Wait time before pd get started. This wait time is to allow the new DNS record to propagate, +ensuring that the PD DNS resolves to the same IP address as the pod.

+ + + + spareVolReplaceReplicas
int32 diff --git a/manifests/crd.yaml b/manifests/crd.yaml index 5cfafd3aa2..5770d6db42 100644 --- a/manifests/crd.yaml +++ b/manifests/crd.yaml @@ -22399,6 +22399,9 @@ spec: - name type: object type: array + initWaitTime: + default: 0 + type: integer labels: additionalProperties: type: string diff --git a/manifests/crd/v1/pingcap.com_tidbclusters.yaml b/manifests/crd/v1/pingcap.com_tidbclusters.yaml index 6c03c7855f..722cf517cd 100644 --- a/manifests/crd/v1/pingcap.com_tidbclusters.yaml +++ b/manifests/crd/v1/pingcap.com_tidbclusters.yaml @@ -4935,6 +4935,9 @@ spec: - name type: object type: array + initWaitTime: + default: 0 + type: integer labels: additionalProperties: type: string diff --git a/manifests/crd/v1beta1/pingcap.com_tidbclusters.yaml b/manifests/crd/v1beta1/pingcap.com_tidbclusters.yaml index f4f9148b7b..28883e63b4 100644 --- a/manifests/crd/v1beta1/pingcap.com_tidbclusters.yaml +++ b/manifests/crd/v1beta1/pingcap.com_tidbclusters.yaml @@ -4929,6 +4929,8 @@ spec: - name type: object type: array + initWaitTime: + type: integer labels: additionalProperties: type: string diff --git a/manifests/crd_v1beta1.yaml b/manifests/crd_v1beta1.yaml index 0f2b83374f..6cd0f6fba4 100644 --- a/manifests/crd_v1beta1.yaml +++ b/manifests/crd_v1beta1.yaml @@ -22366,6 +22366,8 @@ spec: - name type: object type: array + initWaitTime: + type: integer labels: additionalProperties: type: string diff --git a/pkg/apis/pingcap/v1alpha1/openapi_generated.go b/pkg/apis/pingcap/v1alpha1/openapi_generated.go index 71572c4ddb..a02e128ce6 100644 --- a/pkg/apis/pingcap/v1alpha1/openapi_generated.go +++ b/pkg/apis/pingcap/v1alpha1/openapi_generated.go @@ -6252,6 +6252,13 @@ func schema_pkg_apis_pingcap_v1alpha1_PDSpec(ref common.ReferenceCallback) commo Format: "int32", }, }, + "initWaitTime": { + SchemaProps: spec.SchemaProps{ + Description: "Wait time before pd get started. 
This wait time is to allow the new DNS record to propagate, ensuring that the PD DNS resolves to the same IP address as the pod.", + Type: []string{"integer"}, + Format: "int32", + }, + }, "spareVolReplaceReplicas": { SchemaProps: spec.SchemaProps{ Description: "The default number of spare replicas to scale up when using VolumeReplace feature. In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid zone skew after volume replace (total replicas always whole multiples of zones). Optional: Defaults to 1", diff --git a/pkg/apis/pingcap/v1alpha1/tidbcluster.go b/pkg/apis/pingcap/v1alpha1/tidbcluster.go index 7e0da7e952..2c2a2148cb 100644 --- a/pkg/apis/pingcap/v1alpha1/tidbcluster.go +++ b/pkg/apis/pingcap/v1alpha1/tidbcluster.go @@ -46,6 +46,7 @@ const ( // shutdown a TiCDC pod. defaultTiCDCGracefulShutdownTimeout = 10 * time.Minute defaultPDStartTimeout = 30 + defaultPDInitWaitTime = 0 // the latest version versionLatest = "latest" @@ -1282,3 +1283,10 @@ func (tc *TidbCluster) PDStartTimeout() int { } return defaultPDStartTimeout } + +func (tc *TidbCluster) PDInitWaitTime() int { + if tc.Spec.PD != nil && tc.Spec.PD.InitWaitTime != 0 { + return tc.Spec.PD.InitWaitTime + } + return defaultPDInitWaitTime +} diff --git a/pkg/apis/pingcap/v1alpha1/types.go b/pkg/apis/pingcap/v1alpha1/types.go index b31d8a620c..ea3c6b1a12 100644 --- a/pkg/apis/pingcap/v1alpha1/types.go +++ b/pkg/apis/pingcap/v1alpha1/types.go @@ -544,6 +544,11 @@ type PDSpec struct { // +kubebuilder:default=30 StartTimeout int `json:"startTimeout,omitempty"` + // Wait time before pd get started. This wait time is to allow the new DNS record to propagate, + // ensuring that the PD DNS resolves to the same IP address as the pod. + // +kubebuilder:default=0 + InitWaitTime int `json:"initWaitTime,omitempty"` + // The default number of spare replicas to scale up when using VolumeReplace feature. 
// In multi-az deployments with topology spread constraints you may need to set this to number of zones to avoid // zone skew after volume replace (total replicas always whole multiples of zones). diff --git a/pkg/manager/member/startscript/v2/pd_start_script.go b/pkg/manager/member/startscript/v2/pd_start_script.go index 8a9e5ad89f..61077ac389 100644 --- a/pkg/manager/member/startscript/v2/pd_start_script.go +++ b/pkg/manager/member/startscript/v2/pd_start_script.go @@ -38,6 +38,7 @@ type PDStartScriptModel struct { ExtraArgs string PDAddresses string PDStartTimeout int + PDInitWaitTime int } // RenderPDStartScript renders PD start script from TidbCluster @@ -78,6 +79,8 @@ func RenderPDStartScript(tc *v1alpha1.TidbCluster) (string, error) { m.PDStartTimeout = tc.PDStartTimeout() + m.PDInitWaitTime = tc.PDInitWaitTime() + waitForDnsNameIpMatchOnStartup := slices.Contains( tc.Spec.StartScriptV2FeatureFlags, v1alpha1.StartScriptV2FeatureFlagWaitForDnsNameIpMatch) @@ -100,6 +103,8 @@ const ( pdWaitForDnsIpMatchSubScript = ` componentDomain=${PD_DOMAIN} waitThreshold={{ .PDStartTimeout }} +initWaitTime={{ .PDInitWaitTime }} +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" ` + componentCommonWaitForDnsIpMatchScript diff --git a/pkg/manager/member/startscript/v2/pd_start_script_with_wait_for_ip_match_test.go b/pkg/manager/member/startscript/v2/pd_start_script_with_wait_for_ip_match_test.go index 7d490cf9b1..1a02afeaab 100644 --- a/pkg/manager/member/startscript/v2/pd_start_script_with_wait_for_ip_match_test.go +++ b/pkg/manager/member/startscript/v2/pd_start_script_with_wait_for_ip_match_test.go @@ -58,6 +58,8 @@ PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0 @@ -172,6 +174,8 @@ 
PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0 @@ -286,6 +290,8 @@ PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0 @@ -400,6 +406,8 @@ PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc.cluster-1.com componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0 @@ -515,6 +523,8 @@ PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0 @@ -630,6 +640,8 @@ PD_POD_NAME=${POD_NAME:-$HOSTNAME} PD_DOMAIN=${PD_POD_NAME}.start-script-test-pd-peer.start-script-test-ns.svc.cluster-1.com componentDomain=${PD_DOMAIN} waitThreshold=30 +initWaitTime=0 +sleep $initWaitTime nsLookupCmd="dig ${componentDomain} A ${componentDomain} AAAA +search +short" elapseTime=0