From 33e728da2214dac8aef14af25a5b760f303d7609 Mon Sep 17 00:00:00 2001 From: Loic Devulder Date: Fri, 13 Sep 2024 15:31:23 +0200 Subject: [PATCH] ci/cli: add full backup/restore test This test simulates migration of a Rancher Manager server. Signed-off-by: Loic Devulder --- .github/workflows/SCHEDULING.md | 3 +- .../cli-full-backup-restore-matrix.yaml | 53 ++ .github/workflows/master_e2e.yaml | 5 + .github/workflows/sub_cli.yaml | 26 +- .github/workflows/sub_test_choice.yaml | 4 + README.md | 1 + tests/Makefile | 13 +- tests/assets/restore.yaml | 2 +- tests/e2e/backup-restore_test.go | 211 +++++-- tests/e2e/install_test.go | 163 +---- tests/e2e/suite_test.go | 563 ++++++++++++++---- tests/e2e/upgrade_test.go | 22 +- 12 files changed, 696 insertions(+), 370 deletions(-) create mode 100644 .github/workflows/cli-full-backup-restore-matrix.yaml diff --git a/.github/workflows/SCHEDULING.md b/.github/workflows/SCHEDULING.md index 76bf18adb..21ea94a40 100644 --- a/.github/workflows/SCHEDULING.md +++ b/.github/workflows/SCHEDULING.md @@ -14,12 +14,13 @@ We try to spread the tests as best as we can to avoid SPOT issue as well as not | CLI Multicluster | Sunday | 5am | us-central1-b | | CLI Regression | Saturday | 11am | us-central1-c | | CLI Rancher Manager Devel | Sunday | 8am | us-central1-c | -| UI Rancher Manager Devel | Sunday | 12pm | us-central1-a | | CLI K3s Downgrade | Sunday | 2pm | us-central1-b | +| CLI Full backup/restore (migration) | Sunday | 4pm | us-central1-c | | UI K3s | Monday to Saturday | 2am | us-central1-a | | UI K3s Upgrade | Monday to Saturday | 4am | us-central1-a | | UI RKE2 | Monday to Saturday | 2am | us-central1-b | | UI RKE2 Upgrade | Monday to Saturday | 4am | us-central1-b | +| UI Rancher Manager Devel | Sunday | 12pm | us-central1-a | | Update tests description | All days | 11pm | us-central1 | **NOTE:** please note that the GitHub scheduler uses UTC and our GCP runners are deployed in `us-central1`, so UTC-5. 
diff --git a/.github/workflows/cli-full-backup-restore-matrix.yaml b/.github/workflows/cli-full-backup-restore-matrix.yaml new file mode 100644 index 000000000..9bc3dd034 --- /dev/null +++ b/.github/workflows/cli-full-backup-restore-matrix.yaml @@ -0,0 +1,53 @@ +# This workflow calls the master E2E workflow with custom variables +name: CLI-Full-Backup-Restore + +on: + workflow_dispatch: + inputs: + destroy_runner: + description: Destroy the auto-generated self-hosted runner + default: true + type: boolean + k8s_downstream_version: + description: Rancher cluster downstream version to use + default: '"v1.27.13+k3s1"' + type: string + k8s_upstream_version: + description: Rancher cluster upstream version to use + default: '"v1.27.13+k3s1"' + type: string + qase_run_id: + description: Qase run ID where the results will be reported + type: string + rancher_version: + description: Rancher Manager channel/version/head_version to use + default: '"stable/latest"' + type: string + schedule: + # Every Sunday at 4pm UTC (11am in us-central1) + - cron: '0 16 * * 0' + +jobs: + cli: + strategy: + fail-fast: false + max-parallel: 4 + matrix: + k8s_downstream_version: ${{ fromJSON(format('[{0}]', inputs.k8s_downstream_version || '"v1.30.4+k3s1","v1.30.4+rke2r1"')) }} + k8s_upstream_version: ${{ fromJSON(format('[{0}]', inputs.k8s_upstream_version || '"v1.30.4+k3s1","v1.30.4+rke2r1"')) }} + rancher_version: ${{ fromJSON(format('[{0}]', inputs.rancher_version || '"latest/devel/2.9"')) }} + uses: ./.github/workflows/master_e2e.yaml + secrets: + credentials: ${{ secrets.GCP_CREDENTIALS }} + pat_token: ${{ secrets.SELF_HOSTED_RUNNER_PAT_TOKEN }} + qase_api_token: ${{ secrets.QASE_API_TOKEN }} + with: + destroy_runner: ${{ github.event_name == 'schedule' && true || inputs.destroy_runner }} + full_backup_restore: true + k8s_downstream_version: ${{ matrix.k8s_downstream_version }} + k8s_upstream_version: ${{ matrix.k8s_upstream_version }} + node_number: 3 + qase_run_id: ${{ 
github.event_name == 'schedule' && 'auto' || inputs.qase_run_id }} + rancher_version: ${{ matrix.rancher_version }} + test_type: cli + zone: us-central1-c diff --git a/.github/workflows/master_e2e.yaml b/.github/workflows/master_e2e.yaml index bc4a194d5..ac03ce08c 100644 --- a/.github/workflows/master_e2e.yaml +++ b/.github/workflows/master_e2e.yaml @@ -45,6 +45,10 @@ on: description: Force OS downgrade default: false type: boolean + full_backup_restore: + description: Test migration-like backup/restore functionality + default: false + type: boolean k8s_downstream_version: description: Rancher cluster downstream version to use type: string @@ -228,6 +232,7 @@ jobs: destroy_runner: ${{ inputs.destroy_runner }} elemental_ui_version: ${{ inputs.elemental_ui_version }} force_downgrade: ${{ inputs.force_downgrade }} + full_backup_restore: ${{ inputs.full_backup_restore }} k8s_downstream_version: ${{ inputs.k8s_downstream_version }} node_number: ${{ inputs.node_number }} operator_repo: ${{ inputs.operator_repo }} diff --git a/.github/workflows/sub_cli.yaml b/.github/workflows/sub_cli.yaml index 3232b192b..9fe6f2b0c 100644 --- a/.github/workflows/sub_cli.yaml +++ b/.github/workflows/sub_cli.yaml @@ -32,6 +32,9 @@ on: force_downgrade: required: true type: boolean + full_backup_restore: + required: true + type: boolean k8s_downstream_version: required: true type: string @@ -350,10 +353,25 @@ jobs: run: | cd tests && make e2e-upgrade-node && make e2e-check-app - - name: Test Backup/Restore Elemental resources with Rancher Manager + - name: Test Backup/Restore Rancher Manager/Elemental resources id: test_backup_restore + env: + CA_TYPE: ${{ inputs.ca_type }} + OPERATOR_REPO: ${{ inputs.operator_repo }} + PUBLIC_FQDN: ${{ inputs.public_fqdn }} + PUBLIC_DOMAIN: ${{ inputs.public_domain }} run: | - cd tests && make e2e-backup-restore && make e2e-check-app + cd tests + + # Run simple or full backup/restore test + if ${{ inputs.full_backup_restore == true }}; then + make 
e2e-full-backup-restore + else + make e2e-simple-backup-restore + fi + + # Check the installed application + make e2e-check-app - name: Extract ISO version id: iso_version @@ -426,8 +444,8 @@ jobs: env: OPERATOR_REPO: ${{ inputs.operator_repo }} # Don't test Operator uninstall if we want to keep the runner for debugging purposes - if: ${{ inputs.destroy_runner == true }} - run: cd tests && make e2e-uninstall-operator + if: ${{ inputs.destroy_runner == true && inputs.full_backup_restore == false }} + run: cd tests && make e2e-uninstall-operator && make e2e-check-app # This step must be called in each worklow that wants a summary! - name: Get logs and add summary diff --git a/.github/workflows/sub_test_choice.yaml b/.github/workflows/sub_test_choice.yaml index 8eaa47763..c68bdafa6 100644 --- a/.github/workflows/sub_test_choice.yaml +++ b/.github/workflows/sub_test_choice.yaml @@ -41,6 +41,9 @@ on: force_downgrade: required: true type: boolean + full_backup_restore: + required: true + type: boolean k8s_downstream_version: required: true type: string @@ -171,6 +174,7 @@ jobs: cluster_type: ${{ inputs.cluster_type }} destroy_runner: ${{ inputs.destroy_runner }} force_downgrade: ${{ inputs.force_downgrade }} + full_backup_restore: ${{ inputs.full_backup_restore }} k8s_downstream_version: ${{ inputs.k8s_downstream_version }} node_number: ${{ inputs.node_number }} operator_repo: ${{ inputs.operator_repo }} diff --git a/README.md b/README.md index ed2dd3762..416647c50 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ [![CLI-K3s-Downgrade](https://github.com/rancher/elemental/actions/workflows/cli-k3s-downgrade-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-k3s-downgrade-matrix.yaml) [![CLI-K3s-Scalability](https://github.com/rancher/elemental/actions/workflows/cli-k3s-scalability-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-k3s-scalability-matrix.yaml) 
[![CLI-K3s-SELinux](https://github.com/rancher/elemental/actions/workflows/cli-k3s-selinux-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-k3s-selinux-matrix.yaml) +[![CLI-Full-Backup-Restore](https://github.com/rancher/elemental/actions/workflows/cli-full-backup-restore-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-full-backup-restore-matrix.yaml) [![CLI-Multicluster](https://github.com/rancher/elemental/actions/workflows/cli-multicluster-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-multicluster-matrix.yaml) [![CLI-Rancher-Manager-Head-2.7](https://github.com/rancher/elemental/actions/workflows/cli-rm-head-2.7-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-rm-head-2.7-matrix.yaml) [![CLI-Regression](https://github.com/rancher/elemental/actions/workflows/cli-regression-matrix.yaml/badge.svg)](https://github.com/rancher/elemental/actions/workflows/cli-regression-matrix.yaml) diff --git a/tests/Makefile b/tests/Makefile index a3630efe7..481cce9da 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -61,15 +61,15 @@ e2e-airgap-rancher: deps e2e-bootstrap-node: deps ginkgo --timeout $(GINKGO_TIMEOUT)s --label-filter bootstrap -r -v ./e2e -e2e-backup-restore: deps - ginkgo --label-filter test-backup-restore -r -v ./e2e - e2e-check-app: deps ginkgo --label-filter check-app -r -v ./e2e e2e-configure-rancher: deps ginkgo --label-filter configure -r -v ./e2e +e2e-full-backup-restore: deps + ginkgo --label-filter test-full-backup-restore -r -v ./e2e + e2e-get-logs: deps ginkgo --label-filter logs -r -v ./e2e @@ -91,11 +91,14 @@ e2e-iso-image: deps e2e-multi-cluster: deps ginkgo --timeout $(GINKGO_TIMEOUT)s --label-filter multi-cluster -r -v ./e2e +e2e-prepare-archive: deps + ginkgo --label-filter prepare-archive -r -v ./e2e + e2e-reset: deps ginkgo --label-filter reset -r -v ./e2e -e2e-prepare-archive: deps - ginkgo --label-filter 
prepare-archive -r -v ./e2e +e2e-simple-backup-restore: deps + ginkgo --label-filter test-simple-backup-restore -r -v ./e2e e2e-ui-rancher: deps ginkgo --label-filter ui -r -v ./e2e diff --git a/tests/assets/restore.yaml b/tests/assets/restore.yaml index ff12c2475..7b8356cb3 100644 --- a/tests/assets/restore.yaml +++ b/tests/assets/restore.yaml @@ -7,4 +7,4 @@ metadata: spec: backupFilename: %BACKUP_FILE% deleteTimeoutSeconds: 10 - prune: true + prune: %PRUNE% diff --git a/tests/e2e/backup-restore_test.go b/tests/e2e/backup-restore_test.go index 6d6d4da64..9780bb6d8 100644 --- a/tests/e2e/backup-restore_test.go +++ b/tests/e2e/backup-restore_test.go @@ -15,16 +15,22 @@ limitations under the License. package e2e_test import ( + "os" + "os/exec" "strings" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/rancher-sandbox/ele-testhelpers/kubectl" - "github.com/rancher-sandbox/ele-testhelpers/rancher" "github.com/rancher-sandbox/ele-testhelpers/tools" ) +const ( + backupResourceName = "elemental-backup" + restoreResourceName = "elemental-restore" +) + var _ = Describe("E2E - Install Backup/Restore Operator", Label("install-backup-restore"), func() { // Create kubectl context // Default timeout is too small, so New() cannot be used @@ -38,64 +44,161 @@ var _ = Describe("E2E - Install Backup/Restore Operator", Label("install-backup- // Report to Qase testCaseID = 64 - // Default chart - chartRepo := "rancher-chart" + By("Installing rancher-backup-operator", func() { + InstallBackupOperator(k) + }) + }) +}) + +var _ = Describe("E2E - Test full Backup/Restore", Label("test-full-backup-restore"), func() { + // Create kubectl context + // Default timeout is too small, so New() cannot be used + k := &kubectl.Kubectl{ + Namespace: "", + PollTimeout: tools.SetTimeout(300 * time.Second), + PollInterval: 500 * time.Millisecond, + } + + var backupFile string + + It("Do a full backup/restore test", func() { + // TODO: use another case id for full 
backup/restore test + // Report to Qase + // testCaseID = 65 + + By("Adding a backup resource", func() { + err := kubectl.Apply(clusterNS, backupYaml) + Expect(err).To(Not(HaveOccurred())) + }) - By("Configuring Chart repository", func() { - // Set specific operator version if defined - if backupRestoreVersion != "" { - chartRepo = "https://github.com/rancher/backup-restore-operator/releases/download/" + backupRestoreVersion + By("Checking that the backup has been done", func() { + out, err := kubectl.RunWithoutErr("get", "backup", backupResourceName, + "-o", "jsonpath={.metadata.name}") + Expect(err).To(Not(HaveOccurred())) + Expect(out).To(ContainSubstring(backupResourceName)) + + // Wait for backup to be done + CheckBackupRestore("Done with backup") + }) + + By("Copying the backup file", func() { + // Get local storage path + localPath := GetBackupDir() + + // Get the backup file from the previous backup + file, err := kubectl.RunWithoutErr("get", "backup", backupResourceName, "-o", "jsonpath={.status.filename}") + Expect(err).To(Not(HaveOccurred())) + + // Share the filename across other functions + backupFile = file + + // Copy backup file + err = exec.Command("sudo", "cp", localPath+"/"+backupFile, ".").Run() + Expect(err).To(Not(HaveOccurred())) + }) + + By("Uninstalling K8s", func() { + if strings.Contains(k8sUpstreamVersion, "rke2") { + out, err := exec.Command("sudo", "/usr/local/bin/rke2-uninstall.sh").CombinedOutput() + Expect(err).To(Not(HaveOccurred()), out) } else { - RunHelmCmdWithRetry("repo", "add", chartRepo, "https://charts.rancher.io") - RunHelmCmdWithRetry("repo", "update") + out, err := exec.Command("k3s-uninstall.sh").CombinedOutput() + Expect(err).To(Not(HaveOccurred()), out) } }) + if strings.Contains(k8sUpstreamVersion, "rke2") { + By("Installing RKE2", func() { + InstallRKE2() + }) + + // Use the new Kube config + err := os.Setenv("KUBECONFIG", "/etc/rancher/rke2/rke2.yaml") + Expect(err).To(Not(HaveOccurred())) + + By("Starting RKE2", 
func() { + StartRKE2() + }) + + By("Waiting for RKE2 to be started", func() { + WaitForRKE2(k) + }) + + By("Installing local-path-provisionner", func() { + InstallLocalStorage(k) + }) + } else { + By("Installing K3s", func() { + InstallK3s() + }) + + // Use the new Kube config + err := os.Setenv("KUBECONFIG", "/etc/rancher/k3s/k3s.yaml") + Expect(err).To(Not(HaveOccurred())) + + By("Starting K3s", func() { + StartK3s() + }) + + By("Waiting for K3s to be started", func() { + WaitForK3s(k) + }) + } + By("Installing rancher-backup-operator", func() { - for _, chart := range []string{"rancher-backup-crd", "rancher-backup"} { - // Set the filename in chart if a custom version is defined - chartName := chart - if backupRestoreVersion != "" { - chartName = chart + "-" + strings.Trim(backupRestoreVersion, "v") + ".tgz" - } + InstallBackupOperator(k) + }) - // Global installation flags - flags := []string{ - "upgrade", "--install", chart, chartRepo + "/" + chartName, - "--namespace", "cattle-resources-system", - "--create-namespace", - "--wait", "--wait-for-jobs", - } + By("Copying backup file to restore", func() { + // Get new local storage path + localPath := GetBackupDir() - // Add specific options for the rancher-backup chart - if chart == "rancher-backup" { - flags = append(flags, - "--set", "persistence.enabled=true", - "--set", "persistence.storageClass=local-path", - ) - } + // Copy backup file + err := exec.Command("sudo", "cp", backupFile, localPath).Run() + Expect(err).To(Not(HaveOccurred())) + }) - // Install through Helm - RunHelmCmdWithRetry(flags...) 
+ By("Adding a restore resource", func() { + // Set the backup file in the restore resource + err := tools.Sed("%BACKUP_FILE%", backupFile, restoreYaml) - // Delay few seconds for all to be installed - time.Sleep(tools.SetTimeout(20 * time.Second)) - } + Expect(err).To(Not(HaveOccurred())) - // "prune" option should be set to false here, as we restore into a freshly installed cluster (migration) + err = tools.Sed("%PRUNE%", "false", restoreYaml) + Expect(err).To(Not(HaveOccurred())) + + // And apply + err = kubectl.Apply(clusterNS, restoreYaml) + Expect(err).To(Not(HaveOccurred())) }) - By("Waiting for rancher-backup-operator pod", func() { - // Wait for pod to be started - Eventually(func() error { - return rancher.CheckPod(k, [][]string{{"cattle-resources-system", "app.kubernetes.io/name=rancher-backup"}}) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + By("Checking that the restore has been done", func() { + // Wait until resources are available again + Eventually(func() string { + out, _ := kubectl.RunWithoutErr("get", "restore", restoreResourceName, + "-o", "jsonpath={.metadata.name}") + return out + }, tools.SetTimeout(5*time.Minute), 10*time.Second).Should(ContainSubstring(restoreResourceName)) + + // Wait for restore to be done + CheckBackupRestore("Done restoring") + }) + + By("Installing CertManager", func() { + InstallCertManager(k) + }) + + By("Installing Rancher Manager", func() { + InstallRancher(k) + }) + + By("Checking cluster state after restore", func() { + WaitCluster(clusterNS, clusterName) }) }) }) -var _ = Describe("E2E - Test Backup/Restore", Label("test-backup-restore"), func() { - backupResourceName := "elemental-backup" - restoreResourceName := "elemental-restore" - +var _ = Describe("E2E - Test simple Backup/Restore", Label("test-simple-backup-restore"), func() { It("Do a backup", func() { // Report to Qase testCaseID = 65 @@ -111,13 +214,8 @@ var _ = Describe("E2E - Test Backup/Restore", Label("test-backup-restore"), func Expect(err).To(Not(HaveOccurred())) 
Expect(out).To(ContainSubstring(backupResourceName)) - // Check operator logs - Eventually(func() string { - out, _ := kubectl.RunWithoutErr("logs", "-l app.kubernetes.io/name=rancher-backup", - "--tail=-1", "--since=5m", - "--namespace", "cattle-resources-system") - return out - }, tools.SetTimeout(5*time.Minute), 10*time.Second).Should(ContainSubstring("Done with backup")) + // Wait for backup to be done + CheckBackupRestore("Done with backup") }) }) @@ -150,6 +248,10 @@ var _ = Describe("E2E - Test Backup/Restore", Label("test-backup-restore"), func err = tools.Sed("%BACKUP_FILE%", backupFile, restoreYaml) Expect(err).To(Not(HaveOccurred())) + // "prune" option should be set to true here + err = tools.Sed("%PRUNE%", "true", restoreYaml) + Expect(err).To(Not(HaveOccurred())) + // And apply err = kubectl.Apply(clusterNS, restoreYaml) Expect(err).To(Not(HaveOccurred())) @@ -163,13 +265,8 @@ var _ = Describe("E2E - Test Backup/Restore", Label("test-backup-restore"), func return out }, tools.SetTimeout(5*time.Minute), 10*time.Second).Should(ContainSubstring(restoreResourceName)) - // Check operator logs - Eventually(func() string { - out, _ := kubectl.RunWithoutErr("logs", "-l app.kubernetes.io/name=rancher-backup", - "--tail=-1", "--since=5m", - "--namespace", "cattle-resources-system") - return out - }, tools.SetTimeout(5*time.Minute), 10*time.Second).Should(ContainSubstring("Done restoring")) + // Wait for restore to be done + CheckBackupRestore("Done restoring") }) By("Checking cluster state after restore", func() { diff --git a/tests/e2e/install_test.go b/tests/e2e/install_test.go index 87489f248..6c887634f 100644 --- a/tests/e2e/install_test.go +++ b/tests/e2e/install_test.go @@ -23,7 +23,6 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "github.com/rancher-sandbox/ele-testhelpers/kubectl" - "github.com/rancher-sandbox/ele-testhelpers/rancher" "github.com/rancher-sandbox/ele-testhelpers/tools" ) @@ -61,21 +60,7 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { testCaseID = 60 By("Installing RKE2", func() { - // Get RKE2 installation script - fileName := "rke2-install.sh" - Eventually(func() error { - return tools.GetFileFromURL("https://get.rke2.io", fileName, true) - }, tools.SetTimeout(2*time.Minute), 10*time.Second).ShouldNot(HaveOccurred()) - - // Retry in case of (sporadic) failure... - count := 1 - Eventually(func() error { - // Execute RKE2 installation - out, err := exec.Command("sudo", "--preserve-env=INSTALL_RKE2_VERSION", "sh", fileName).CombinedOutput() - GinkgoWriter.Printf("RKE2 installation loop %d:\n%s\n", count, out) - count++ - return err - }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(BeNil()) + InstallRKE2() }) if clusterType == "hardened" { @@ -86,77 +71,22 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { } By("Starting RKE2", func() { - // Copy config file, this allows custom configuration for RKE2 installation - // NOTE: CopyFile cannot be used, as we need root permissions for this file - err := exec.Command("sudo", "mkdir", "-p", "/etc/rancher/rke2").Run() - Expect(err).To(Not(HaveOccurred())) - err = exec.Command("sudo", "cp", configRKE2Yaml, "/etc/rancher/rke2/config.yaml").Run() - Expect(err).To(Not(HaveOccurred())) - - // Activate and start RKE2 - err = exec.Command("sudo", "systemctl", "enable", "--now", "rke2-server.service").Run() - Expect(err).To(Not(HaveOccurred())) - - // Delay few seconds before checking - time.Sleep(tools.SetTimeout(20 * time.Second)) - - // Set kubectl command and KUBECONFIG variable - err = exec.Command("sudo", "ln", "-s", "/var/lib/rancher/rke2/bin/kubectl", "/usr/local/bin/kubectl").Run() - Expect(err).To(Not(HaveOccurred())) - err = 
os.Setenv("KUBECONFIG", "/etc/rancher/rke2/rke2.yaml") - Expect(err).To(Not(HaveOccurred())) + StartRKE2() }) By("Waiting for RKE2 to be started", func() { - // Wait for all pods to be started - checkList := [][]string{ - {"kube-system", "k8s-app=kube-dns"}, - {"kube-system", "app.kubernetes.io/name=rke2-ingress-nginx"}, - } - Eventually(func() error { - return rancher.CheckPod(k, checkList) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) - - err := k.WaitLabelFilter("kube-system", "Ready", "rke2-ingress-nginx-controller", "app.kubernetes.io/name=rke2-ingress-nginx") - Expect(err).To(Not(HaveOccurred())) + WaitForRKE2(k) }) By("Installing local-path-provisionner", func() { - localPathNS := "kube-system" - kubectl.Apply(localPathNS, localStorageYaml) - - // Wait for all pods to be started - checkList := [][]string{ - {localPathNS, "app=local-path-provisioner"}, - } - Eventually(func() error { - return rancher.CheckPod(k, checkList) - }, tools.SetTimeout(2*time.Minute), 30*time.Second).Should(BeNil()) + InstallLocalStorage(k) }) } else { // Report to Qase testCaseID = 59 By("Installing K3s", func() { - // Get K3s installation script - fileName := "k3s-install.sh" - Eventually(func() error { - return tools.GetFileFromURL("https://get.k3s.io", fileName, true) - }, tools.SetTimeout(2*time.Minute), 10*time.Second).ShouldNot(HaveOccurred()) - - // Set command and arguments - installCmd := exec.Command("sh", fileName) - installCmd.Env = append(os.Environ(), "INSTALL_K3S_EXEC=--disable metrics-server") - - // Retry in case of (sporadic) failure... 
- count := 1 - Eventually(func() error { - // Execute K3s installation - out, err := installCmd.CombinedOutput() - GinkgoWriter.Printf("K3s installation loop %d:\n%s\n", count, out) - count++ - return err - }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(BeNil()) + InstallK3s() }) if clusterType == "hardened" { @@ -167,24 +97,11 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { } By("Starting K3s", func() { - err := exec.Command("sudo", "systemctl", "start", "k3s").Run() - Expect(err).To(Not(HaveOccurred())) - - // Delay few seconds before checking - time.Sleep(tools.SetTimeout(20 * time.Second)) + StartK3s() }) By("Waiting for K3s to be started", func() { - // Wait for all pods to be started - checkList := [][]string{ - {"kube-system", "app=local-path-provisioner"}, - {"kube-system", "k8s-app=kube-dns"}, - {"kube-system", "app.kubernetes.io/name=traefik"}, - {"kube-system", "svccontroller.k3s.cattle.io/svcname=traefik"}, - } - Eventually(func() error { - return rancher.CheckPod(k, checkList) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + WaitForK3s(k) }) } @@ -208,32 +125,7 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { }) } else { By("Installing CertManager", func() { - RunHelmCmdWithRetry("repo", "add", "jetstack", "https://charts.jetstack.io") - RunHelmCmdWithRetry("repo", "update") - - // Set flags for cert-manager installation - flags := []string{ - "upgrade", "--install", "cert-manager", "jetstack/cert-manager", - "--namespace", "cert-manager", - "--create-namespace", - "--set", "installCRDs=true", - "--wait", "--wait-for-jobs", - } - - if clusterType == "hardened" { - flags = append(flags, "--version", certManagerVersion) - } - - RunHelmCmdWithRetry(flags...) 
- - checkList := [][]string{ - {"cert-manager", "app.kubernetes.io/component=controller"}, - {"cert-manager", "app.kubernetes.io/component=webhook"}, - {"cert-manager", "app.kubernetes.io/component=cainjector"}, - } - Eventually(func() error { - return rancher.CheckPod(k, checkList) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + InstallCertManager(k) }) } }) @@ -264,19 +156,7 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { Expect(err).To(Not(HaveOccurred())) } - err := rancher.DeployRancherManager(rancherHostname, rancherChannel, rancherVersion, rancherHeadVersion, caType, proxy) - Expect(err).To(Not(HaveOccurred())) - - // Wait for all pods to be started - checkList := [][]string{ - {"cattle-system", "app=rancher"}, - {"cattle-system", "app=rancher-webhook"}, - {"cattle-fleet-local-system", "app=fleet-agent"}, - {"cattle-provisioning-capi-system", "control-plane=controller-manager"}, - } - Eventually(func() error { - return rancher.CheckPod(k, checkList) - }, tools.SetTimeout(10*time.Minute), 30*time.Second).Should(BeNil()) + InstallRancher(k) // Check issuer for Private CA if caType == "private" { @@ -354,29 +234,8 @@ var _ = Describe("E2E - Install Rancher Manager", Label("install"), func() { // Report to Qase testCaseID = 62 - for _, chart := range []string{"elemental-operator-crds", "elemental-operator"} { - // Set flags for installation - flags := []string{"upgrade", "--install", chart, - operatorRepo + "/" + chart + "-chart", - "--namespace", "cattle-elemental-system", - "--create-namespace", - "--wait", "--wait-for-jobs", - } - - // TODO: maybe adding a dedicated variable for operator version instead? - // of using os2Test (this one should be kept for the OS image version) - // Variable operator_repo exists but does not exactly reflect operator's version - if strings.Contains(os2Test, "dev") { - flags = append(flags, "--devel") - } - - RunHelmCmdWithRetry(flags...) 
- } - - // Wait for pod to be started - Eventually(func() error { - return rancher.CheckPod(k, [][]string{{"cattle-elemental-system", "app=elemental-operator"}}) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + installOrder := []string{"elemental-operator-crds", "elemental-operator"} + InstallElementalOperator(k, installOrder, operatorRepo) }) } }) diff --git a/tests/e2e/suite_test.go b/tests/e2e/suite_test.go index 64027a97c..d0da3371a 100644 --- a/tests/e2e/suite_test.go +++ b/tests/e2e/suite_test.go @@ -106,122 +106,13 @@ var ( vmName string ) -/* -Wait for cluster to be in a stable state - - @param ns Namespace where the cluster is deployed - - @param cn Cluster resource name - - @returns Nothing, the function will fail through Ginkgo in case of issue -*/ -func WaitCluster(ns, cn string) { - type state struct { - conditionStatus string - conditionType string - } - - // List of conditions to check - states := []state{ - { - conditionStatus: "True", - conditionType: "AgentDeployed", - }, - { - conditionStatus: "True", - conditionType: "NoDiskPressure", - }, - { - conditionStatus: "True", - conditionType: "NoMemoryPressure", - }, - { - conditionStatus: "True", - conditionType: "Provisioned", - }, - { - conditionStatus: "True", - conditionType: "Ready", - }, - { - conditionStatus: "False", - conditionType: "Reconciling", - }, - { - conditionStatus: "False", - conditionType: "Stalled", - }, - { - conditionStatus: "True", - conditionType: "Updated", - }, - { - conditionStatus: "True", - conditionType: "Waiting", - }, - } - - // Check that the cluster is in Ready state (this means that it has been created) +func CheckBackupRestore(v string) { Eventually(func() string { - status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", - "--namespace", ns, cn, - "-o", "jsonpath={.status.ready}") - return status - }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal("true")) - - // Check that all 
needed conditions are in the good state - for _, s := range states { - counter := 0 - - Eventually(func() string { - status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", - "--namespace", ns, cn, - "-o", "jsonpath={.status.conditions[?(@.type==\""+s.conditionType+"\")].status}") - - if status != s.conditionStatus { - // Show the status in case of issue, easier to debug (but log after 10 different issues) - // NOTE: it's not perfect but it's mainly a way to inform that the cluster took time to came up - counter++ - if counter > 10 { - GinkgoWriter.Printf("!! Cluster status issue !! %s is %s instead of %s\n", - s.conditionType, status, s.conditionStatus) - - // Reset counter - counter = 0 - } - - // Check if rancher-system-agent.service has some issue - if s.conditionType == "Provisioned" || s.conditionType == "Ready" || s.conditionType == "Updated" { - msg := "error applying plan -- check rancher-system-agent.service logs on node for more information" - - // Extract the list of failed nodes - listIP, _ := kubectl.RunWithoutErr("get", "machine", - "--namespace", ns, - "-o", "jsonpath={.items[?(@.status.conditions[*].message==\""+msg+"\")].status.addresses[?(@.type==\"InternalIP\")].address}") - - // We can try to restart the rancher-system-agent service on the failing node - // because sometimes it can fail just because of a sporadic/timeout issue and a restart can fix it! - for _, ip := range strings.Fields(listIP) { - if tools.IsIPv4(ip) { - // Set 'client' to be able to access the node through SSH - cl := &tools.Client{ - Host: ip + ":22", - Username: userName, - Password: userPassword, - } - - // Log the workaround, could be useful - GinkgoWriter.Printf("!! rancher-system-agent issue !! 
Service has been restarted on node with IP %s\n", ip) - - // Restart rancher-system-agent service on the node - // NOTE: wait a little to be sure that all is restarted before continuing - RunSSHWithRetry(cl, "systemctl restart rancher-system-agent.service") - time.Sleep(tools.SetTimeout(15 * time.Second)) - } - } - } - } - - return status - }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal(s.conditionStatus)) - } + out, _ := kubectl.RunWithoutErr("logs", "-l app.kubernetes.io/name=rancher-backup", + "--tail=-1", "--since=5m", + "--namespace", "cattle-resources-system") + return out + }, tools.SetTimeout(5*time.Minute), 10*time.Second).Should(ContainSubstring(v)) } /* @@ -270,20 +161,6 @@ func CheckCreatedSelectorTemplate(ns, sn string) { }, tools.SetTimeout(3*time.Minute), 5*time.Second).Should(ContainSubstring(sn)) } -/* -Wait for OSVersion to be populated - - @param ns Namespace where the cluster is deployed - - @returns Nothing, the function will fail through Ginkgo in case of issue -*/ -func WaitForOSVersion(ns string) { - Eventually(func() string { - out, _ := kubectl.RunWithoutErr("get", "ManagedOSVersion", - "--namespace", ns, - "-o", "jsonpath={.items[*].metadata.name}") - return out - }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(Not(BeEmpty())) -} - /* Check SSH connection - @param cl Client (node) informations @@ -358,6 +235,24 @@ func DownloadBuiltISO(ns, seedName, filename string) { } } +/* +Get configured backup directory + - @returns Configured backup directory +*/ +func GetBackupDir() string { + claimName, err := kubectl.RunWithoutErr("get", "pod", "-l", "app.kubernetes.io/name=rancher-backup", + "--namespace", "cattle-resources-system", + "-o", "jsonpath={.items[*].spec.volumes[?(@.name==\"pv-storage\")].persistentVolumeClaim.claimName}") + Expect(err).To(Not(HaveOccurred())) + + out, err := kubectl.RunWithoutErr("get", "pv", + "--namespace", "cattle-resources-system", + "-o", 
"jsonpath={.items[?(@.spec.claimRef.name==\""+claimName+"\")].spec.local.path}") + Expect(err).To(Not(HaveOccurred())) + + return out +} + /* Get Elemental node information - @param hn Node hostname @@ -391,6 +286,207 @@ func GetNodeIP(hn string) string { return data.IP } +/* +Install rancher-backup operator + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallBackupOperator(k *kubectl.Kubectl) { + // Default chart + chartRepo := "rancher-chart" + + // Set specific operator version if defined + if backupRestoreVersion != "" { + chartRepo = "https://github.com/rancher/backup-restore-operator/releases/download/" + backupRestoreVersion + } else { + RunHelmCmdWithRetry("repo", "add", chartRepo, "https://charts.rancher.io") + RunHelmCmdWithRetry("repo", "update") + } + + for _, chart := range []string{"rancher-backup-crd", "rancher-backup"} { + // Set the filename in chart if a custom version is defined + chartName := chart + if backupRestoreVersion != "" { + chartName = chart + "-" + strings.Trim(backupRestoreVersion, "v") + ".tgz" + } + + // Global installation flags + flags := []string{ + "upgrade", "--install", chart, chartRepo + "/" + chartName, + "--namespace", "cattle-resources-system", + "--create-namespace", + "--wait", "--wait-for-jobs", + } + + // Add specific options for the rancher-backup chart + if chart == "rancher-backup" { + flags = append(flags, + "--set", "persistence.enabled=true", + "--set", "persistence.storageClass=local-path", + ) + } + + RunHelmCmdWithRetry(flags...) 
+ + Eventually(func() error { + return rancher.CheckPod(k, [][]string{{"cattle-resources-system", "app.kubernetes.io/name=rancher-backup"}}) + }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + } +} + +/* +Install CertManager + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallCertManager(k *kubectl.Kubectl) { + RunHelmCmdWithRetry("repo", "add", "jetstack", "https://charts.jetstack.io") + RunHelmCmdWithRetry("repo", "update") + + // Set flags for cert-manager installation + flags := []string{ + "upgrade", "--install", "cert-manager", "jetstack/cert-manager", + "--namespace", "cert-manager", + "--create-namespace", + "--set", "installCRDs=true", + "--wait", "--wait-for-jobs", + } + + if clusterType == "hardened" { + flags = append(flags, "--version", certManagerVersion) + } + + RunHelmCmdWithRetry(flags...) + + checkList := [][]string{ + {"cert-manager", "app.kubernetes.io/component=controller"}, + {"cert-manager", "app.kubernetes.io/component=webhook"}, + {"cert-manager", "app.kubernetes.io/component=cainjector"}, + } + Eventually(func() error { + return rancher.CheckPod(k, checkList) + }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) +} + +/* +Install Elemental operator + - @param k kubectl structure + - @param order Order of the chart installation, mainly useful for older versions + - @param repo Chart repository to use + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallElementalOperator(k *kubectl.Kubectl, order []string, repo string) { + for _, chart := range order { + // Set flags for installation + flags := []string{"upgrade", "--install", chart, + repo + "/" + chart + "-chart", + "--namespace", "cattle-elemental-system", + "--create-namespace", + "--wait", "--wait-for-jobs", + } + + // TODO: maybe adding a dedicated variable for operator version instead? 
+ // of using os2Test (this one should be kept for the OS image version) + // Variable operator_repo exists but does not exactly reflect operator's version + if strings.Contains(repo, "dev") { + flags = append(flags, "--devel") + } + + RunHelmCmdWithRetry(flags...) + } + + // Wait for pod to be started + Eventually(func() error { + return rancher.CheckPod(k, [][]string{{"cattle-elemental-system", "app=elemental-operator"}}) + }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) +} + +/* +Install local storage + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallLocalStorage(k *kubectl.Kubectl) { + localPathNS := "kube-system" + kubectl.Apply(localPathNS, localStorageYaml) + + // Wait for all pods to be started + checkList := [][]string{ + {localPathNS, "app=local-path-provisioner"}, + } + Eventually(func() error { + return rancher.CheckPod(k, checkList) + }, tools.SetTimeout(2*time.Minute), 30*time.Second).Should(BeNil()) +} + +/* +Install K3s + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallK3s() { + // Get K3s installation script + fileName := "k3s-install.sh" + Eventually(func() error { + return tools.GetFileFromURL("https://get.k3s.io", fileName, true) + }, tools.SetTimeout(2*time.Minute), 10*time.Second).ShouldNot(HaveOccurred()) + + // Set command and arguments + installCmd := exec.Command("sh", fileName) + installCmd.Env = append(os.Environ(), "INSTALL_K3S_EXEC=--disable metrics-server") + + // Retry in case of (sporadic) failure... 
+ count := 1 + Eventually(func() error { + // Execute K3s installation + out, err := installCmd.CombinedOutput() + GinkgoWriter.Printf("K3s installation loop %d:\n%s\n", count, out) + count++ + return err + }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(BeNil()) +} + +/* +Install Rancher Manager + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallRancher(k *kubectl.Kubectl) { + err := rancher.DeployRancherManager(rancherHostname, rancherChannel, rancherVersion, rancherHeadVersion, caType, proxy) + Expect(err).To(Not(HaveOccurred())) + + checkList := [][]string{ + {"cattle-system", "app=rancher"}, + {"cattle-system", "app=rancher-webhook"}, + {"cattle-fleet-local-system", "app=fleet-agent"}, + {"cattle-provisioning-capi-system", "control-plane=controller-manager"}, + } + Eventually(func() error { + return rancher.CheckPod(k, checkList) + }, tools.SetTimeout(10*time.Minute), 30*time.Second).Should(BeNil()) +} + +/* +Install RKE2 + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func InstallRKE2() { + // Get RKE2 installation script + fileName := "rke2-install.sh" + Eventually(func() error { + return tools.GetFileFromURL("https://get.rke2.io", fileName, true) + }, tools.SetTimeout(2*time.Minute), 10*time.Second).ShouldNot(HaveOccurred()) + + // Retry in case of (sporadic) failure... 
+ count := 1 + Eventually(func() error { + // Execute RKE2 installation + out, err := exec.Command("sudo", "--preserve-env=INSTALL_RKE2_VERSION", "sh", fileName).CombinedOutput() + GinkgoWriter.Printf("RKE2 installation loop %d:\n%s\n", count, out) + count++ + return err + }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(BeNil()) +} + /* Execute RunHelmBinaryWithCustomErr within a loop with timeout - @param s options to pass to RunHelmBinaryWithCustomErr command @@ -420,6 +516,215 @@ func RunSSHWithRetry(cl *tools.Client, cmd string) string { return out } +/* +Start K3s + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func StartK3s() { + err := exec.Command("sudo", "systemctl", "start", "k3s").Run() + Expect(err).To(Not(HaveOccurred())) +} + +/* +Start RKE2 + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func StartRKE2() { + // Copy config file, this allows custom configuration for RKE2 installation + // NOTE: CopyFile cannot be used, as we need root permissions for this file + err := exec.Command("sudo", "mkdir", "-p", "/etc/rancher/rke2").Run() + Expect(err).To(Not(HaveOccurred())) + err = exec.Command("sudo", "cp", configRKE2Yaml, "/etc/rancher/rke2/config.yaml").Run() + Expect(err).To(Not(HaveOccurred())) + + // Activate and start RKE2 + err = exec.Command("sudo", "systemctl", "enable", "--now", "rke2-server.service").Run() + Expect(err).To(Not(HaveOccurred())) + + err = exec.Command("sudo", "ln", "-s", "/var/lib/rancher/rke2/bin/kubectl", "/usr/local/bin/kubectl").Run() + Expect(err).To(Not(HaveOccurred())) +} + +/* +Wait for cluster to be in a stable state + - @param ns Namespace where the cluster is deployed + - @param cn Cluster resource name + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func WaitCluster(ns, cn string) { + type state struct { + conditionStatus string + conditionType string + } + + // List of conditions to check + states := 
[]state{ + { + conditionStatus: "True", + conditionType: "AgentDeployed", + }, + { + conditionStatus: "True", + conditionType: "NoDiskPressure", + }, + { + conditionStatus: "True", + conditionType: "NoMemoryPressure", + }, + { + conditionStatus: "True", + conditionType: "Provisioned", + }, + { + conditionStatus: "True", + conditionType: "Ready", + }, + { + conditionStatus: "False", + conditionType: "Reconciling", + }, + { + conditionStatus: "False", + conditionType: "Stalled", + }, + { + conditionStatus: "True", + conditionType: "Updated", + }, + { + conditionStatus: "True", + conditionType: "Waiting", + }, + } + + // Check that the cluster is in Ready state (this means that it has been created) + Eventually(func() string { + status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", + "--namespace", ns, cn, + "-o", "jsonpath={.status.ready}") + return status + }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal("true")) + + // Check that all needed conditions are in the good state + for _, s := range states { + counter := 0 + + Eventually(func() string { + status, _ := kubectl.RunWithoutErr("get", "cluster.v1.provisioning.cattle.io", + "--namespace", ns, cn, + "-o", "jsonpath={.status.conditions[?(@.type==\""+s.conditionType+"\")].status}") + + if status != s.conditionStatus { + // Show the status in case of issue, easier to debug (but log after 10 different issues) + // NOTE: it's not perfect but it's mainly a way to inform that the cluster took time to came up + counter++ + if counter > 10 { + GinkgoWriter.Printf("!! Cluster status issue !! 
%s is %s instead of %s\n", + s.conditionType, status, s.conditionStatus) + + // Reset counter + counter = 0 + } + + // Check if rancher-system-agent.service has some issue + if s.conditionType == "Provisioned" || s.conditionType == "Ready" || s.conditionType == "Updated" { + msg := "error applying plan -- check rancher-system-agent.service logs on node for more information" + + // Extract the list of failed nodes + listIP, _ := kubectl.RunWithoutErr("get", "machine", + "--namespace", ns, + "-o", "jsonpath={.items[?(@.status.conditions[*].message==\""+msg+"\")].status.addresses[?(@.type==\"InternalIP\")].address}") + + // We can try to restart the rancher-system-agent service on the failing node + // because sometimes it can fail just because of a sporadic/timeout issue and a restart can fix it! + for _, ip := range strings.Fields(listIP) { + if tools.IsIPv4(ip) { + // Set 'client' to be able to access the node through SSH + cl := &tools.Client{ + Host: ip + ":22", + Username: userName, + Password: userPassword, + } + + // Log the workaround, could be useful + GinkgoWriter.Printf("!! rancher-system-agent issue !! 
Service has been restarted on node with IP %s\n", ip) + + // Restart rancher-system-agent service on the node + // NOTE: wait a little to be sure that all is restarted before continuing + RunSSHWithRetry(cl, "systemctl restart rancher-system-agent.service") + time.Sleep(tools.SetTimeout(15 * time.Second)) + } + } + } + } + + return status + }, tools.SetTimeout(2*time.Duration(usedNodes)*time.Minute), 10*time.Second).Should(Equal(s.conditionStatus)) + } +} + +/* +Wait for K3s to start + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func WaitForK3s(k *kubectl.Kubectl) { + // Delay before checking + // TODO: create and use a function that checks the real Status + // of the pod as well as the Ready field + time.Sleep(1 * time.Minute) + + checkList := [][]string{ + {"kube-system", "app=local-path-provisioner"}, + {"kube-system", "k8s-app=kube-dns"}, + {"kube-system", "app.kubernetes.io/name=traefik"}, + {"kube-system", "svccontroller.k3s.cattle.io/svcname=traefik"}, + } + Eventually(func() error { + return rancher.CheckPod(k, checkList) + }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) +} + +/* +Wait for RKE2 to start + - @param k kubectl structure + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func WaitForRKE2(k *kubectl.Kubectl) { + err := os.Setenv("KUBECONFIG", "/etc/rancher/rke2/rke2.yaml") + Expect(err).To(Not(HaveOccurred())) + + // Delay before checking + // TODO: create and use a function that checks the real Status + // of the pod as well as the Ready field + time.Sleep(1 * time.Minute) + + checkList := [][]string{ + {"kube-system", "k8s-app=kube-dns"}, + {"kube-system", "app.kubernetes.io/name=rke2-ingress-nginx"}, + } + Eventually(func() error { + return rancher.CheckPod(k, checkList) + }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + + err = k.WaitLabelFilter("kube-system", "Ready", "rke2-ingress-nginx-controller", 
"app.kubernetes.io/name=rke2-ingress-nginx") + Expect(err).To(Not(HaveOccurred())) +} + +/* +Wait for OSVersion to be populated + - @param ns Namespace where the cluster is deployed + - @returns Nothing, the function will fail through Ginkgo in case of issue +*/ +func WaitForOSVersion(ns string) { + Eventually(func() string { + out, _ := kubectl.RunWithoutErr("get", "ManagedOSVersion", + "--namespace", ns, + "-o", "jsonpath={.items[*].metadata.name}") + return out + }, tools.SetTimeout(2*time.Minute), 5*time.Second).Should(Not(BeEmpty())) +} + func FailWithReport(message string, callerSkip ...int) { // Ensures the correct line numbers are reported Fail(message, callerSkip[0]+1) diff --git a/tests/e2e/upgrade_test.go b/tests/e2e/upgrade_test.go index 3e9590dca..0f202f390 100644 --- a/tests/e2e/upgrade_test.go +++ b/tests/e2e/upgrade_test.go @@ -78,27 +78,7 @@ var _ = Describe("E2E - Upgrading Elemental Operator", Label("upgrade-operator") upgradeOrder = []string{"elemental-operator", "elemental-operator-crds"} } - for _, chart := range upgradeOrder { - // Set flags for installation - flags := []string{"upgrade", "--install", chart, - operatorUpgrade + "/" + chart + "-chart", - "--namespace", "cattle-elemental-system", - "--create-namespace", - "--wait", "--wait-for-jobs", - } - - // TODO: maybe adding a dedicated variable for operator version instead? - if strings.Contains(operatorUpgrade, "dev") { - flags = append(flags, "--devel") - } - - RunHelmCmdWithRetry(flags...) - } - - // Wait for all pods to be started - Eventually(func() error { - return rancher.CheckPod(k, [][]string{{"cattle-elemental-system", "app=elemental-operator"}}) - }, tools.SetTimeout(4*time.Minute), 30*time.Second).Should(BeNil()) + InstallElementalOperator(k, upgradeOrder, operatorUpgrade) }) })