Skip to content

Commit

Permalink
Removed Postgres upgrade code (12 to 15)
Browse files Browse the repository at this point in the history
Signed-off-by: Danny Zaken <[email protected]>
  • Loading branch information
dannyzaken committed Apr 1, 2024
1 parent e40e407 commit f530259
Showing 1 changed file with 0 additions and 322 deletions.
322 changes: 0 additions & 322 deletions pkg/system/phase2_creating.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,6 @@ func (r *Reconciler) ReconcilePhaseCreatingForMainClusters() error {

// create the db only if postgres secret is not given
if r.NooBaa.Spec.ExternalPgSecret == nil {
if err := r.UpgradePostgresDB(); err != nil {
return err
}

if err := r.ReconcileDB(); err != nil {
return err
}
Expand Down Expand Up @@ -564,17 +560,6 @@ func (r *Reconciler) SetDesiredCoreApp() error {

r.CoreApp.Spec.Template.Annotations["noobaa.io/configmap-hash"] = r.CoreAppConfig.Annotations["noobaa.io/configmap-hash"]

replicas := int32(1)
phase := r.NooBaa.Status.UpgradePhase
if phase == nbv1.UpgradePhasePrepare || phase == nbv1.UpgradePhaseMigrate {
replicas = int32(0)
}
pgUpdatePhase := r.NooBaa.Status.PostgresUpdatePhase
if pgUpdatePhase == nbv1.UpgradePhasePrepare || pgUpdatePhase == nbv1.UpgradePhaseUpgrade || pgUpdatePhase == nbv1.UpgradePhaseClean {
replicas = int32(0)
}
r.CoreApp.Spec.Replicas = &replicas

if util.KubeCheckQuiet(r.CaBundleConf) {
configMapVolumes := []corev1.Volume{{
Name: r.CaBundleConf.Name,
Expand Down Expand Up @@ -1131,13 +1116,6 @@ func (r *Reconciler) ReconcileDB() error {
return reconcileDbError
}

phase := r.NooBaa.Status.PostgresUpdatePhase
// during postgres upgrade, don't reconcile DB
if phase != nbv1.UpgradePhaseFinished && phase != nbv1.UpgradePhaseNone {
r.Logger.Infof("during postgres db upgrade")
return nil
}

result, reconcilePostgresError := r.reconcileObjectAndGetResult(r.NooBaaPostgresDB, r.SetDesiredNooBaaDB, false)
if reconcilePostgresError != nil {
return reconcilePostgresError
Expand Down Expand Up @@ -1203,306 +1181,6 @@ func (r *Reconciler) RestartDbPods() error {
return nil
}

// ReconcileSetDbImageAndInitCode reconciles the postgres StatefulSet so that the
// "db" container and the "upgrade-db" init container both run targetImage, and
// the "upgrade-db" init container executes initScript via `sh -x`.
//
// When addInit is true and no "upgrade-db" init container exists yet, one is
// created as a copy of the first init container in the pod template (with its
// name, command and image overridden) and appended to the init containers.
// When an "upgrade-db" container already exists it is updated in place instead
// of appending another one, so a repeated call with addInit set does not add a
// second container with the same name.
//
// An error is returned when addInit is true but the pod template has no init
// container to copy from, or when reconciling the StatefulSet fails.
func (r *Reconciler) ReconcileSetDbImageAndInitCode(targetImage string, initScript string, addInit bool) error {
	r.Logger.Infof("ReconcileSetDbImageAndInitCode:: changing DB image: %s and init contatiners script: %s",
		targetImage, initScript)
	err := r.ReconcileObject(r.NooBaaPostgresDB, func() error {
		podSpec := &r.NooBaaPostgresDB.Spec.Template.Spec
		podSpec.ServiceAccountName = "noobaa"

		// Update any existing upgrade-db init container in place.
		hasUpgradeContainer := false
		for i := range podSpec.InitContainers {
			c := &podSpec.InitContainers[i]
			if c.Name == "upgrade-db" {
				c.Command = []string{"sh", "-x", initScript}
				c.Image = targetImage
				hasUpgradeContainer = true
			}
		}

		// Only add a new init container when one was requested and none exists.
		if addInit && !hasUpgradeContainer {
			if len(podSpec.InitContainers) == 0 {
				// Indexing InitContainers[0] would panic; retrying cannot help
				// either, so report it as a persistent error.
				return util.NewPersistentError("MissingInitContainer",
					"postgres StatefulSet has no init container to base the upgrade-db container on")
			}
			upgradeContainer := podSpec.InitContainers[0]
			upgradeContainer.Name = "upgrade-db"
			upgradeContainer.Command = []string{"sh", "-x", initScript}
			upgradeContainer.Image = targetImage
			podSpec.InitContainers = append(podSpec.InitContainers, upgradeContainer)
		}

		for i := range podSpec.Containers {
			c := &podSpec.Containers[i]
			if c.Name == "db" {
				c.Image = targetImage
			}
		}
		return nil
	})
	if err != nil {
		r.Logger.Errorf("got error on postgres STS reconcile %v", err)
		return err
	}
	return nil
}

// HasUpgradeDbContainerFailed reports the state of the "upgrade-db" init
// container of dbPod as a string rather than a boolean:
//   - the termination reason, when the container has terminated;
//   - the waiting reason, when the container is waiting;
//   - "Running", when the container is running;
//   - "NotFound", when no "upgrade-db" init container status carries any of
//     the above states.
//
// Per the original author's note: when the init container restart policy is
// Always, the pod state is not set to failed on a container error, so the
// container status itself is inspected for a persistent error.
func (r *Reconciler) HasUpgradeDbContainerFailed(dbPod *corev1.Pod) string {
	for idx := range dbPod.Status.InitContainerStatuses {
		initStatus := &dbPod.Status.InitContainerStatuses[idx]
		if initStatus.Name != "upgrade-db" {
			continue
		}
		r.Logger.Infof("HasUpgradeDbContainerFailed: Checking state of upgrade-db container: %v", initStatus.State)
		// Precedence matters: terminated, then waiting, then running.
		switch {
		case initStatus.State.Terminated != nil:
			return initStatus.State.Terminated.Reason
		case initStatus.State.Waiting != nil:
			return initStatus.State.Waiting.Reason
		case initStatus.State.Running != nil:
			return "Running"
		}
	}
	return "NotFound"
}

// RevertPostgresUpgrade points the postgres StatefulSet back at the image
// recorded in Status.BeforeUpgradeDbImage, switches the upgrade-db init
// container to run /init/revertdb.sh, and then restarts the DB pods.
//
// Only a failure to reconcile the StatefulSet is returned as an error; a
// failure to restart the DB pods is logged as a warning and otherwise ignored.
func (r *Reconciler) RevertPostgresUpgrade() error {
	r.Logger.Infof("RevertPostgresUpgrade reverting postgres upgrade to original database image")

	originalImage := *r.NooBaa.Status.BeforeUpgradeDbImage
	reconcileErr := r.ReconcileSetDbImageAndInitCode(originalImage, "/init/revertdb.sh", false)
	if reconcileErr != nil {
		r.Logger.Errorf("got error on postgres STS reconcile %v", reconcileErr)
		return reconcileErr
	}

	// The DB pod is in a failed state at this point, so it needs a restart.
	if err := r.RestartDbPods(); err != nil {
		r.Logger.Warn("Unable to restart db pods")
	}
	return nil
}

// UpgradePostgresDB advances the PostgreSQL image upgrade (version 12 to 15,
// per the event messages below) by at most one step of a phase state machine.
//
// The current phase is read from r.NooBaa.Status.PostgresUpdatePhase. When it
// is UpgradePhaseFinished or UpgradePhaseNone there is nothing to do and nil is
// returned. Otherwise the matching case below runs and may select the next
// phase; the resulting phase is written back to the status and persisted with
// r.UpdateStatus().
//
// Several branches return nil early without touching the phase (pod missing,
// pod being deleted, init container still working) - presumably so that the
// same phase is re-evaluated on the next reconcile.
//
// Errors returned directly: the missing NOOBAA_PSQL_12_IMAGE env variable,
// failures of SetEndpointsDeploymentReplicas, the RestartDbPods failure in the
// prepare phase, and the STS reconcile failure in the reverting phase. On every
// other path the return value is the result of r.UpdateStatus().
func (r *Reconciler) UpgradePostgresDB() error {
phase := r.NooBaa.Status.PostgresUpdatePhase
if phase == nbv1.UpgradePhaseFinished || phase == nbv1.UpgradePhaseNone {
return nil
}

var err error = nil
hasInitContainerFailed := false
r.Logger.Infof("UpgradePostgresDB: current phase is %s", phase)
// NOTE: a `break` inside a case leaves the switch only; execution continues
// with the revert check and the status update after the switch.
switch phase {

// A previous attempt failed and was reverted. Annotations on the NooBaa CR
// decide what happens next.
case nbv1.UpgradePhaseFailed:
if _, ok := r.NooBaa.Annotations["manual_upgrade_completed"]; ok {
r.Logger.Infof("UpgradePostgresDB: annotation for manual upgrade completed was set, marking upgrade as finished")
phase = nbv1.UpgradePhaseFinished
break
}
// Without the retry annotation the failed state is left as is.
if _, ok := r.NooBaa.Annotations["retry_upgrade"]; !ok {
return nil
}
r.Logger.Infof("UpgradePostgresDB: annotation for retrying the upgrade was set, restarting upgrade")
fallthrough
// No phase recorded yet (also reached by fallthrough when a retry was requested).
case "":
sts := util.KubeObject(bundle.File_deploy_internal_statefulset_postgres_db_yaml).(*appsv1.StatefulSet)
sts.Name = "noobaa-db-pg"
sts.Namespace = options.Namespace
// The postgres StatefulSet doesn't exist, so there is nothing to upgrade.
if !util.KubeCheckQuiet(sts) {
r.Logger.Infof("UpgradePostgresDB: old STS doesn't exist - no need for upgrade")
phase = nbv1.UpgradePhaseNone
break
}

oldImage, ok := os.LookupEnv("NOOBAA_PSQL_12_IMAGE")
if !ok {
r.Logger.Errorf("UpgradePostgresDB: Missing critical env variable for upgrade - NOOBAA_PSQL_12_IMAGE")
return util.NewPersistentError("MissingEnvVariable",
"Missing critical env variable for pg upgrade - NOOBAA_PSQL_12_IMAGE")
}
r.Logger.Infof("UpgradePostgresDB: found ENV of pgsql version 12: %s", oldImage)
// Wait until the CR's DB image matches the operator's NOOBAA_DB_IMAGE.
// NOTE(review): Spec.DBImage is dereferenced without a nil check - confirm
// it is always set by the time this code runs.
if *r.NooBaa.Spec.DBImage != os.Getenv("NOOBAA_DB_IMAGE") {
r.Logger.Infof("UpgradePostgresDB: NooBaa CR DB image: %s and operator DB image: %s are not the same, waiting...",
*r.NooBaa.Spec.DBImage, os.Getenv("NOOBAA_DB_IMAGE"))
return nil
}
r.Recorder.Eventf(r.NooBaa, corev1.EventTypeNormal, "DbUpgrade",
"Configuring DB pod to run dbdump of PostgreSQL data")
r.Logger.Infof("UpgradePostgresDB: setting phase to %s", nbv1.UpgradePhasePrepare)
phase = nbv1.UpgradePhasePrepare
// Remember the old image in the status; later phases and the revert read it.
r.NooBaa.Status.BeforeUpgradeDbImage = &oldImage

// Prepare: scale endpoints to zero, add the upgrade-db init container running
// the dump script on the old image, and restart the DB pods.
case nbv1.UpgradePhasePrepare:
if err := r.SetEndpointsDeploymentReplicas(0); err != nil {
r.Logger.Errorf("UpgradePostgresDB::got error on endpoints deployment reconcile %v", err)
return err
}
if err = r.ReconcileSetDbImageAndInitCode(*r.NooBaa.Status.BeforeUpgradeDbImage, "/init/dumpdb.sh", true); err != nil {
r.Logger.Errorf("UpgradePostgresDB: got error on postgres STS reconcile %v", err)
// err is examined by the persistent-error check after the switch.
break
}
r.Logger.Infof("UpgradePostgresDB: restarting DB pods after updating pod's init contatiner for db-dump")
restartError := r.RestartDbPods() // make sure new pods will start
if restartError != nil {
r.Logger.Warnf("UpgradePostgresDB: Unable to restart db pods %v", restartError)
// Returning here leaves the phase at prepare, so this case runs again.
return restartError
}
r.Recorder.Eventf(r.NooBaa, corev1.EventTypeNormal, "DbUpgrade",
"Starting dump. Start running DB dump in version 12 format")
phase = nbv1.UpgradePhaseUpgrade

// Upgrade: once the dump init container is done, switch the STS to the
// desired DB image with the upgrade script and restart the DB pods.
case nbv1.UpgradePhaseUpgrade:
dbPod := &corev1.Pod{}
dbPod.Name = "noobaa-db-pg-0"
dbPod.Namespace = r.NooBaaPostgresDB.Namespace
if !util.KubeCheckQuiet(dbPod) {
return nil
}
// A pod that is still being deleted belongs to the previous step; wait for it.
if dbPod.ObjectMeta.DeletionTimestamp != nil {
r.Logger.Infof("UpgradePostgresDB: upgrade-db is not yet running, phase is: %s and deletion time stamp is %v",
dbPod.Status.Phase, dbPod.ObjectMeta.DeletionTimestamp)
return nil
}
status := r.HasUpgradeDbContainerFailed(dbPod)
if status == "PodInitializing" || status == "Running" {
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't finished dump yet, waiting...")
return nil
}
// A missing upgrade-db status ("NotFound") is treated as a failure here.
if status == "CrashLoopBackOff" || status == "Error" || status == "NotFound" {
r.Logger.Errorf("UpgradePostgresDB: upgrade-db container failed dump DB. set to revert the image")
hasInitContainerFailed = true
break
}
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't fail and finished dumping the old data [status=%s]", status)
if err = r.ReconcileSetDbImageAndInitCode(GetDesiredDBImage(r.NooBaa), "/init/upgradedb.sh", false); err != nil {
r.Logger.Errorf("UpgradePostgresDB: got error on postgres STS reconcile %v", err)
break
}
r.Logger.Infof("UpgradePostgresDB: restarting DB pods after updating pod's init contatiner for upgrade-db")
restartError := r.RestartDbPods() // make sure new pods will start
// Unlike the prepare phase, a restart failure here is only logged and the
// phase still advances.
if restartError != nil {
r.Logger.Warn("UpgradePostgresDB: Unable to restart db pods")
}
r.Recorder.Eventf(r.NooBaa, corev1.EventTypeNormal, "DbUpgrade",
"Finished dump. Start running upgrade of DB data from version 12 format to version 15")
phase = nbv1.UpgradePhaseClean

// Clean: once the upgrade init container is done and the pod is running,
// remove the upgrade-db container and scale the endpoints back up.
case nbv1.UpgradePhaseClean:
dbPod := &corev1.Pod{}
dbPod.Name = "noobaa-db-pg-0"
dbPod.Namespace = r.NooBaaPostgresDB.Namespace
if !util.KubeCheckQuiet(dbPod) {
return nil
}
// A pod that is still being deleted belongs to the previous step; wait for it.
if dbPod.ObjectMeta.DeletionTimestamp != nil {
r.Logger.Infof("UpgradePostgresDB: upgrade-db is not yet running, phase is: %s and deletion time stamp is %v",
dbPod.Status.Phase, dbPod.ObjectMeta.DeletionTimestamp)
return nil
}
r.Logger.Info("UpgradePostgresDB: DB pod is found")
status := r.HasUpgradeDbContainerFailed(dbPod)
if status == "PodInitializing" || status == "Running" {
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't finished upgrading data, waiting...")
return nil
}
// A missing upgrade-db status ("NotFound") is treated as a failure here.
if status == "CrashLoopBackOff" || status == "Error" || status == "NotFound" {
r.Logger.Errorf("UpgradePostgresDB: upgrade-db container failed upgrading data. set to revert the image")
hasInitContainerFailed = true
break
}
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't fail and finished moving data to new version [status=%s]", status)
// NOTE(review): DeletionTimestamp was already verified to be nil above, so
// only the pod-phase half of this condition can vary.
if dbPod.Status.Phase != "Running" && dbPod.ObjectMeta.DeletionTimestamp == nil {
r.Logger.Infof("UpgradePostgresDB: upgrade-db is not yet running, phase is: %s and deletion time stamp is %v",
dbPod.Status.Phase, dbPod.ObjectMeta.DeletionTimestamp)
return nil
}
// Remove the upgrade-db init container from the StatefulSet.
if err = r.ReconcileObject(r.NooBaaPostgresDB, r.removeUpgradeContainer); err != nil {
r.Logger.Errorf("got error on postgres STS reconcile %v", err)
break
}
r.Logger.Infof("UpgradePostgresDB: upgrade-db container removed with success")
if err := r.SetEndpointsDeploymentReplicas(1); err != nil {
r.Logger.Errorf("UpgradePostgresDB::got error on endpoints deployment reconcile %v", err)
return err
}
r.Recorder.Eventf(r.NooBaa, corev1.EventTypeNormal, "DbUpgrade",
"Finished data upgrade after upgrade of data to PostgreSQL 15 format")
phase = nbv1.UpgradePhaseFinished

// Reverting: once the revert init container is done, remove it, scale the
// endpoints back up and record the upgrade as failed.
case nbv1.UpgradePhaseReverting:
dbPod := &corev1.Pod{}
dbPod.Name = "noobaa-db-pg-0"
dbPod.Namespace = r.NooBaaPostgresDB.Namespace
if !util.KubeCheckQuiet(dbPod) {
return nil
}
// A pod that is still being deleted belongs to the previous step; wait for it.
if dbPod.ObjectMeta.DeletionTimestamp != nil {
r.Logger.Infof("UpgradePostgresDB: upgrade-db is not yet running, phase is: %s and deletion time stamp is %v",
dbPod.Status.Phase, dbPod.ObjectMeta.DeletionTimestamp)
return nil
}
r.Logger.Info("UpgradePostgresDB: DB pod is found")
status := r.HasUpgradeDbContainerFailed(dbPod)
if status == "PodInitializing" || status == "Running" {
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't finished upgrading data, waiting...")
return nil
}
// Unlike the upgrade and clean phases, "NotFound" is not treated as a
// failure in this phase.
if status == "CrashLoopBackOff" || status == "Error" {
r.Logger.Errorf("UpgradePostgresDB: upgrade-db container failed upgrading data. set to revert the image")
hasInitContainerFailed = true
break
}
r.Logger.Infof("UpgradePostgresDB: upgrade-db container didn't fail and finished reverting data to old version [status=%s]", status)
if err = r.ReconcileObject(r.NooBaaPostgresDB, r.removeUpgradeContainer); err != nil {
r.Logger.Errorf("UpgradePostgresDB: got error on postgres STS reconcile %v", err)
return err
}
if err := r.SetEndpointsDeploymentReplicas(1); err != nil {
r.Logger.Errorf("UpgradePostgresDB::got error on endpoints deployment reconcile %v", err)
return err
}
phase = nbv1.UpgradePhaseFailed
r.Logger.Infof("UpgradePostgresDB: revert has succeeded, need to fix issue and re-attempt upgrade")
r.Recorder.Eventf(r.NooBaa, corev1.EventTypeWarning, "DbUpgradeFailure",
"Failed to update DB version to pgsql15. DB reverted back to 12.")
}

// Start a revert when a case above hit a persistent error or saw the
// upgrade-db init container fail. The phase becomes reverting whether or not
// the revert call itself succeeded.
// NOTE(review): the log below says the revert will be retried, but the
// reverting case above does not call RevertPostgresUpgrade unless the init
// container is reported as failed - confirm the intended retry path.
if (err != nil && util.IsPersistentError(err)) || hasInitContainerFailed {
err = r.RevertPostgresUpgrade()
if err == nil {
r.Logger.Infof("UpgradePostgresDB: restarting DB pods after updating pod's init contatiner for revert-db")
} else {
r.Logger.Errorf("UpgradePostgresDB:failed reverting the DB %v, will retry revert", err)
}
phase = nbv1.UpgradePhaseReverting
}

// Persist the (possibly unchanged) phase.
// NOTE(review): err is overwritten here, so a non-persistent reconcile error
// or a revert error from above is only logged and never returned to the caller.
r.NooBaa.Status.PostgresUpdatePhase = phase
err = r.UpdateStatus()

return err
}

// removeUpgradeContainer drops every init container named "upgrade-db" from the
// postgres StatefulSet pod template and sets the pod's service account name to
// "noobaa". It always returns nil; the error return exists so the method can be
// passed to ReconcileObject as the mutation callback.
func (r *Reconciler) removeUpgradeContainer() error {
	podSpec := &r.NooBaaPostgresDB.Spec.Template.Spec
	podSpec.ServiceAccountName = "noobaa"

	// Filter in place. Deleting from the slice while ranging over it is unsafe:
	// the range length is fixed before the loop starts, so after a removal the
	// loop would index past the shortened slice (panic) and would also skip the
	// element that slid into the removed slot.
	kept := podSpec.InitContainers[:0]
	for i := range podSpec.InitContainers {
		if podSpec.InitContainers[i].Name != "upgrade-db" {
			kept = append(kept, podSpec.InitContainers[i])
		}
	}
	podSpec.InitContainers = kept
	return nil
}

// SetEndpointsDeploymentReplicas reconciles the endpoints deployment with its
// replica count set to replicas, and returns the reconcile result.
func (r *Reconciler) SetEndpointsDeploymentReplicas(replicas int32) error {
	r.Logger.Infof("SetEndpointsDeploymentReplicas:: setting endpoints replica count to %d", replicas)

	// Mutation applied to the deployment as part of the reconcile.
	applyReplicaCount := func() error {
		r.DeploymentEndpoint.Spec.Replicas = &replicas
		return nil
	}
	return r.ReconcileObject(r.DeploymentEndpoint, applyReplicaCount)
}

// SetDesiredCoreAppConfig initializes the config map with predefined environment variables and their values
func (r *Reconciler) SetDesiredCoreAppConfig() error {
// Reowning the ConfigMap, in case the CreateOrUpdate removed the OwnerReference
Expand Down

0 comments on commit f530259

Please sign in to comment.