From 98019dde4cf4842270b6a293421128132a0e3730 Mon Sep 17 00:00:00 2001 From: Jian Wang Date: Mon, 5 Aug 2024 12:49:40 +0200 Subject: [PATCH] fix(manager): add detailed message when deleting pdb is not allowed Signed-off-by: Jian Wang --- controller/instance_manager_controller.go | 34 ++++++++++++----------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/controller/instance_manager_controller.go b/controller/instance_manager_controller.go index c68a3983f6..9b0f57aa33 100644 --- a/controller/instance_manager_controller.go +++ b/controller/instance_manager_controller.go @@ -814,12 +814,13 @@ func (imc *InstanceManagerController) syncInstanceManagerPDB(im *longhorn.Instan return nil } - canDeletePDB, err := imc.canDeleteInstanceManagerPDB(im) + canDeletePDB, msg, err := imc.canDeleteInstanceManagerPDB(im) if err != nil { return err } if !canDeletePDB { + imc.logger.Infof("Node %v is marked unschedulable but removing %v PDB is blocked: %s ", im.Name, imc.controllerID, msg) return nil } @@ -842,12 +843,13 @@ func (imc *InstanceManagerController) syncInstanceManagerPDB(im *longhorn.Instan } if clusterAutoscalerEnabled { - canDeletePDB, err := imc.canDeleteInstanceManagerPDB(im) + canDeletePDB, msg, err := imc.canDeleteInstanceManagerPDB(im) if err != nil { return err } if !canDeletePDB { + imc.logger.Infof("Node %v is marked unschedulable but removing %v PDB get message: %s. Autoscaler skips it", im.Name, imc.controllerID, msg) if imPDB == nil { return imc.createInstanceManagerPDB(im) } @@ -920,20 +922,20 @@ func (imc *InstanceManagerController) deleteInstanceManagerPDB(im *longhorn.Inst return nil } -func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.InstanceManager) (bool, error) { +func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.InstanceManager) (bool, string, error) { // If there is no engine instance process inside the engine instance manager, // it means that all volumes are detached. // We can delete the PodDisruptionBudget for the engine instance manager. if im.Spec.Type == longhorn.InstanceManagerTypeEngine { if len(im.Status.InstanceEngines)+len(im.Status.Instances) == 0 { // nolint: staticcheck - return true, nil + return true, "", nil } - return false, nil + return false, "some instances are still running", nil } // Make sure that the instance manager is of type replica if im.Spec.Type != longhorn.InstanceManagerTypeReplica && im.Spec.Type != longhorn.InstanceManagerTypeAllInOne { - return false, fmt.Errorf("the instance manager %v has invalid type: %v ", im.Name, im.Spec.Type) + return false, "", fmt.Errorf("the instance manager %v has invalid type: %v ", im.Name, im.Spec.Type) } // Must wait for all volumes detached from the current node first. @@ -941,31 +943,31 @@ func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.I // on the current node is deleted allVolumeDetached, err := imc.areAllVolumesDetachedFromNode(im.Spec.NodeID) if err != nil { - return false, err + return false, "", err } if !allVolumeDetached { - return false, nil + return false, "some volumes are still attached", nil } nodeDrainingPolicy, err := imc.ds.GetSettingValueExisted(types.SettingNameNodeDrainPolicy) if err != nil { - return false, err + return false, "", err } if nodeDrainingPolicy == string(types.NodeDrainPolicyAlwaysAllow) { - return true, nil + return true, "", nil } replicasOnCurrentNode, err := imc.ds.ListReplicasByNodeRO(im.Spec.NodeID) if err != nil { if datastore.ErrorIsNotFound(err) { - return true, nil + return true, "", nil } - return false, err + return false, "", err } if nodeDrainingPolicy == string(types.NodeDrainPolicyBlockForEviction) && len(replicasOnCurrentNode) > 0 { // We must wait for ALL replicas to be evicted before removing the PDB. - return false, nil + return false, "some replicas block eviction", nil } targetReplicas := []*longhorn.Replica{} @@ -987,7 +989,7 @@ func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.I pdbProtectedHealthyReplicas, err := imc.ds.ListVolumePDBProtectedHealthyReplicasRO(replica.Spec.VolumeName) if err != nil { - return false, err + return false, "", err } for _, pdbProtectedHealthyReplica := range pdbProtectedHealthyReplicas { if pdbProtectedHealthyReplica.Spec.NodeID != im.Spec.NodeID { @@ -1005,11 +1007,11 @@ func (imc *InstanceManagerController) canDeleteInstanceManagerPDB(im *longhorn.I replica.Spec.NodeID == im.Spec.NodeID if !hasPDBOnAnotherNode && !isUnusedReplicaOnCurrentNode { - return false, nil + return false, "no pdb on another node, wait", nil } } - return true, nil + return true, "", nil } func (imc *InstanceManagerController) areAllVolumesDetachedFromNode(nodeName string) (bool, error) {