Skip to content

Commit

Permalink
Merge branch 'master' into multiagent-secret-bug
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxMcAdam authored Nov 14, 2023
2 parents 4e821eb + 3276509 commit 9575725
Show file tree
Hide file tree
Showing 6 changed files with 347 additions and 140 deletions.
245 changes: 181 additions & 64 deletions agent-install/agent-install.sh

Large diffs are not rendered by default.

33 changes: 22 additions & 11 deletions agent-install/agent-uninstall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ DEPLOYMENT_NAME="agent"
SERVICE_ACCOUNT_NAME="agent-service-account"
CLUSTER_ROLE_BINDING_NAME="openhorizon-agent-cluster-rule"
SECRET_NAME="openhorizon-agent-secrets"
IMAGE_PULL_SECRET_NAME="registry-creds"
IMAGE_REGISTRY_SECRET_NAME="openhorizon-agent-secrets-docker-cert"
CONFIGMAP_NAME="openhorizon-agent-config"
PVC_NAME="openhorizon-agent-pvc"
Expand Down Expand Up @@ -197,7 +198,7 @@ function validate_positive_int() {

function get_agent_pod_id() {
log_debug "get_agent_pod_id() begin"
if [[ $($KUBECTL get pods -n ${AGENT_NAMESPACE} -l app=agent -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; then
if [[ $($KUBECTL get pods -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; then
AGENT_POD_READY="false"
else
AGENT_POD_READY="true"
Expand Down Expand Up @@ -334,23 +335,23 @@ function deleteAgentResources() {
# give pods sometime to terminate by themselves
sleep 10

log_info "Checking if pods are deleted"
PODS=$($KUBECTL get pod -n $AGENT_NAMESPACE 2>/dev/null)
log_info "Checking if agent pods are deleted"
PODS=$($KUBECTL get pod -l app=agent -n $AGENT_NAMESPACE 2>/dev/null)
if [[ -n "$PODS" ]]; then
log_info "Pods are not deleted by deleting deployment, delete pods now"
log_info "Agent pods are not deleted by deleting deployment, delete pods now"
if [ "$USE_DELETE_FORCE" != true ]; then
$KUBECTL delete --all pods --namespace=$AGENT_NAMESPACE --grace-period=$DELETE_TIMEOUT
$KUBECTL delete pods -l app=agent --namespace=$AGENT_NAMESPACE --grace-period=$DELETE_TIMEOUT

PODS=$($KUBECTL get pod -n $AGENT_NAMESPACE 2>/dev/null)
PODS=$($KUBECTL get pod -l app=agent -n $AGENT_NAMESPACE 2>/dev/null)
if [[ -n "$PODS" ]]; then
log_info "Pods still exist"
log_info "Agent pods still exist"
PODS_STILL_EXIST="true"
fi
fi

if [ "$USE_DELETE_FORCE" == true ] || [ "$PODS_STILL_EXIST" == true ]; then
log_info "Force deleting all the pods under $AGENT_NAMESPACE"
$KUBECTL delete --all pods --namespace=$AGENT_NAMESPACE --force=true --grace-period=0
log_info "Force deleting agent pods under $AGENT_NAMESPACE"
$KUBECTL delete pods -l app=agent --namespace=$AGENT_NAMESPACE --force=true --grace-period=0
pkill -f anax.service
fi
fi
Expand All @@ -362,6 +363,7 @@ function deleteAgentResources() {
log_info "Deleting secret..."
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE
set -e

Expand All @@ -381,8 +383,15 @@ function deleteAgentResources() {
log_info "Deleting serviceaccount..."
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE

log_info "Deleting namespace..."
$KUBECTL delete namespace $AGENT_NAMESPACE --force=true --grace-period=0
log_info "Checking deployment and statefulset under namespace $AGENT_NAMESPACE"
deployment=$($KUBECTL get deployment -n $AGENT_NAMESPACE)
statefulset=$($KUBECTL get statefulset -n $AGENT_NAMESPACE)
if [[ -z "$deployment" ]] && [[ -z "$statefulset" ]]; then
log_info "No deployment and statefulset left under namespace $AGENT_NAMESPACE, deleting it..."
$KUBECTL delete namespace $AGENT_NAMESPACE --force=true --grace-period=0
else
log_info "Deployment or statefulset exists in the namespace $AGENT_NAMESPACE, skip deleting namespace $AGENT_NAMESPACE. Please delete namespace manually"
fi

log_info "Deleting cert file from /etc/default/cert ..."
rm /etc/default/cert/agent-install.crt
Expand All @@ -400,6 +409,8 @@ function uninstall_cluster() {

if [[ "$AGENT_POD_READY" == "true" ]]; then
removeNodeFromLocalAndManagementHub
else
log_info "agent pod under $AGENT_NAMESPACE is not ready, skip unregister process. Please remove node from management hub later if needed"
fi

deleteAgentResources
Expand Down
15 changes: 14 additions & 1 deletion agent-install/k8s/auto-upgrade-cronjob-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ apiVersion: __KubernetesApi__
kind: CronJob
metadata:
name: auto-upgrade-cronjob
labels:
app: agent
openhorizon.org/component: agent
type: auto-upgrade-cronjob
spec:
schedule: '*/1 * * * *'
concurrencyPolicy: Forbid
Expand All @@ -11,11 +15,20 @@ spec:
spec:
backoffLimit: 0
template:
metadata:
labels:
app: agent
openhorizon.org/component: agent
type: auto-upgrade-cronjob
spec:
volumes:
- name: agent-pvc-storage
persistentVolumeClaim:
claimName: openhorizon-agent-pvc
# START_REMOTE_ICR
imagePullSecrets:
- name: registry-creds
# END_REMOTE_ICR
containers:
- name: agent-auto-upgrade
securityContext:
Expand All @@ -33,7 +46,7 @@ spec:
- '-c'
- >-
/usr/local/bin/auto-upgrade-cronjob.sh
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /var/horizon
name: agent-pvc-storage
Expand Down
14 changes: 12 additions & 2 deletions agent-install/k8s/deployment-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@ kind: Deployment
metadata:
name: agent
namespace: __AgentNameSpace__
labels:
app: agent
openhorizon.org/component: agent
spec:
replicas: 1
selector:
matchLabels:
app: agent
openhorizon.org/component: agent
template:
metadata:
labels:
app: agent
openhorizon.org/component: agent
spec:
serviceAccountName: agent-service-account
volumes:
Expand All @@ -29,7 +34,8 @@ spec:
# START_NOT_FOR_OCP
initContainers:
- name: initcontainer
image: alpine:latest
image: __InitContainerImagePath__
imagePullPolicy: IfNotPresent
securityContext:
runAsNonRoot: false
command:
Expand All @@ -44,10 +50,14 @@ spec:
name: agent-pvc-storage
subPath: horizon
# END_NOT_FOR_OCP
# START_REMOTE_ICR
imagePullSecrets:
- name: registry-creds
# END_REMOTE_ICR
containers:
- name: anax
image: __ImagePath__
imagePullPolicy: Always
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /etc/default/horizon
subPath: horizon
Expand Down
18 changes: 14 additions & 4 deletions clusterupgrade/cluster_upgrade_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ const (
)

const (
DEFAULT_CERT_PATH = "/etc/default/cert/"
DEFAULT_CERT_PATH = "/etc/default/cert/"
DEFAULT_IMAGE_REGISTRY_IN_DEPLOYMENT = "__ImageRegistryHost__"
)

const (
Expand Down Expand Up @@ -702,8 +703,9 @@ func checkAgentImage(kubeClient *KubeClient, workDir string) (bool, string, stri
}
glog.Infof(cuwlog(fmt.Sprintf("Get image %v from tar file, extracted image tag: %v", fullImageTag, imageTag)))

if currentAgentVersion != imageTag {
// push image to image registry
if currentAgentVersion != imageTag && !agentUseRemoteRegistry() {
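// Push the image to the image registry only when the agent uses the edge cluster's local registry.
// When AGENT_CLUSTER_IMAGE_REGISTRY_HOST still holds the default deployment placeholder (see agentUseRemoteRegistry), the agent is using a remote image registry and there is no need to push the image.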
imageRegistry := os.Getenv("AGENT_CLUSTER_IMAGE_REGISTRY_HOST")
if imageRegistry == "" {
return false, "", "", fmt.Errorf("failed to get edge cluster image registry host from environment veriable: %v", imageRegistry)
Expand Down Expand Up @@ -751,7 +753,6 @@ func checkAgentImage(kubeClient *KubeClient, workDir string) (bool, string, stri
}
glog.Infof(cuwlog(fmt.Sprintf("Successfully pushed image %v", newImageRepoWithTag)))
}

return (currentAgentVersion == imageTag), imageTag, currentAgentVersion, nil
}

Expand All @@ -767,3 +768,12 @@ func checkAgentImageAgainstStatusFile(workDir string) (bool, error) {
return true, nil
}
}

// agentUseRemoteRegistry reports whether the agent is configured to use a
// remote image registry. It returns true only when the
// AGENT_CLUSTER_IMAGE_REGISTRY_HOST environment variable is exactly equal to
// DEFAULT_IMAGE_REGISTRY_IN_DEPLOYMENT; every other value yields false.
func agentUseRemoteRegistry() bool {
	registryHost := os.Getenv("AGENT_CLUSTER_IMAGE_REGISTRY_HOST")
	return registryHost == DEFAULT_IMAGE_REGISTRY_IN_DEPLOYMENT
}
Loading

0 comments on commit 9575725

Please sign in to comment.