From a58bee195f8fd8f093ecf8701144b19efe9a3050 Mon Sep 17 00:00:00 2001
From: Yang Chiu
Date: Mon, 22 Apr 2024 15:32:49 +0800
Subject: [PATCH] ci: clean up instances without owner

Extend the cleanup job:

- delete volumes that are not in use, in addition to the volumes whose
  Name tag matches each suffix
- terminate long-running instances that carry no Owner tag
- write instances that carry an Owner tag and have been running for more
  than 3 days to /tmp/long-running-instances; the Jenkinsfile reads that
  file and passes its content to notifyBuild
- terminate instances that carry an Owner tag once they have been running
  for more than 7 days

Signed-off-by: Yang Chiu
---
Review notes (this area is not part of the commit message):

* Steps 4-6 read the instance list line by line (while read) instead of
  word-splitting it. Each line is a compact JSON object that includes
  Tags, so a tag value containing a space used to split one object into
  several invalid fragments.
* terminate-instances and the wait loop are only run when at least one
  instance exceeded the threshold; --instance-ids needs at least one id.
* jq -r replaces "jq | tr -d" so quotes inside tag values survive, and the
  extra "jq -c" process is folded into the first jq call.
* The summary file is read into a variable before it is rewritten, and
  printf is used so backslashes in tag values are not interpreted.
* cleanup.sh now ends with a newline.
* NOTE(review): Step 4 uses CURRENT_TIMESTAMP and THRESHOLD_IN_SEC before
  this patch assigns them; presumably both are set near the top of
  cleanup.sh. Please confirm.
* NOTE(review): in the Jenkinsfile, summary is only assigned inside the
  cleanup stage. Please confirm it is declared before the try block so the
  later "|| summary" check cannot hit an undefined name when the stage
  fails.
* NOTE(review): the idle-volume deletion sits inside the per-suffix loop
  and is not filtered by suffix. Please confirm that is intended.
* NOTE(review): indentation of the context lines was reconstructed, and the
  blob ids on the index lines predate the edits above. Apply with
  "git apply --ignore-whitespace" if the context does not match.

 cleanup/Jenkinsfile        |  9 ++---
 cleanup/scripts/cleanup.sh | 83 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/cleanup/Jenkinsfile b/cleanup/Jenkinsfile
index 2d6d681639..3eadb00e84 100644
--- a/cleanup/Jenkinsfile
+++ b/cleanup/Jenkinsfile
@@ -4,10 +4,6 @@ def BUILD_TRIGGER_BY = "\n${currentBuild.getBuildCauses()[0].shortDescription}"
 
 node {
 
-    if (params.SEND_SLACK_NOTIFICATION) {
-        notifyBuild('STARTED', BUILD_TRIGGER_BY, params.NOTIFY_SLACK_CHANNEL)
-    }
-
     checkout scm
 
     withCredentials([usernamePassword(credentialsId: 'AWS_CREDS_RANCHER_QA', passwordVariable: 'AWS_SECRET_KEY', usernameVariable: 'AWS_ACCESS_KEY')]) {
@@ -25,6 +21,7 @@ node {
         try {
             stage('cleanup') {
                 sh "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} ${TF_VAR_tf_workspace}/scripts/cleanup.sh"
+                summary = sh script: "docker exec ${JOB_BASE_NAME}-${BUILD_NUMBER} cat /tmp/long-running-instances || true", returnStdout: true
             }
         } catch (e) {
             currentBuild.result = "FAILED"
@@ -36,8 +33,8 @@ node {
             sh "docker rm -v ${JOB_BASE_NAME}-${BUILD_NUMBER}"
             sh "docker rmi ${imageName}"
 
-            if (params.SEND_SLACK_NOTIFICATION) {
-                notifyBuild(currentBuild.result, "", params.NOTIFY_SLACK_CHANNEL)
+            if (params.SEND_SLACK_NOTIFICATION || summary) {
+                notifyBuild(currentBuild.result, summary, params.NOTIFY_SLACK_CHANNEL)
             }
         }
     }
diff --git a/cleanup/scripts/cleanup.sh b/cleanup/scripts/cleanup.sh
index 5560696d09..9922e1e2b3 100755
--- a/cleanup/scripts/cleanup.sh
+++ b/cleanup/scripts/cleanup.sh
@@ -44,12 +44,20 @@ for SUFFIX in ${SUFFIX_ARR[@]}; do
   done
 
   echo " (2) delete volumes:"
+  echo " delete volumes associated with instances:"
   VOLUME_IDS=$(aws ec2 describe-volumes --filters Name=tag:Name,Values=*"${SUFFIX}"* | jq '.Volumes[].VolumeId' | tr -d '"')
   for VOLUME_ID in ${VOLUME_IDS}
   do
     aws ec2 delete-volume --volume-id "${VOLUME_ID}"
     echo " volume ${VOLUME_ID} deleted"
   done
+  echo " delete idle volumes:"
+  VOLUME_IDS=$(aws ec2 describe-volumes | jq '.Volumes[] | select(.State != "in-use") | .VolumeId' | tr -d '"')
+  for VOLUME_ID in ${VOLUME_IDS}
+  do
+    aws ec2 delete-volume --volume-id "${VOLUME_ID}"
+    echo " volume ${VOLUME_ID} deleted"
+  done
 
   echo " (3) delete load balancers:"
   LOAD_BALANCER_ARNS=$(aws elbv2 describe-load-balancers | jq ".LoadBalancers[] | select(.LoadBalancerArn | contains(\"${SUFFIX}\")).LoadBalancerArn" | tr -d '"')
@@ -140,4 +148,77 @@ for SUFFIX in ${SUFFIX_ARR[@]}; do
     echo " key pair ${KEY_NAME} deleted"
   done
 
-done
\ No newline at end of file
+done
+
+echo "[Step 4] Prepare to delete long-running resources without owner:"
+# Emit one compact JSON object per line so instances can be read line by line;
+# word-splitting would break objects whose tag values contain spaces.
+ALL_INSTANCES=$(aws ec2 describe-instances --query 'Reservations[].Instances[?!not_null(Tags[?Key == `Owner`].Value)] | []' | jq -c '.[] | select(.State.Name != "terminated") | {LaunchTime: .LaunchTime, InstanceId: .InstanceId, Tags: .Tags}')
+INSTANCE_IDS=()
+while IFS= read -r INSTANCE; do
+  # The here-string yields a single empty line when no instance matched.
+  [[ -n "${INSTANCE}" ]] || continue
+  INSTANCE_ID=$(jq -r '.InstanceId' <<< "${INSTANCE}")
+  echo " * Instance ${INSTANCE_ID} ==>"
+  LAUNCH_TIME=$(jq -r '.LaunchTime' <<< "${INSTANCE}")
+  TIMESTAMP=$(date -D "%Y-%m-%dT%H:%M:%S+00:00" -d "${LAUNCH_TIME}" +%s)
+  TIME_DIFF=$((CURRENT_TIMESTAMP-TIMESTAMP))
+  echo " Launch Time: ${LAUNCH_TIME} (${TIMESTAMP}), Diff: ${TIME_DIFF}"
+  if [[ $TIME_DIFF -gt $THRESHOLD_IN_SEC ]]; then INSTANCE_IDS+=("$INSTANCE_ID"); fi
+done <<< "${ALL_INSTANCES}"
+# --instance-ids needs at least one id, so skip the AWS calls when the list is empty.
+if [[ ${#INSTANCE_IDS[@]} -gt 0 ]]; then
+  aws ec2 terminate-instances --instance-ids "${INSTANCE_IDS[@]}"
+  echo " instances ${INSTANCE_IDS[*]} shutting-down"
+  while [[ -n $(aws ec2 describe-instances --instance-ids "${INSTANCE_IDS[@]}" | jq '.Reservations[].Instances[].State.Name' | grep -v "terminated") ]]; do
+    echo "Wait for instances terminated ..."
+    sleep 5s
+  done
+fi
+
+echo "[Step 5] List long-running resources with owner:"
+THRESHOLD_IN_SEC=$((86400 * 3))
+LONG_RUNNING_INSTANCES="/tmp/long-running-instances"
+ALL_INSTANCES=$(aws ec2 describe-instances --query 'Reservations[].Instances[?not_null(Tags[?Key == `Owner`].Value)] | []' | jq -c '.[] | select(.State.Name != "terminated") | {LaunchTime: .LaunchTime, InstanceId: .InstanceId, Tags: .Tags}')
+while IFS= read -r INSTANCE; do
+  [[ -n "${INSTANCE}" ]] || continue
+  INSTANCE_ID=$(jq -r '.InstanceId' <<< "${INSTANCE}")
+  echo " * Instance ${INSTANCE_ID} ==>"
+  LAUNCH_TIME=$(jq -r '.LaunchTime' <<< "${INSTANCE}")
+  TIMESTAMP=$(date -D "%Y-%m-%dT%H:%M:%S+00:00" -d "${LAUNCH_TIME}" +%s)
+  TIME_DIFF=$((CURRENT_TIMESTAMP-TIMESTAMP))
+  echo " Launch Time: ${LAUNCH_TIME} (${TIMESTAMP}), Diff: ${TIME_DIFF}"
+  if [[ $TIME_DIFF -gt $THRESHOLD_IN_SEC ]]; then
+    NAME=$(jq -r '.Tags | map(select(.Key=="Name"))[] | .Value' <<< "${INSTANCE}")
+    OWNER=$(jq -r '.Tags | map(select(.Key=="Owner"))[] | .Value' <<< "${INSTANCE}")
+    printf '%s (%s) owned by %s\n\n' "${NAME}" "${INSTANCE_ID}" "${OWNER}" >> "${LONG_RUNNING_INSTANCES}"
+  fi
+done <<< "${ALL_INSTANCES}"
+if [[ -e "${LONG_RUNNING_INSTANCES}" ]]; then
+  # Read the collected entries first, then rewrite the file with a heading on top.
+  SUMMARY=$(cat "${LONG_RUNNING_INSTANCES}")
+  printf '\nEC2 instances running for more than 3 days:\n%s\n' "${SUMMARY}" > "${LONG_RUNNING_INSTANCES}"
+fi
+
+echo "[Step 6] Prepare to delete long-running resources with owner:"
+THRESHOLD_IN_SEC=$((86400 * 7))
+ALL_INSTANCES=$(aws ec2 describe-instances --query 'Reservations[].Instances[?not_null(Tags[?Key == `Owner`].Value)] | []' | jq -c '.[] | select(.State.Name != "terminated") | {LaunchTime: .LaunchTime, InstanceId: .InstanceId, Tags: .Tags}')
+INSTANCE_IDS=()
+while IFS= read -r INSTANCE; do
+  [[ -n "${INSTANCE}" ]] || continue
+  INSTANCE_ID=$(jq -r '.InstanceId' <<< "${INSTANCE}")
+  echo " * Instance ${INSTANCE_ID} ==>"
+  LAUNCH_TIME=$(jq -r '.LaunchTime' <<< "${INSTANCE}")
+  TIMESTAMP=$(date -D "%Y-%m-%dT%H:%M:%S+00:00" -d "${LAUNCH_TIME}" +%s)
+  TIME_DIFF=$((CURRENT_TIMESTAMP-TIMESTAMP))
+  echo " Launch Time: ${LAUNCH_TIME} (${TIMESTAMP}), Diff: ${TIME_DIFF}"
+  if [[ $TIME_DIFF -gt $THRESHOLD_IN_SEC ]]; then INSTANCE_IDS+=("$INSTANCE_ID"); fi
+done <<< "${ALL_INSTANCES}"
+if [[ ${#INSTANCE_IDS[@]} -gt 0 ]]; then
+  aws ec2 terminate-instances --instance-ids "${INSTANCE_IDS[@]}"
+  echo " instance ${INSTANCE_IDS[*]} shutting-down"
+  while [[ -n $(aws ec2 describe-instances --instance-ids "${INSTANCE_IDS[@]}" | jq '.Reservations[].Instances[].State.Name' | grep -v "terminated") ]]; do
+    echo "Wait for instances terminated ..."
+    sleep 5s
+  done
+fi