Skip to content

Commit

Permalink
ci: fix test failures caused by unstable harvester cluster
Browse files Browse the repository at this point in the history
Signed-off-by: Yang Chiu <[email protected]>
  • Loading branch information
yangchiu committed Oct 17, 2024
1 parent 460c7b0 commit c84dd0a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 14 deletions.
46 changes: 32 additions & 14 deletions e2e/libs/host/harvester.py
Original file line number Diff line number Diff line change
def power_off_node(self, node_name):
    """Power off the Harvester VM backing *node_name* and wait until it stops.

    Args:
        node_name: key into ``self.mapping`` giving the Harvester VM id.

    Raises:
        AssertionError: if the VM does not reach the "Stopped" state within
            ``self.retry_count`` polling attempts.
    """
    vm_id = self.mapping[node_name]

    url = f"{self.url}/{vm_id}"
    # Issue the stop action, retrying because the harvester cluster can be
    # transiently unreachable. A non-204 response raises via the assert and
    # is retried like any other error.
    for i in range(self.retry_count):
        logging(f"Trying to stop vm {vm_id} ... ({i})")
        try:
            resp = requests.post(f"{url}?action=stop", cookies=self.cookies, verify=False)
            logging(f"resp = {resp}")
            assert resp.status_code == 204, f"Failed to stop vm {vm_id} response: {resp.status_code} {resp.reason}, request: {resp.request.url} {resp.request.headers}"
            break
        except Exception as e:
            logging(f"Stopping vm failed with error {e}")
            # Back off before retrying instead of hammering the API in a
            # tight loop (previously there was no delay here).
            time.sleep(self.retry_interval)
    logging(f"Stopping vm {vm_id}")

    # Poll until the VM reports "Stopped"; tolerate transient GET failures.
    stopped = False
    for i in range(self.retry_count):
        logging(f"Waiting for vm {vm_id} stopped ... ({i})")
        try:
            resp = requests.get(url, cookies=self.cookies, verify=False)
            # metadata.fields appears to be the Harvester table-row summary
            # that includes the printable VM state — TODO confirm schema.
            if "Stopped" in resp.json()['metadata']['fields']:
                stopped = True
                break
        except Exception as e:
            logging(f"Getting vm status failed with error {e}")
        time.sleep(self.retry_interval)
    assert stopped, f"Expected vm {vm_id} to be stopped but it's not"

def power_on_node(self, node_name):
    """Power on the Harvester VM backing *node_name* and wait until it runs.

    Args:
        node_name: key into ``self.mapping`` giving the Harvester VM id.

    Raises:
        AssertionError: if the VM does not reach the "Running" state within
            ``self.retry_count`` polling attempts.
    """
    vm_id = self.mapping[node_name]

    url = f"{self.url}/{vm_id}"
    # Issue the start action, retrying because the harvester cluster can be
    # transiently unreachable. A non-204 response raises via the assert and
    # is retried like any other error.
    for i in range(self.retry_count):
        logging(f"Trying to start vm {vm_id} ... ({i})")
        try:
            resp = requests.post(f"{url}?action=start", cookies=self.cookies, verify=False)
            logging(f"resp = {resp}")
            assert resp.status_code == 204, f"Failed to start vm {vm_id} response: {resp.status_code} {resp.reason}, request: {resp.request.url} {resp.request.headers}"
            break
        except Exception as e:
            logging(f"Starting vm failed with error {e}")
            # Back off before retrying instead of hammering the API in a
            # tight loop (previously there was no delay here).
            time.sleep(self.retry_interval)
    logging(f"Starting vm {vm_id}")

    # Poll until the VM reports "Running"; tolerate transient GET failures.
    started = False
    for i in range(self.retry_count):
        logging(f"Waiting for vm {vm_id} started ... ({i})")
        try:
            resp = requests.get(url, cookies=self.cookies, verify=False)
            # metadata.fields appears to be the Harvester table-row summary
            # that includes the printable VM state — TODO confirm schema.
            if "Running" in resp.json()['metadata']['fields']:
                started = True
                break
        except Exception as e:
            logging(f"Getting vm status failed with error {e}")
        time.sleep(self.retry_interval)
    assert started, f"Expected vm {vm_id} to be started but it's not"
4 changes: 4 additions & 0 deletions pipelines/utilities/terraform_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ if [[ ${LONGHORN_TEST_CLOUDPROVIDER} == "aws" ]]; then
elif [[ ${LONGHORN_TEST_CLOUDPROVIDER} == "harvester" ]]; then
terraform -chdir=test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw kube_config > test_framework/kube_config.yaml
terraform -chdir=test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO} output -raw cluster_id > /tmp/cluster_id
until [ "$(KUBECONFIG=${PWD}/test_framework/kube_config.yaml kubectl get nodes -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == "Ready").status' | grep -ci true)" -eq 4 ]; do
echo "waiting for harvester cluster nodes to be running"
sleep 2
done
KUBECONFIG=${PWD}/test_framework/kube_config.yaml kubectl get nodes --no-headers --selector=node-role.kubernetes.io/control-plane -owide | awk '{print $6}' > /tmp/controlplane_public_ip
KUBECONFIG=${PWD}/test_framework/kube_config.yaml kubectl get nodes --no-headers -ojson | jq '.items[].metadata.name' | tr -d '"' > /tmp/instance_mapping
jq -Rn 'reduce inputs as $line ({}; .[$line] = $line)' /tmp/instance_mapping | sponge /tmp/instance_mapping
Expand Down

0 comments on commit c84dd0a

Please sign in to comment.