Add case test_metric_longhorn_backup
longhorn/longhorn#9430

Signed-off-by: Roger Yao <[email protected]>
roger-ryao committed Oct 18, 2024
1 parent 460c7b0 commit 32cfb33
Showing 1 changed file with 161 additions and 5 deletions.
166 changes: 161 additions & 5 deletions manager/integration/tests/test_metric.py
@@ -6,7 +6,7 @@
from kubernetes.stream import stream
from prometheus_client.parser import text_string_to_metric_families

-from common import client, core_api, pod, volume_name # NOQA
+from common import client, core_api, pod, volume_name, batch_v1_api # NOQA

from common import crash_engine_process_with_sigkill
from common import delete_replica_processes
@@ -35,6 +35,25 @@
from common import DEFAULT_DISK_PATH
from common import Gi

from backupstore import set_random_backupstore # NOQA
from common import create_recurring_jobs
from common import check_recurring_jobs
from common import wait_for_cron_job_count
from common import create_backup
from common import wait_for_backup_count
from common import delete_backup_volume

RECURRING_JOB_NAME = "recurring-test"
TASK = "task"
GROUPS = "groups"
CRON = "cron"
RETAIN = "retain"
BACKUP = "backup"
CONCURRENCY = "concurrency"
LABELS = "labels"
DEFAULT = "default"
SCHEDULE_1MIN = "* * * * *"

# The dictionaries use float type of value because the value obtained from
# prometheus_client is in float type.
# https://github.com/longhorn/longhorn-tests/pull/1531#issuecomment-1833349994
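For illustration only (not part of this change): a minimal sketch of the parsing behaviour the comment above refers to. prometheus_client returns every sample value as a Python float, which is why the expected values in these dictionaries are written as floats; the metric text below is made up for the example.

from prometheus_client.parser import text_string_to_metric_families

sample_exposition = """
# HELP longhorn_backup_state State of this backup
# TYPE longhorn_backup_state gauge
longhorn_backup_state{volume="vol-1",recurring_job=""} 3
"""

for family in text_string_to_metric_families(sample_exposition):
    for sample in family.samples:
        # Prints something like:
        # longhorn_backup_state {'volume': 'vol-1', 'recurring_job': ''} 3.0
        print(sample.name, sample.labels, sample.value)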
@@ -138,6 +157,21 @@ def examine_metric_value(found_metric, metric_labels, expected_value=None):
        assert found_metric.value >= 0.0


def wait_for_metric_sum_on_all_nodes(client, core_api, metric_name, metric_labels, expected_value): # NOQA
    for _ in range(RETRY_COUNTS):
        time.sleep(RETRY_INTERVAL)

        try:
            check_metric_sum_on_all_nodes(client, core_api, metric_name,
                                          metric_labels, expected_value)
            return
        except AssertionError:
            continue

    check_metric_sum_on_all_nodes(client, core_api, metric_name,
                                  metric_labels, expected_value)
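A hedged usage sketch of the helper above (the volume name here is hypothetical): it retries check_metric_sum_on_all_nodes every RETRY_INTERVAL seconds for up to RETRY_COUNTS attempts, then makes one final unguarded call so a persistent mismatch still fails with an AssertionError.

# Hypothetical call: block until the summed longhorn_backup_state value for
# this volume across all nodes reaches 3 (Completed), or fail after retries.
wait_for_metric_sum_on_all_nodes(client, core_api,
                                 "longhorn_backup_state",
                                 {"volume": "test-vol", "recurring_job": ""},
                                 3)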


def check_metric_sum_on_all_nodes(client, core_api, metric_name, expected_labels, expected_value=None): # NOQA
    # Initialize total_metrics to store the sum of the metric values.
    total_metrics = {"labels": defaultdict(None), "value": 0.0}
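Only the first lines of check_metric_sum_on_all_nodes appear in this hunk. Conceptually it collects the metric families exposed by every node, adds up the values of all samples whose labels match expected_labels, and asserts the total against expected_value. A rough, hypothetical sketch of that aggregation step (the function name and inputs below are made up, not the actual helper):

def sum_metric_across_nodes(node_metric_texts, metric_name, expected_labels):
    # node_metric_texts: one Prometheus text exposition string per node.
    total = 0.0
    for metrics_text in node_metric_texts:
        for family in text_string_to_metric_families(metrics_text):
            for sample in family.samples:
                if sample.name != metric_name:
                    continue
                if all(sample.labels.get(k) == v
                       for k, v in expected_labels.items()):
                    total += sample.value
    return total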
@@ -440,12 +474,12 @@ def test_metric_longhorn_snapshot_actual_size_bytes(client, core_api, volume_nam
    When 1 snapshot is created by user
    And 1 snapshot is created by system
-   Then has a metric longhorn_snapshot_actual_size_bytes value equals to the
-       size of the user created snapshot,
+   Then has a metric longhorn_snapshot_actual_size_bytes value
+       equals to the size of the user created snapshot,
        and volume label is the volume name
        and user_created label is true
-   And has a metric longhorn_snapshot_actual_size_bytes value equals to the
-       size of the system created snapshot,
+   And has a metric longhorn_snapshot_actual_size_bytes value
+       equals to the size of the system created snapshot,
        and volume label is the volume name
        and user_created label is false
@@ -615,3 +649,125 @@ def test_node_metrics(client, core_api): # NOQA
    wait_for_node_update(client, lht_hostId, "allowScheduling", False)
    check_metric_with_condition(core_api, "longhorn_node_status",
                                metric_labels, 0.0)


def test_metric_longhorn_backup(set_random_backupstore, client, core_api, batch_v1_api, volume_name): # NOQA
    """
    Scenario: test metric longhorn_backup_actual_size_bytes and
              longhorn_backup_state
    Issue: https://github.com/longhorn/longhorn/issues/9429
    Given a volume
    When a backup is created by user
    Then has a metric longhorn_backup_actual_size_bytes value
        equals to the size of the backup,
        and volume label is the volume name
        and recurring_job label is empty
    And has a metric longhorn_backup_state value equals to 3 (Completed),
        and volume label is the volume name
        and recurring_job label is empty
    When a recurring backup job is created
    Then should have a metric longhorn_backup_actual_size_bytes value
        equals to the size of the backup,
        and volume label is the volume name
        and recurring_job label is the job name
    And should have a metric longhorn_backup_state value equals to 3 (Completed),
        and volume label is the volume name
        and recurring_job label is the job name
    """
    self_hostId = get_self_host_id()

    # create a volume and attach it to a node.
    volume_size = 50 * Mi
    client.create_volume(name=volume_name,
                         numberOfReplicas=1,
                         size=str(volume_size))
    volume = wait_for_volume_detached(client, volume_name)
    volume.attach(hostId=self_hostId)
    volume = wait_for_volume_healthy(client, volume_name)

    # create the user backup.
    data_size = 10 * Mi
    backup_data = {'pos': 0,
                   'len': data_size,
                   'content': generate_random_data(data_size)}
    write_volume_data(volume, backup_data)
    create_backup(client, volume_name)
    bv = client.by_id_backupVolume(volume_name)
    wait_for_backup_count(bv, 1)

    # get the backup size.
    backup_size = 0
    backups = bv.backupList().data
    for backup in backups:
        if backup['snapshotName'] == "volume-head":
            continue

        backup_size = int(backup['size'])
        assert backup_size > 0

    # assert the metric values for the user backup.
    user_backup_metric_labels = {
        "volume": volume_name,
        "recurring_job": "",
    }
    wait_for_metric_sum_on_all_nodes(client, core_api,
                                     "longhorn_backup_actual_size_bytes",
                                     user_backup_metric_labels,
                                     backup_size)

    wait_for_metric_sum_on_all_nodes(client, core_api,
                                     "longhorn_backup_state",
                                     user_backup_metric_labels,
                                     3)

    # delete the existing backup before creating a recurring backup job.
    delete_backup_volume(client, volume_name)

    # create a recurring backup job.
    recurring_jobs = {
        RECURRING_JOB_NAME: {
            TASK: BACKUP,
            GROUPS: [DEFAULT],
            CRON: SCHEDULE_1MIN,
            RETAIN: 1,
            CONCURRENCY: 1,
            LABELS: {},
        },
    }
    create_recurring_jobs(client, recurring_jobs)
    check_recurring_jobs(client, recurring_jobs)
    wait_for_cron_job_count(batch_v1_api, 1)

    # wait for the recurring backup job to run.
    time.sleep(60)
    bv = client.by_id_backupVolume(volume_name)
    wait_for_backup_count(bv, 1)

    # get the recurring backup size.
    recurring_backup_size = 0
    backups = bv.backupList().data
    for backup in backups:
        if backup['snapshotName'] == "volume-head":
            continue

        recurring_backup_size = int(backup['size'])
        assert recurring_backup_size > 0

    # assert the metric values for the recurring backup.
    recurring_backup_metric_labels = {
        "volume": volume_name,
        "recurring_job": RECURRING_JOB_NAME,
    }
    wait_for_metric_sum_on_all_nodes(client, core_api,
                                     "longhorn_backup_actual_size_bytes",
                                     recurring_backup_metric_labels,
                                     recurring_backup_size)

    wait_for_metric_sum_on_all_nodes(client, core_api,
                                     "longhorn_backup_state",
                                     recurring_backup_metric_labels,
                                     3)
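Assuming the suite is driven by pytest against a cluster that already has Longhorn and a backup target configured (which the set_random_backupstore fixture relies on), the new case can be selected by its test node ID; a minimal sketch, with the exact flags depending on the test environment:

import pytest

# Equivalent to running, from the repository root:
#   pytest manager/integration/tests/test_metric.py::test_metric_longhorn_backup
pytest.main(["manager/integration/tests/test_metric.py"
             "::test_metric_longhorn_backup"])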
