From bb56e9eedd170e713892e904670d3c2fc1383d50 Mon Sep 17 00:00:00 2001 From: Panos Paparrigopoulos Date: Tue, 16 Jan 2024 10:57:13 +0100 Subject: [PATCH] CMS: combines the three space probes and adds minfreespace calculation --- cms/check_free_space | 70 ------------------------- cms/check_report_free_space | 40 --------------- cms/check_report_used_space | 59 --------------------- cms/check_used_space | 100 ++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 169 deletions(-) delete mode 100644 cms/check_free_space delete mode 100644 cms/check_report_free_space delete mode 100644 cms/check_report_used_space create mode 100644 cms/check_used_space diff --git a/cms/check_free_space b/cms/check_free_space deleted file mode 100644 index 6168c887..00000000 --- a/cms/check_free_space +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# Copyright 2012-2020 CERN -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Authors: -# - Vincent Garonne, , 2015 -# - Donata Mielaikaite, , 2020 -# - Fernando Garzon, , 2021 - -''' -Probe to check the free space at each rse. -''' - -import sys - -import traceback - -from prometheus_client import CollectorRegistry, Gauge, push_to_gateway -from rucio.api.rse import list_rses, get_rse_usage -from rucio.common.config import config_get - -PROM_SERVERS = config_get('monitor', 'prometheus_servers', raise_exception=False, default='') -if PROM_SERVERS != '': - PROM_SERVERS = PROM_SERVERS.split(',') - -# Exit statuses -OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3 - - -if __name__ == '__main__': - try: - registry = CollectorRegistry() - free_space_gauge = Gauge('judge_free_space', - '', labelnames=('rse',), registry=registry) - for rse in list_rses(): - limits = get_rse_usage(rse['rse'], issuer='transfer_ops') - rucio_used = None - static_used = None - for usage in limits: - if usage['source'] == 'rucio': - rucio_used = usage['used'] - if usage['source'] == 'static': - static_used = usage['used'] - if (rucio_used is None) or (static_used is None): - free_space = None - else: - free_space = int(static_used) - int(rucio_used) - print(rse['rse'], free_space) - free_space_gauge.labels(**{'rse': rse['rse']}).set(free_space) - if len(PROM_SERVERS): - for server in PROM_SERVERS: - try: - push_to_gateway(server.strip(), job='check_free_space', registry=registry) - except: - continue - except: - print (traceback.format_exc()) - sys.exit(UNKNOWN) - sys.exit(OK) diff --git a/cms/check_report_free_space b/cms/check_report_free_space deleted file mode 100644 index caf97f7e..00000000 --- a/cms/check_report_free_space +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 -# Copyright European Organization for Nuclear Research (CERN) 2013 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# You may not use this file except in compliance with the License. -# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 -# -# Authors: -# - Donata Mielaikaite, , 2020 -# - Eric Vaandering, , 2021 - -""" -Probe to check the free space at each rse. -""" - -import sys -import traceback - -from rucio.core.rse import list_rses, get_rse_usage - -from utils import common - -probe_metrics = common.probe_metrics - -# Exit statuses -OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3 - -if __name__ == "__main__": - try: - for rse in list_rses(): - limits = get_rse_usage(rse['id']) - for usage in limits: - if usage['source'] == 'rucio': - free_space = int(usage['total']) - int(usage['used']) - probe_metrics.gauge('judge.free_space.{rse}').labels(rse=rse['rse']).set(free_space) - - except: - print(traceback.format_exc()) - sys.exit(UNKNOWN) - sys.exit(OK) diff --git a/cms/check_report_used_space b/cms/check_report_used_space deleted file mode 100644 index cda9d3b4..00000000 --- a/cms/check_report_used_space +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2012-2020 CERN -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Authors: -# - Donata Mielaikaite, , 2020 -# - Fernando Garzon, , 2020 -# - Eric Vaandering, , 2021 - -""" -Probe to check used space. -""" - -import sys -import traceback - -from rucio.core.rse import list_rses, get_rse_usage, list_rse_attributes -from rucio.db.sqla import models -from rucio.db.sqla.session import get_session - -from utils import common - -PrometheusPusher = common.PrometheusPusher - -# Exit statuses -OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3 - -if __name__ == '__main__': - try: - session = get_session() - with PrometheusPusher() as manager: - for rse in list_rses(): - sources = get_rse_usage(rse['id']) - attributes = list_rse_attributes(rse['id']) - country = attributes.get('country', 'UNKNOWN') - rse_type = session.query(models.RSE.rse_type).filter(models.RSE.id == rse['id']).scalar() - rse_type = str(rse_type).split('.', 1)[1] - for usage in sources: - source = usage['source'] - prom_labels = {'rse': rse['rse'], 'country': country, 'rse_type': rse_type, 'source': source} - (manager.gauge(name='report_used_space.{rse}.{country}.{rse_type}.{source}', - documentation='Space used at an RSE from various sources') - .labels(rse=rse['rse'], country=country, rse_type=rse_type, source=source) - .set(usage['used'])) - print(rse['rse'], country, rse_type, source, usage['used']) - except: - print(traceback.format_exc()) - sys.exit(UNKNOWN) diff --git a/cms/check_used_space b/cms/check_used_space new file mode 100644 index 00000000..87fd0779 --- /dev/null +++ b/cms/check_used_space @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# Copyright 2012-2020 CERN +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Authors: +# - Donata Mielaikaite, , 2020 +# - Fernando Garzon, , 2020 +# - Eric Vaandering, , 2021 +# - Panos Paparrigopoulos, , 2024 + +""" +Probe to check used space. +""" + +import sys +import traceback + +from rucio.core.rse import list_rses, get_rse_usage, list_rse_attributes, get_rse_limits +from rucio.db.sqla import models +from rucio.db.sqla.session import get_session + +from utils import common + +PrometheusPusher = common.PrometheusPusher + +# Exit statuses +OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3 + +if __name__ == '__main__': + try: + session = get_session() + with PrometheusPusher() as manager: + for rse in list_rses(): + sources = get_rse_usage(rse['id']) + attributes = list_rse_attributes(rse['id']) + country = attributes.get('country', 'UNKNOWN') + rse_type = session.query(models.RSE.rse_type).filter(models.RSE.id == rse['id']).scalar() + rse_type = str(rse_type).split('.', 1)[1] + limits = get_rse_limits(rse['id']) + rucio_used = None + static_used = None + free_space = None + prom_labels = {'rse': rse['rse'], 'country': country, 'rse_type': rse_type, 'source': ''} + label_names = ['rse', 'country', 'rse_type', 'source'] + for usage in sources: + + # Calculate free rucio space of RSE and push it + if usage['source'] == 'rucio': + prom_labels['source'] = 'rucio_free_space' + rucio_used = usage['used'] + rucio_free_space = int(usage['total']) - int(usage['used']) + (manager.gauge(name='rucio_free_space', + documentation='Space used at an RSE from various sources', labelnames=label_names) + .labels(**prom_labels) + .set(rucio_free_space)) + print(rse['rse'], country, rse_type, 'rucio_free_space', rucio_free_space) + + # Calculate total free space of RSE (static-rucio) and push it + if usage['source'] == 'static': + static_used = usage['used'] + + if rucio_used and static_used: + prom_labels['source'] = 'free_space' + free_space = int(static_used) - int(rucio_used) + (manager.gauge(name='free_space', + documentation='Space used at an RSE from various sources', labelnames=label_names) + .labels(**prom_labels) + .set(free_space)) + print(rse['rse'], country, rse_type, 'free_space', free_space) + + source = usage['source'] + prom_labels['source'] = source + (manager.gauge(name='{source}', + documentation='Space used at an RSE from various sources', labelnames=label_names) + .labels(**prom_labels) + .set(usage['used'])) + print(rse['rse'], country, rse_type, source, usage['used']) + + # export and push `MinFreeSpace` value from RSE limits + if limits.get('MinFreeSpace'): + prom_labels['source'] = 'min_free_space' + (manager.gauge(name='min_free_space', + documentation='Space used at an RSE from various sources', labelnames=label_names) + .labels(**prom_labels) + .set(limits.get('MinFreeSpace'))) + print(rse['rse'], country, rse_type, 'min_free_space', limits.get('MinFreeSpace')) + except: + print(traceback.format_exc()) + sys.exit(UNKNOWN) \ No newline at end of file