From 8c95d3db661832415dc16bba162bb153b7da2e21 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 17:52:33 +0200 Subject: [PATCH 1/9] added stats calc tool --- pyproject.toml | 1 + src/ctapipe/tools/stats_calculation.py | 171 +++++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 src/ctapipe/tools/stats_calculation.py diff --git a/pyproject.toml b/pyproject.toml index 6c81e30568a..b8202e3d2e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" +ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py new file mode 100644 index 00000000000..080d1b74c45 --- /dev/null +++ b/src/ctapipe/tools/stats_calculation.py @@ -0,0 +1,171 @@ +""" +Perform statistics calculation from DL1 data +""" + +import pathlib + +import numpy as np +from astropy.table import vstack + +from ctapipe.core import Tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.core.traits import ( + Bool, + CaselessStrEnum, + Path, + Unicode, + classes_with_traits, +) +from ctapipe.instrument import SubarrayDescription +from ctapipe.io import write_table +from ctapipe.io.tableloader import TableLoader +from ctapipe.monitoring.calculator import PixelStatisticsCalculator + + +class StatisticsCalculatorTool(Tool): + """ + Perform statistics calculation for DL1 data + """ + + name = "StatisticsCalculatorTool" + description = "Perform statistics calculation for DL1 data" + + examples = """ + To calculate statistics of DL1 data files: + + > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + + """ + + input_url = Path( + help="Input CTA HDF5 files for DL1 data", + allow_none=True, + exists=True, + directory_ok=False, + file_ok=True, + ).tag(config=True) + + dl1_column_name = CaselessStrEnum( + ["image", "peak_time", "variance"], + default_value="image", + allow_none=False, + help="Column name of the DL1 data to calculate statistics", + ).tag(config=True) + + output_column_name = Unicode( + default_value="statistics", + allow_none=False, + help="Column name of the output statistics", + ).tag(config=True) + + output_path = Path( + help="Output filename", default_value=pathlib.Path("monitoring.h5") + ).tag(config=True) + + overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) + + aliases = { + ("i", "input_url"): "StatisticsCalculatorTool.input_url", + ("o", "output_path"): "StatisticsCalculatorTool.output_path", + } + + flags = { + "overwrite": ( + {"StatisticsCalculatorTool": {"overwrite": True}}, + "Overwrite existing files", + ), + } + + classes = classes_with_traits(PixelStatisticsCalculator) + + def setup(self): + # Check that the input and output files are not the same + if self.input_url == self.output_path: + raise ToolConfigurationError( + "Input and output files are same. Fix your configuration / cli arguments." + ) + + # Load the subarray description from the input file + subarray = SubarrayDescription.from_hdf(self.input_url) + if subarray.n_tels != 1: + raise IOError( + "This tool is only intended for single telescope data." + "Please provide a file with only one telescope." + ) + self.tel_id = subarray.tel_ids[0] + # Set the table name for the output file + self.table_name = ( + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" + ) + # Initialization of the statistics calculator + self.stats_calculator = PixelStatisticsCalculator( + parent=self, subarray=subarray + ) + # Read the whole dl1 images with the 'TableLoader' + input_data = TableLoader(input_url=self.input_url) + self.dl1_table = input_data.read_telescope_events_by_id( + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[self.tel_id] + + def start(self): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=self.dl1_table, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=self.dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=self.tel_id, + col_name=self.dl1_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + self.tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + self.table_name, + overwrite=self.overwrite, + ) + + def finish(self): + self.log.info( + "DL1 monitoring data was stored in '%s' under '%s'", + self.output_path, + self.table_name, + ) + self.log.info("Tool is shutting down") + + +def main(): + # Run the tool + tool = StatisticsCalculatorTool() + tool.run() + + +if __name__ == "main": + main() From 69ae37fcdda6660f2dc6149a5475fbab723ad92a Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 25 Oct 2024 18:02:19 +0200 Subject: [PATCH 2/9] added example config for stats calc --- src/ctapipe/resources/stats_calc_config.yaml | 36 ++++++++++++++++++++ src/ctapipe/tools/quickstart.py | 1 + 2 files changed, 37 insertions(+) create mode 100644 src/ctapipe/resources/stats_calc_config.yaml diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml new file mode 100644 index 00000000000..59cc601e468 --- /dev/null +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -0,0 +1,36 @@ +StatisticsCalculatorTool: + dl1_column_name: "image" + output_column_name: "statistics" + +PixelStatisticsCalculator: + stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: [ + { + "apply_to": "median", + "name": "MedianOutlierDetector", + "config": { + "median_range_factors": [-15, 15], + }, + }, + { + "apply_to": "median", + "name": "RangeOutlierDetector", + "config": { + "validity_range": [-20, 120], + } + } + { + "apply_to": "std", + "name": "StdOutlierDetector", + "config": { + "std_range_factors": [-15, 15], + }, + } + ] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 67410458d95..9cab9c97d99 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -15,6 +15,7 @@ "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", + "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From 6b8feff47839b5b5ec7ab5aa896a9cc9ccda79f5 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:55:45 +0100 Subject: [PATCH 3/9] allow to process multiple tels Since we should also support the processing of MCs, we might want to run the stats calc tool over multiple tels. --- src/ctapipe/resources/stats_calc_config.yaml | 7 +- src/ctapipe/tools/stats_calculation.py | 120 ++++++++++--------- 2 files changed, 68 insertions(+), 59 deletions(-) diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml index 59cc601e468..965bd6dae68 100644 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ b/src/ctapipe/resources/stats_calc_config.yaml @@ -1,9 +1,10 @@ StatisticsCalculatorTool: - dl1_column_name: "image" + allowed_tels: [1,2,3,4] + dl1a_column_name: "image" output_column_name: "statistics" PixelStatisticsCalculator: - stats_aggregator_type: [["id", 1, "SigmaClippingAggregator"]] + stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] chunk_shift: 1000 faulty_pixels_fraction: 0.1 outlier_detector_list: [ @@ -20,7 +21,7 @@ PixelStatisticsCalculator: "config": { "validity_range": [-20, 120], } - } + }, { "apply_to": "std", "name": "StdOutlierDetector", diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/stats_calculation.py index 080d1b74c45..b472ba461ae 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/stats_calculation.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1 data +Perform statistics calculation from DL1a image data """ import pathlib @@ -12,7 +12,9 @@ from ctapipe.core.traits import ( Bool, CaselessStrEnum, + CInt, Path, + Set, Unicode, classes_with_traits, ) @@ -24,32 +26,42 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1 data + Perform statistics calculation for DL1a image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1 data" + description = "Perform statistics calculation for DL1a image data" examples = """ - To calculate statistics of DL1 data files: + To calculate statistics of DL1a image data files: > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files for DL1 data", + help="Input CTA HDF5 files including DL1a image data", allow_none=True, exists=True, directory_ok=False, file_ok=True, ).tag(config=True) - dl1_column_name = CaselessStrEnum( + allowed_tels = Set( + trait=CInt(), + default_value=None, + allow_none=True, + help=( + "List of allowed tel_ids, others will be ignored. " + "If None, all telescopes in the input stream will be included." + ), + ).tag(config=True) + + dl1a_column_name = CaselessStrEnum( ["image", "peak_time", "variance"], default_value="image", allow_none=False, - help="Column name of the DL1 data to calculate statistics", + help="Column name of the DL1a image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -87,23 +99,17 @@ def setup(self): # Load the subarray description from the input file subarray = SubarrayDescription.from_hdf(self.input_url) - if subarray.n_tels != 1: - raise IOError( - "This tool is only intended for single telescope data." - "Please provide a file with only one telescope." - ) - self.tel_id = subarray.tel_ids[0] - # Set the table name for the output file - self.table_name = ( - f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{self.tel_id:03d}" - ) # Initialization of the statistics calculator self.stats_calculator = PixelStatisticsCalculator( parent=self, subarray=subarray ) - # Read the whole dl1 images with the 'TableLoader' + # Read the input data with the 'TableLoader' input_data = TableLoader(input_url=self.input_url) - self.dl1_table = input_data.read_telescope_events_by_id( + # Get the telescope ids from the input data or use the allowed_tels configuration + tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels + # Read the whole dl1 images + self.dl1_tables = input_data.read_telescope_events_by_id( + telescopes=tel_ids, dl1_images=True, dl1_parameters=False, dl1_muons=False, @@ -113,50 +119,52 @@ def setup(self): true_parameters=False, instrument=False, pointing=False, - )[self.tel_id] + ) def start(self): - # Perform the first pass of the statistics calculation - aggregated_stats = self.stats_calculator.first_pass( - table=self.dl1_table, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Check if 'chunk_shift' is selected - if self.stats_calculator.chunk_shift is not None: - # Check if there are any faulty chunks to perform a second pass over the data - if np.any(~aggregated_stats["is_valid"].data): - # Perform the second pass of the statistics calculation - aggregated_stats_secondpass = self.stats_calculator.second_pass( - table=self.dl1_table, - valid_chunks=aggregated_stats["is_valid"].data, - tel_id=self.tel_id, - col_name=self.dl1_column_name, - ) - # Stack the statistic values from the first and second pass - aggregated_stats = vstack( - [aggregated_stats, aggregated_stats_secondpass] - ) - # Sort the stacked aggregated statistic values by starting time - aggregated_stats.sort(["time_start"]) - else: - self.log.info( - "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", - self.tel_id, - ) - # Write the aggregated statistics and their outlier mask to the output file - write_table( - aggregated_stats, - self.output_path, - self.table_name, - overwrite=self.overwrite, - ) + # Iterate over the telescope ids and their corresponding dl1 tables + for tel_id, dl1_table in self.dl1_tables.items(): + # Perform the first pass of the statistics calculation + aggregated_stats = self.stats_calculator.first_pass( + table=dl1_table, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Check if 'chunk_shift' is selected + if self.stats_calculator.chunk_shift is not None: + # Check if there are any faulty chunks to perform a second pass over the data + if np.any(~aggregated_stats["is_valid"].data): + # Perform the second pass of the statistics calculation + aggregated_stats_secondpass = self.stats_calculator.second_pass( + table=dl1_table, + valid_chunks=aggregated_stats["is_valid"].data, + tel_id=tel_id, + col_name=self.dl1a_column_name, + ) + # Stack the statistic values from the first and second pass + aggregated_stats = vstack( + [aggregated_stats, aggregated_stats_secondpass] + ) + # Sort the stacked aggregated statistic values by starting time + aggregated_stats.sort(["time_start"]) + else: + self.log.info( + "No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", + tel_id, + ) + # Write the aggregated statistics and their outlier mask to the output file + write_table( + aggregated_stats, + self.output_path, + f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", + overwrite=self.overwrite, + ) def finish(self): self.log.info( "DL1 monitoring data was stored in '%s' under '%s'", self.output_path, - self.table_name, + f"/dl1/monitoring/telescope/{self.output_column_name}", ) self.log.info("Tool is shutting down") From 35e5082bc0e685cc768fe098107f647cdbfc9605 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 09:58:54 +0100 Subject: [PATCH 4/9] added unit test for stats calc tool --- src/ctapipe/tools/tests/test_stats_calc.py | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py new file mode 100644 index 00000000000..97157ace955 --- /dev/null +++ b/src/ctapipe/tools/tests/test_stats_calc.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-stats-calculation tool +""" + +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.io import read_table +from ctapipe.tools.stats_calculation import StatisticsCalculatorTool + + +def test_stats_calc_tool(tmp_path, dl1_image_file): + """check statistics calculation from DL1a files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "dl1a_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) From 78e3fc5b0f2fefcbfa11e56250fbdf9e6429b104 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:05:23 +0100 Subject: [PATCH 5/9] add changelog --- docs/changes/2628.features.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/2628.features.rst diff --git a/docs/changes/2628.features.rst b/docs/changes/2628.features.rst new file mode 100644 index 00000000000..f57b32854af --- /dev/null +++ b/docs/changes/2628.features.rst @@ -0,0 +1 @@ +Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. From 234382e858206caf1ddd2086aa904eaea63083f8 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 10:26:04 +0100 Subject: [PATCH 6/9] polish docs --- docs/user-guide/tools.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index 619b08bf793..f9c6c1b36cd 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,6 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). +* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) From ec8785f54584439bb325b69439749f002fc8687f Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:44:35 +0100 Subject: [PATCH 7/9] include first round of comments rename the tool and file name only keep dl1 table of the particular telescope into RAM added tests for tool config errors rename input col name adopt yaml syntax in example config for stats calculation --- docs/user-guide/tools.rst | 2 +- pyproject.toml | 2 +- .../resources/calculate_pixel_stats.yaml | 32 ++++++ src/ctapipe/resources/stats_calc_config.yaml | 37 ------- ...alculation.py => calculate_pixel_stats.py} | 62 ++++++----- .../tools/tests/test_calculate_pixel_stats.py | 102 ++++++++++++++++++ src/ctapipe/tools/tests/test_stats_calc.py | 57 ---------- 7 files changed, 170 insertions(+), 124 deletions(-) create mode 100644 src/ctapipe/resources/calculate_pixel_stats.yaml delete mode 100644 src/ctapipe/resources/stats_calc_config.yaml rename src/ctapipe/tools/{stats_calculation.py => calculate_pixel_stats.py} (72%) create mode 100644 src/ctapipe/tools/tests/test_calculate_pixel_stats.py delete mode 100644 src/ctapipe/tools/tests/test_stats_calc.py diff --git a/docs/user-guide/tools.rst b/docs/user-guide/tools.rst index f9c6c1b36cd..1a0b2320d9b 100644 --- a/docs/user-guide/tools.rst +++ b/docs/user-guide/tools.rst @@ -17,7 +17,7 @@ Data Processing Tools * ``ctapipe-quickstart``: create some default analysis configurations and a working directory * ``ctapipe-process``: Process event data in any supported format from R0/R1/DL0 to DL1 or DL2 HDF5 files. * ``ctapipe-apply-models``: Tool to apply machine learning models in bulk (as opposed to event by event). -* ``ctapipe-stats-calculation``: Tool to aggregate statistics and detect outliers from DL1a image data. +* ``ctapipe-calculate-pixel-statistics``: Tool to aggregate statistics and detect outliers from pixel-wise image data. * ``ctapipe-train-disp-reconstructor`` : Train the ML models for the `ctapipe.reco.DispReconstructor` (monoscopic reconstruction) * ``ctapipe-train-energy-regressor``: Train the ML models for the `ctapipe.reco.EnergyRegressor` (energy estimation) * ``ctapipe-train-particle-classifier``: Train the ML models for the `ctapipe.reco.ParticleClassifier` (gamma-hadron separation) diff --git a/pyproject.toml b/pyproject.toml index b8202e3d2e5..769667fb22f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,7 @@ ctapipe-process = "ctapipe.tools.process:main" ctapipe-merge = "ctapipe.tools.merge:main" ctapipe-fileinfo = "ctapipe.tools.fileinfo:main" ctapipe-quickstart = "ctapipe.tools.quickstart:main" -ctapipe-stats-calculation = "ctapipe.tools.stats_calculation:main" +ctapipe-calculate-pixel-statistics = "ctapipe.tools.calculate_pixel_stats:main" ctapipe-train-energy-regressor = "ctapipe.tools.train_energy_regressor:main" ctapipe-train-particle-classifier = "ctapipe.tools.train_particle_classifier:main" ctapipe-train-disp-reconstructor = "ctapipe.tools.train_disp_reconstructor:main" diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml new file mode 100644 index 00000000000..e4d1f0b3866 --- /dev/null +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -0,0 +1,32 @@ +StatisticsCalculatorTool: + allowed_tels: [1,2,3,4] + input_column_name: image + output_column_name: statistics + +PixelStatisticsCalculator: + stats_aggregator_type: + - ["type", "LST*", "SigmaClippingAggregator"], + - ["type", "MST*", "PlainAggregator"], + + chunk_shift: 1000 + faulty_pixels_fraction: 0.1 + outlier_detector_list: + - name: MedianOutlierDetector + apply_to: median + config: + median_range_factors: [-15, 15] + - name: RangeOutlierDetector + apply_to: median + config: + validity_range: [-20, 120] + - name: StdOutlierDetector + apply_to: std + config: + std_range_factors: [-15, 15] + +SigmaClippingAggregator: + chunk_size: 2500 + max_sigma: 4 + iterations: 5 +PlainAggregator: + chunk_size: 2500 diff --git a/src/ctapipe/resources/stats_calc_config.yaml b/src/ctapipe/resources/stats_calc_config.yaml deleted file mode 100644 index 965bd6dae68..00000000000 --- a/src/ctapipe/resources/stats_calc_config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -StatisticsCalculatorTool: - allowed_tels: [1,2,3,4] - dl1a_column_name: "image" - output_column_name: "statistics" - -PixelStatisticsCalculator: - stats_aggregator_type: [["type", "*", "SigmaClippingAggregator"]] - chunk_shift: 1000 - faulty_pixels_fraction: 0.1 - outlier_detector_list: [ - { - "apply_to": "median", - "name": "MedianOutlierDetector", - "config": { - "median_range_factors": [-15, 15], - }, - }, - { - "apply_to": "median", - "name": "RangeOutlierDetector", - "config": { - "validity_range": [-20, 120], - } - }, - { - "apply_to": "std", - "name": "StdOutlierDetector", - "config": { - "std_range_factors": [-15, 15], - }, - } - ] - -SigmaClippingAggregator: - chunk_size: 2500 - max_sigma: 4 - iterations: 5 diff --git a/src/ctapipe/tools/stats_calculation.py b/src/ctapipe/tools/calculate_pixel_stats.py similarity index 72% rename from src/ctapipe/tools/stats_calculation.py rename to src/ctapipe/tools/calculate_pixel_stats.py index b472ba461ae..4616c70b4e9 100644 --- a/src/ctapipe/tools/stats_calculation.py +++ b/src/ctapipe/tools/calculate_pixel_stats.py @@ -1,5 +1,5 @@ """ -Perform statistics calculation from DL1a image data +Perform statistics calculation from pixel-wise image data """ import pathlib @@ -11,7 +11,6 @@ from ctapipe.core.tool import ToolConfigurationError from ctapipe.core.traits import ( Bool, - CaselessStrEnum, CInt, Path, Set, @@ -26,21 +25,21 @@ class StatisticsCalculatorTool(Tool): """ - Perform statistics calculation for DL1a image data + Perform statistics calculation for pixel-wise image data """ name = "StatisticsCalculatorTool" - description = "Perform statistics calculation for DL1a image data" + description = "Perform statistics calculation for pixel-wise image data" examples = """ - To calculate statistics of DL1a image data files: + To calculate statistics of pixel-wise image data files: - > ctapipe-stats-calculation --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite + > ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite """ input_url = Path( - help="Input CTA HDF5 files including DL1a image data", + help="Input CTA HDF5 files including pixel-wise image data", allow_none=True, exists=True, directory_ok=False, @@ -57,11 +56,10 @@ class StatisticsCalculatorTool(Tool): ), ).tag(config=True) - dl1a_column_name = CaselessStrEnum( - ["image", "peak_time", "variance"], + input_column_name = Unicode( default_value="image", allow_none=False, - help="Column name of the DL1a image data to calculate statistics", + help="Column name of the pixel-wise image data to calculate statistics", ).tag(config=True) output_column_name = Unicode( @@ -104,31 +102,39 @@ def setup(self): parent=self, subarray=subarray ) # Read the input data with the 'TableLoader' - input_data = TableLoader(input_url=self.input_url) + self.input_data = TableLoader(input_url=self.input_url) # Get the telescope ids from the input data or use the allowed_tels configuration - tel_ids = subarray.tel_ids if self.allowed_tels is None else self.allowed_tels - # Read the whole dl1 images - self.dl1_tables = input_data.read_telescope_events_by_id( - telescopes=tel_ids, - dl1_images=True, - dl1_parameters=False, - dl1_muons=False, - dl2=False, - simulated=False, - true_images=False, - true_parameters=False, - instrument=False, - pointing=False, + self.tel_ids = ( + subarray.tel_ids if self.allowed_tels is None else self.allowed_tels ) def start(self): - # Iterate over the telescope ids and their corresponding dl1 tables - for tel_id, dl1_table in self.dl1_tables.items(): + # Iterate over the telescope ids and calculate the statistics + for tel_id in self.tel_ids: + # Read the whole dl1 images for one particular telescope + dl1_table = self.input_data.read_telescope_events_by_id( + telescopes=tel_id, + dl1_images=True, + dl1_parameters=False, + dl1_muons=False, + dl2=False, + simulated=False, + true_images=False, + true_parameters=False, + instrument=False, + pointing=False, + )[tel_id] + # Check if the input column name is in the table + if self.input_column_name not in dl1_table.colnames: + raise ToolConfigurationError( + f"Column '{self.input_column_name}' not found " + f"in the input data for telescope 'tel_id={tel_id}'." + ) # Perform the first pass of the statistics calculation aggregated_stats = self.stats_calculator.first_pass( table=dl1_table, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Check if 'chunk_shift' is selected if self.stats_calculator.chunk_shift is not None: @@ -139,7 +145,7 @@ def start(self): table=dl1_table, valid_chunks=aggregated_stats["is_valid"].data, tel_id=tel_id, - col_name=self.dl1a_column_name, + col_name=self.input_column_name, ) # Stack the statistic values from the first and second pass aggregated_stats = vstack( diff --git a/src/ctapipe/tools/tests/test_calculate_pixel_stats.py b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py new file mode 100644 index 00000000000..38e61354b65 --- /dev/null +++ b/src/ctapipe/tools/tests/test_calculate_pixel_stats.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +""" +Test ctapipe-calculate-pixel-statistics tool +""" + +import pytest +from traitlets.config.loader import Config + +from ctapipe.core import run_tool +from ctapipe.core.tool import ToolConfigurationError +from ctapipe.io import read_table +from ctapipe.tools.calculate_pixel_stats import StatisticsCalculatorTool + + +def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): + """check statistics calculation from pixel-wise image data files""" + + # Create a configuration suitable for the test + tel_id = 3 + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [tel_id], + "input_column_name": "image", + "output_column_name": "statistics", + }, + "PixelStatisticsCalculator": { + "stats_aggregator_type": [ + ("id", tel_id, "PlainAggregator"), + ], + }, + "PlainAggregator": { + "chunk_size": 1, + }, + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Run the tool with the configuration and the input file + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check that the output file has been created + assert monitoring_file.exists() + # Check that the output file is not empty + assert ( + read_table( + monitoring_file, + path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", + )["mean"] + is not None + ) + + +def test_tool_config_error(tmp_path, dl1_image_file): + """check tool configuration error""" + + # Run the tool with the configuration and the input file + config = Config( + { + "StatisticsCalculatorTool": { + "allowed_tels": [3], + "input_column_name": "image_charges", + "output_column_name": "statistics", + } + } + ) + # Set the output file path + monitoring_file = tmp_path / "monitoring.dl1.h5" + # Check if ToolConfigurationError is raised + # when the column name of the pixel-wise image data is not correct + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(config=config), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={monitoring_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + # Check if ToolConfigurationError is raised + # when the input and output files are the same + with pytest.raises(ToolConfigurationError): + run_tool( + StatisticsCalculatorTool(), + argv=[ + f"--input_url={dl1_image_file}", + f"--output_path={dl1_image_file}", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) diff --git a/src/ctapipe/tools/tests/test_stats_calc.py b/src/ctapipe/tools/tests/test_stats_calc.py deleted file mode 100644 index 97157ace955..00000000000 --- a/src/ctapipe/tools/tests/test_stats_calc.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -""" -Test ctapipe-stats-calculation tool -""" - -from traitlets.config.loader import Config - -from ctapipe.core import run_tool -from ctapipe.io import read_table -from ctapipe.tools.stats_calculation import StatisticsCalculatorTool - - -def test_stats_calc_tool(tmp_path, dl1_image_file): - """check statistics calculation from DL1a files""" - - # Create a configuration suitable for the test - tel_id = 3 - config = Config( - { - "StatisticsCalculatorTool": { - "allowed_tels": [tel_id], - "dl1a_column_name": "image", - "output_column_name": "statistics", - }, - "PixelStatisticsCalculator": { - "stats_aggregator_type": [ - ("id", tel_id, "PlainAggregator"), - ], - }, - "PlainAggregator": { - "chunk_size": 1, - }, - } - ) - # Set the output file path - monitoring_file = tmp_path / "monitoring.dl1.h5" - # Run the tool with the configuration and the input file - run_tool( - StatisticsCalculatorTool(config=config), - argv=[ - f"--input_url={dl1_image_file}", - f"--output_path={monitoring_file}", - "--overwrite", - ], - cwd=tmp_path, - raises=True, - ) - # Check that the output file has been created - assert monitoring_file.exists() - # Check that the output file is not empty - assert ( - read_table( - monitoring_file, - path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", - )["mean"] - is not None - ) From 13d725d811457ec3d3e43cdce049ec75bf491691 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 14:59:48 +0100 Subject: [PATCH 8/9] rename config file also in quickstart tool --- src/ctapipe/tools/quickstart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctapipe/tools/quickstart.py b/src/ctapipe/tools/quickstart.py index 9cab9c97d99..f8dfaff0d3c 100644 --- a/src/ctapipe/tools/quickstart.py +++ b/src/ctapipe/tools/quickstart.py @@ -12,10 +12,10 @@ CONFIGS_TO_WRITE = [ "base_config.yaml", + "calculate_pixel_stats.yaml", "stage1_config.yaml", "stage2_config.yaml", "ml_preprocessing_config.yaml", - "stats_calc_config.yaml", "train_energy_regressor.yaml", "train_particle_classifier.yaml", "train_disp_reconstructor.yaml", From 1e73fe28c79f9608ccfc71fe459340ad7b93718d Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Mon, 28 Oct 2024 16:28:16 +0100 Subject: [PATCH 9/9] remove redundant , in stats calc example config --- src/ctapipe/resources/calculate_pixel_stats.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctapipe/resources/calculate_pixel_stats.yaml b/src/ctapipe/resources/calculate_pixel_stats.yaml index e4d1f0b3866..48e262d3ab2 100644 --- a/src/ctapipe/resources/calculate_pixel_stats.yaml +++ b/src/ctapipe/resources/calculate_pixel_stats.yaml @@ -5,8 +5,8 @@ StatisticsCalculatorTool: PixelStatisticsCalculator: stats_aggregator_type: - - ["type", "LST*", "SigmaClippingAggregator"], - - ["type", "MST*", "PlainAggregator"], + - ["type", "LST*", "SigmaClippingAggregator"] + - ["type", "MST*", "PlainAggregator"] chunk_shift: 1000 faulty_pixels_fraction: 0.1