-
Notifications
You must be signed in to change notification settings - Fork 268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stats calc tool #2628
base: main
Are you sure you want to change the base?
Stats calc tool #2628
Changes from all commits
8c95d3d
69ae37f
6b8feff
35e5082
78e3fc5
234382e
ec8785f
13d725d
1e73fe2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Add a generic stats-calculation tool utilizing the PixelStatisticsCalculator. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
StatisticsCalculatorTool: | ||
allowed_tels: [1,2,3,4] | ||
input_column_name: image | ||
output_column_name: statistics | ||
|
||
PixelStatisticsCalculator: | ||
stats_aggregator_type: | ||
- ["type", "LST*", "SigmaClippingAggregator"] | ||
- ["type", "MST*", "PlainAggregator"] | ||
|
||
chunk_shift: 1000 | ||
faulty_pixels_fraction: 0.1 | ||
outlier_detector_list: | ||
- name: MedianOutlierDetector | ||
apply_to: median | ||
config: | ||
median_range_factors: [-15, 15] | ||
- name: RangeOutlierDetector | ||
apply_to: median | ||
config: | ||
validity_range: [-20, 120] | ||
- name: StdOutlierDetector | ||
apply_to: std | ||
config: | ||
std_range_factors: [-15, 15] | ||
|
||
SigmaClippingAggregator: | ||
chunk_size: 2500 | ||
max_sigma: 4 | ||
iterations: 5 | ||
PlainAggregator: | ||
chunk_size: 2500 |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,185 @@ | ||||||||
""" | ||||||||
Perform statistics calculation from pixel-wise image data | ||||||||
""" | ||||||||
|
||||||||
import pathlib | ||||||||
|
||||||||
import numpy as np | ||||||||
from astropy.table import vstack | ||||||||
|
||||||||
from ctapipe.core import Tool | ||||||||
from ctapipe.core.tool import ToolConfigurationError | ||||||||
from ctapipe.core.traits import ( | ||||||||
Bool, | ||||||||
CInt, | ||||||||
Path, | ||||||||
Set, | ||||||||
Unicode, | ||||||||
classes_with_traits, | ||||||||
) | ||||||||
from ctapipe.instrument import SubarrayDescription | ||||||||
from ctapipe.io import write_table | ||||||||
from ctapipe.io.tableloader import TableLoader | ||||||||
from ctapipe.monitoring.calculator import PixelStatisticsCalculator | ||||||||
|
||||||||
|
||||||||
class StatisticsCalculatorTool(Tool): | ||||||||
""" | ||||||||
Perform statistics calculation for pixel-wise image data | ||||||||
""" | ||||||||
|
||||||||
name = "StatisticsCalculatorTool" | ||||||||
description = "Perform statistics calculation for pixel-wise image data" | ||||||||
|
||||||||
examples = """ | ||||||||
To calculate statistics of pixel-wise image data files: | ||||||||
|
||||||||
> ctapipe-calculate-pixel-statistics --input_url input.dl1.h5 --output_path /path/monitoring.h5 --overwrite | ||||||||
|
||||||||
""" | ||||||||
|
||||||||
input_url = Path( | ||||||||
help="Input CTA HDF5 files including pixel-wise image data", | ||||||||
allow_none=True, | ||||||||
exists=True, | ||||||||
directory_ok=False, | ||||||||
file_ok=True, | ||||||||
).tag(config=True) | ||||||||
|
||||||||
allowed_tels = Set( | ||||||||
trait=CInt(), | ||||||||
default_value=None, | ||||||||
allow_none=True, | ||||||||
help=( | ||||||||
"List of allowed tel_ids, others will be ignored. " | ||||||||
"If None, all telescopes in the input stream will be included." | ||||||||
), | ||||||||
).tag(config=True) | ||||||||
|
||||||||
input_column_name = Unicode( | ||||||||
default_value="image", | ||||||||
allow_none=False, | ||||||||
help="Column name of the pixel-wise image data to calculate statistics", | ||||||||
).tag(config=True) | ||||||||
|
||||||||
output_column_name = Unicode( | ||||||||
default_value="statistics", | ||||||||
allow_none=False, | ||||||||
help="Column name of the output statistics", | ||||||||
).tag(config=True) | ||||||||
|
||||||||
output_path = Path( | ||||||||
help="Output filename", default_value=pathlib.Path("monitoring.h5") | ||||||||
).tag(config=True) | ||||||||
|
||||||||
overwrite = Bool(help="Overwrite output file if it exists").tag(config=True) | ||||||||
|
||||||||
aliases = { | ||||||||
("i", "input_url"): "StatisticsCalculatorTool.input_url", | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
("o", "output_path"): "StatisticsCalculatorTool.output_path", | ||||||||
} | ||||||||
|
||||||||
flags = { | ||||||||
"overwrite": ( | ||||||||
{"StatisticsCalculatorTool": {"overwrite": True}}, | ||||||||
"Overwrite existing files", | ||||||||
), | ||||||||
} | ||||||||
|
||||||||
classes = classes_with_traits(PixelStatisticsCalculator) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you need to add classes = [TableLoader,] + classes_with_traits(PixelStatisticsCalculator) |
||||||||
|
||||||||
def setup(self): | ||||||||
# Check that the input and output files are not the same | ||||||||
if self.input_url == self.output_path: | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will also need to change all instances of |
||||||||
raise ToolConfigurationError( | ||||||||
"Input and output files are same. Fix your configuration / cli arguments." | ||||||||
) | ||||||||
|
||||||||
# Load the subarray description from the input file | ||||||||
subarray = SubarrayDescription.from_hdf(self.input_url) | ||||||||
# Initialization of the statistics calculator | ||||||||
self.stats_calculator = PixelStatisticsCalculator( | ||||||||
parent=self, subarray=subarray | ||||||||
) | ||||||||
# Read the input data with the 'TableLoader' | ||||||||
self.input_data = TableLoader(input_url=self.input_url) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs to be configurable, so you need to pass |
||||||||
# Get the telescope ids from the input data or use the allowed_tels configuration | ||||||||
self.tel_ids = ( | ||||||||
subarray.tel_ids if self.allowed_tels is None else self.allowed_tels | ||||||||
) | ||||||||
|
||||||||
def start(self): | ||||||||
# Iterate over the telescope ids and calculate the statistics | ||||||||
for tel_id in self.tel_ids: | ||||||||
# Read the whole dl1 images for one particular telescope | ||||||||
dl1_table = self.input_data.read_telescope_events_by_id( | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I get a crash here: % ctapipe-calculate-pixel-statistics -i events.dl1.h5 -o stats.h5
dl1_table = self.input_data.read_telescope_events_by_id(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/kkosack/Projects/CTA/Working/ctapipe/src/ctapipe/io/tableloader.py", line 1089, in read_telescope_events_by_id
tel_ids = self.subarray.get_tel_ids(telescopes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/kkosack/Projects/CTA/Working/ctapipe/src/ctapipe/instrument/subarray.py", line 549, in get_tel_ids
for telescope in telescopes:
TypeError: 'numpy.int16' object is not iterable Seems to be due to passing an integer instead of a list, which is what is required by
Suggested change
How does this work in the tests? |
||||||||
telescopes=tel_id, | ||||||||
dl1_images=True, | ||||||||
dl1_parameters=False, | ||||||||
dl1_muons=False, | ||||||||
dl2=False, | ||||||||
simulated=False, | ||||||||
true_images=False, | ||||||||
true_parameters=False, | ||||||||
instrument=False, | ||||||||
pointing=False, | ||||||||
)[tel_id] | ||||||||
# Check if the input column name is in the table | ||||||||
if self.input_column_name not in dl1_table.colnames: | ||||||||
raise ToolConfigurationError( | ||||||||
f"Column '{self.input_column_name}' not found " | ||||||||
f"in the input data for telescope 'tel_id={tel_id}'." | ||||||||
) | ||||||||
# Perform the first pass of the statistics calculation | ||||||||
aggregated_stats = self.stats_calculator.first_pass( | ||||||||
table=dl1_table, | ||||||||
tel_id=tel_id, | ||||||||
col_name=self.input_column_name, | ||||||||
) | ||||||||
# Check if 'chunk_shift' is selected | ||||||||
if self.stats_calculator.chunk_shift is not None: | ||||||||
# Check if there are any faulty chunks to perform a second pass over the data | ||||||||
if np.any(~aggregated_stats["is_valid"].data): | ||||||||
# Perform the second pass of the statistics calculation | ||||||||
aggregated_stats_secondpass = self.stats_calculator.second_pass( | ||||||||
table=dl1_table, | ||||||||
valid_chunks=aggregated_stats["is_valid"].data, | ||||||||
tel_id=tel_id, | ||||||||
col_name=self.input_column_name, | ||||||||
) | ||||||||
# Stack the statistic values from the first and second pass | ||||||||
aggregated_stats = vstack( | ||||||||
[aggregated_stats, aggregated_stats_secondpass] | ||||||||
) | ||||||||
# Sort the stacked aggregated statistic values by starting time | ||||||||
aggregated_stats.sort(["time_start"]) | ||||||||
else: | ||||||||
self.log.info( | ||||||||
"No faulty chunks found for telescope 'tel_id=%d'. Skipping second pass.", | ||||||||
tel_id, | ||||||||
) | ||||||||
# Write the aggregated statistics and their outlier mask to the output file | ||||||||
write_table( | ||||||||
aggregated_stats, | ||||||||
self.output_path, | ||||||||
f"/dl1/monitoring/telescope/{self.output_column_name}/tel_{tel_id:03d}", | ||||||||
overwrite=self.overwrite, | ||||||||
) | ||||||||
|
||||||||
def finish(self): | ||||||||
self.log.info( | ||||||||
"DL1 monitoring data was stored in '%s' under '%s'", | ||||||||
self.output_path, | ||||||||
f"/dl1/monitoring/telescope/{self.output_column_name}", | ||||||||
) | ||||||||
self.log.info("Tool is shutting down") | ||||||||
|
||||||||
|
||||||||
def main(): | ||||||||
# Run the tool | ||||||||
tool = StatisticsCalculatorTool() | ||||||||
tool.run() | ||||||||
|
||||||||
|
||||||||
if __name__ == "main": | ||||||||
main() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Test ctapipe-calculate-pixel-statistics tool | ||
""" | ||
|
||
import pytest | ||
from traitlets.config.loader import Config | ||
|
||
from ctapipe.core import run_tool | ||
from ctapipe.core.tool import ToolConfigurationError | ||
from ctapipe.io import read_table | ||
from ctapipe.tools.calculate_pixel_stats import StatisticsCalculatorTool | ||
|
||
|
||
def test_calculate_pixel_stats_tool(tmp_path, dl1_image_file): | ||
"""check statistics calculation from pixel-wise image data files""" | ||
|
||
# Create a configuration suitable for the test | ||
tel_id = 3 | ||
config = Config( | ||
{ | ||
"StatisticsCalculatorTool": { | ||
"allowed_tels": [tel_id], | ||
"input_column_name": "image", | ||
"output_column_name": "statistics", | ||
}, | ||
"PixelStatisticsCalculator": { | ||
"stats_aggregator_type": [ | ||
("id", tel_id, "PlainAggregator"), | ||
], | ||
}, | ||
"PlainAggregator": { | ||
"chunk_size": 1, | ||
}, | ||
} | ||
) | ||
# Set the output file path | ||
monitoring_file = tmp_path / "monitoring.dl1.h5" | ||
# Run the tool with the configuration and the input file | ||
run_tool( | ||
StatisticsCalculatorTool(config=config), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={monitoring_file}", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) | ||
# Check that the output file has been created | ||
assert monitoring_file.exists() | ||
# Check that the output file is not empty | ||
assert ( | ||
read_table( | ||
monitoring_file, | ||
path=f"/dl1/monitoring/telescope/statistics/tel_{tel_id:03d}", | ||
)["mean"] | ||
is not None | ||
) | ||
|
||
|
||
def test_tool_config_error(tmp_path, dl1_image_file): | ||
"""check tool configuration error""" | ||
|
||
# Run the tool with the configuration and the input file | ||
config = Config( | ||
{ | ||
"StatisticsCalculatorTool": { | ||
"allowed_tels": [3], | ||
"input_column_name": "image_charges", | ||
"output_column_name": "statistics", | ||
} | ||
} | ||
) | ||
# Set the output file path | ||
monitoring_file = tmp_path / "monitoring.dl1.h5" | ||
# Check if ToolConfigurationError is raised | ||
# when the column name of the pixel-wise image data is not correct | ||
with pytest.raises(ToolConfigurationError): | ||
run_tool( | ||
StatisticsCalculatorTool(config=config), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={monitoring_file}", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) | ||
# Check if ToolConfigurationError is raised | ||
# when the input and output files are the same | ||
with pytest.raises(ToolConfigurationError): | ||
run_tool( | ||
StatisticsCalculatorTool(), | ||
argv=[ | ||
f"--input_url={dl1_image_file}", | ||
f"--output_path={dl1_image_file}", | ||
"--overwrite", | ||
], | ||
cwd=tmp_path, | ||
raises=True, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should remove this option, and instead use the parameter provided by TableLoader.