diff --git a/docs/contribute/beforepushing.rst b/docs/contribute/beforepushing.rst index d337f4af..271cbeff 100644 --- a/docs/contribute/beforepushing.rst +++ b/docs/contribute/beforepushing.rst @@ -84,7 +84,7 @@ Same for *pyirf*. Integration tests ^^^^^^^^^^^^^^^^^ -These are neither unit-tests nor benchmarks, but rather functions that tests +These are neither unit-tests nor benchmarks, but rather functions that test whole functionalities and not just single API functions. In the case of the pipeline, such functionalities are the scripts/tools diff --git a/protopipe/perf/__init__.py b/protopipe/perf/__init__.py index 943dda47..aed1b5c2 100644 --- a/protopipe/perf/__init__.py +++ b/protopipe/perf/__init__.py @@ -1,2 +1,2 @@ from .temp import * -from .utils import * \ No newline at end of file +from .utils import * diff --git a/protopipe/perf/temp.py b/protopipe/perf/temp.py index 05bec17d..1b6d66e3 100644 --- a/protopipe/perf/temp.py +++ b/protopipe/perf/temp.py @@ -1,9 +1,9 @@ -"""Temporary development code from pyirf master branch.""" +"""Temporary development code in-between pyirf releases.""" -from astropy.table import Table import numpy as np -from scipy.stats import norm +from astropy.table import Table import astropy.units as u +from scipy.stats import norm from pyirf.binning import calculate_bin_indices from pyirf.benchmarks.energy_bias_resolution import inter_quantile_distance @@ -47,6 +47,8 @@ def energy_bias_resolution( table["bin_index"] = calculate_bin_indices( table[f"{energy_type}_energy"].quantity, energy_bins ) + n_bins = len(energy_bins) - 1 + mask = (table["bin_index"] >= 0) & (table["bin_index"] < n_bins) result = Table() result[f"{energy_type}_energy_low"] = energy_bins[:-1] @@ -56,8 +58,13 @@ def energy_bias_resolution( result["bias"] = np.nan result["resolution"] = np.nan + if not len(events): + # if we get an empty input (no selected events available) + # we return the table filled with NaNs + return result + # use groupby operations to calculate the percentile in each bin - by_bin = table.group_by("bin_index") + by_bin = table[mask].group_by("bin_index") index = by_bin.groups.keys["bin_index"] result["bias"][index] = by_bin["rel_error"].groups.aggregate(bias_function) @@ -89,10 +96,12 @@ def angular_resolution( Returns ------- result : astropy.table.Table - Table containing the 68% containment of the angular - distance distribution per each reconstructed energy bin. + Table containing the 68% containment of the angular distance + distribution per each reconstructed energy bin. """ + ONE_SIGMA_QUANTILE = norm.cdf(1) - norm.cdf(-1) + # create a table to make use of groupby operations table = Table(events[[f"{energy_type}_energy", "theta"]]) @@ -100,6 +109,9 @@ def angular_resolution( table[f"{energy_type}_energy"].quantity, energy_bins ) + n_bins = len(energy_bins) - 1 + mask = (table["bin_index"] >= 0) & (table["bin_index"] < n_bins) + result = Table() result[f"{energy_type}_energy_low"] = energy_bins[:-1] result[f"{energy_type}_energy_high"] = energy_bins[1:] @@ -108,11 +120,10 @@ def angular_resolution( result["angular_resolution"] = np.nan * u.deg # use groupby operations to calculate the percentile in each bin - by_bin = table.group_by("bin_index") + by_bin = table[mask].group_by("bin_index") index = by_bin.groups.keys["bin_index"] - ONE_SIGMA_PERCENTILE = norm.cdf(1) - norm.cdf(-1) result["angular_resolution"][index] = by_bin["theta"].groups.aggregate( - lambda x: np.percentile(x, 100 * ONE_SIGMA_PERCENTILE) + lambda x: np.quantile(x, ONE_SIGMA_QUANTILE) ) return result diff --git a/protopipe/perf/utils.py b/protopipe/perf/utils.py index 1856ab46..54f401a9 100644 --- a/protopipe/perf/utils.py +++ b/protopipe/perf/utils.py @@ -1,3 +1,4 @@ +import argparse import logging import numpy as np @@ -11,6 +12,55 @@ from pyirf.simulations import SimulatedEventsInfo +def initialize_script_arguments(): + """Initialize the parser of any protopipe.scripts.make_performance_XXX script. + + Returns + ------- + args : argparse.Namespace + Populated argparse namespace. + """ + + parser = argparse.ArgumentParser(description='Make performance files') + parser.add_argument('--config_file', type=str, required=True, help='') + + mode_group = parser.add_mutually_exclusive_group() + mode_group.add_argument('--wave', dest="mode", action='store_const', + const="wave", default="tail", + help="if set, use wavelet cleaning") + mode_group.add_argument('--tail', dest="mode", action='store_const', + const="tail", + help="if set, use tail cleaning (default)") + parser.add_argument( + "-i", + "--indir", + type=str, + default=None, + help="Directory containing the required DL2 input files" + ) + parser.add_argument( + "--template_input_file", + type=str, + default=None, + help="template for the filename of DL2 files (default: read from config file)", + ) + parser.add_argument( + "--outdir_path", + type=str, + default=None, + help="Output directory for DL3 file" + ) + parser.add_argument( + "--out_file_name", + type=str, + default=None, + help="Desired name for DL3 file" + ) + + args = parser.parse_args() + + return args + def percentiles(values, bin_values, bin_edges, percentile): # Seems complicated for vector defined as [inf, inf, .., inf] percentiles_binned = np.squeeze( diff --git a/protopipe/scripts/make_performance_EventDisplay.py b/protopipe/scripts/make_performance_EventDisplay.py index b85375e1..f4ae002b 100755 --- a/protopipe/scripts/make_performance_EventDisplay.py +++ b/protopipe/scripts/make_performance_EventDisplay.py @@ -5,12 +5,11 @@ import astropy.units as u from astropy import table from astropy.io import fits -import argparse import numpy as np import operator from protopipe.pipeline.utils import load_config -from protopipe.perf.utils import read_DL2_pyirf +from protopipe.perf.utils import initialize_script_arguments, read_DL2_pyirf from pyirf.binning import ( add_overflow_bins, @@ -43,7 +42,6 @@ create_rad_max_hdu, create_background_2d_hdu, ) -# from pyirf.benchmarks import energy_bias_resolution, angular_resolution from protopipe.perf.temp import energy_bias_resolution, angular_resolution log = logging.getLogger("pyirf") @@ -51,36 +49,44 @@ def main(): - # Read arguments - parser = argparse.ArgumentParser(description='Make performance files') - parser.add_argument('--config_file', type=str, required=True, help='') + # INITIALIZE CLI arguments + args = initialize_script_arguments() - mode_group = parser.add_mutually_exclusive_group() - mode_group.add_argument('--wave', dest="mode", action='store_const', - const="wave", default="tail", - help="if set, use wavelet cleaning") - mode_group.add_argument('--tail', dest="mode", action='store_const', - const="tail", - help="if set, use tail cleaning (default)") - - args = parser.parse_args() - - # Read configuration file + # LOAD CONFIGURATION FILE cfg = load_config(args.config_file) - # Create output directory if necessary - outdir = os.path.join(cfg['general']['outdir'], 'performance_protopipe_{}_CTA{}_{}_Zd{}_{}_Time{:.2f}{}'.format( - cfg['general']['prod'], - cfg['general']['site'], - cfg['general']['array'], - cfg['general']['zenith'], - cfg['general']['azimuth'], - cfg['analysis']['obs_time']['value'], - cfg['analysis']['obs_time']['unit']), - ) - - indir = cfg['general']['indir'] - template_input_file = cfg['general']['template_input_file'] + # Set final input parameters + # Command line as higher priority on configuration file + if args.indir is None: + indir = cfg["general"]["indir"] + else: + indir = args.indir + + if args.outdir_path is None: + outdir_path = cfg["general"]["outdir"] + else: + outdir_path = args.outdir_path + if not os.path.exists(outdir_path): + os.makedirs(outdir_path) + + if args.indir is None: + template_input_file = cfg['general']['template_input_file'] + else: + template_input_file = args.template_input_file + + if args.out_file_name is None: + out_file_name = 'performance_protopipe_{}_CTA{}_{}_Zd{}_{}_Time{:.2f}{}'.format( + cfg['general']['prod'], + cfg['general']['site'], + cfg['general']['array'], + cfg['general']['zenith'], + cfg['general']['azimuth'], + cfg['analysis']['obs_time']['value'], + cfg['analysis']['obs_time']['unit']) + else: + out_file_name = args.out_file_name + + outdir = os.path.join(outdir_path, out_file_name) T_OBS = cfg['analysis']['obs_time']['value'] * u.Unit(cfg['analysis']['obs_time']['unit']) diff --git a/protopipe/scripts/tests/test_config_analysis_north.yaml b/protopipe/scripts/tests/test_config_analysis_north.yaml index 719ba22f..bbf632f6 100644 --- a/protopipe/scripts/tests/test_config_analysis_north.yaml +++ b/protopipe/scripts/tests/test_config_analysis_north.yaml @@ -59,7 +59,7 @@ ImageCleaning: - DigiCam: [0,0] # values left unset for future studies - CHEC: [0,0] # values left unset for future studies - SCTCam: [0,0] # values left unset for future studies - keep_isolated_pixels: False + keep_isolated_pixels: True min_number_picture_neighbors: 1 wave: @@ -92,7 +92,11 @@ ImageSelection: # Minimal number of telescopes to consider events Reconstruction: - min_tel: 2 + # for events with <2 LST images the single-LST image is removed + # before shower geometry + LST_stereo: True + # after this we check if the remaining images satisfy the min_tel condition + min_tel: 2 # any tel_type # Parameters for energy estimation EnergyRegressor: diff --git a/protopipe/scripts/tests/test_config_analysis_south.yaml b/protopipe/scripts/tests/test_config_analysis_south.yaml index 30dd9e4b..5bf59856 100644 --- a/protopipe/scripts/tests/test_config_analysis_south.yaml +++ b/protopipe/scripts/tests/test_config_analysis_south.yaml @@ -59,7 +59,7 @@ ImageCleaning: - DigiCam: [0,0] # values left unset for future studies - CHEC: [0,0] # values left unset for future studies - SCTCam: [0,0] # values left unset for future studies - keep_isolated_pixels: False + keep_isolated_pixels: True min_number_picture_neighbors: 1 wave: @@ -92,7 +92,11 @@ ImageSelection: # Minimal number of telescopes to consider events Reconstruction: - min_tel: 2 + # for events with <2 LST images the single-LST image is removed + # before shower geometry + LST_stereo: False + # after this we check if the remaining images satisfy the min_tel condition + min_tel: 2 # any tel_type # Parameters for energy estimation EnergyRegressor: diff --git a/protopipe/scripts/tests/test_performance_ED_prod3b.yaml b/protopipe/scripts/tests/test_performance_ED_prod3b.yaml new file mode 100644 index 00000000..ae9825e0 --- /dev/null +++ b/protopipe/scripts/tests/test_performance_ED_prod3b.yaml @@ -0,0 +1,52 @@ +general: + # Directory with input data file + # [...] = your analysis local full path OUTSIDE the Vagrant box + indir: '[...]/shared_folder/analyses/v0.4.0_dev1/data/DL2' + # Template name for output file + prod: 'Prod3b' + site: 'North' + array: 'baseline_full_array' + zenith: '20deg' + azimuth: '180deg' # 0deg -> north 180deg -> south + template_input_file: 'DL2_{}_{}_merged.h5' # filled with mode and particle type + # Directory for output files + outdir: '[...]/shared_folder/analyses/v0.4.0_dev1/data/DL3' + +analysis: + obs_time: + value: 50 + unit: 'h' + cut_on_multiplicity: 4 + # Normalisation between ON and OFF regions + alpha: 0.2 + + # Radius to use for calculating bg rate + max_bg_radius: 1. + +particle_information: + gamma: + num_use: 10 + num_showers: 100000 + e_min: 0.003 + e_max: 330 + gen_radius: 1400 + gen_gamma: -2 + diff_cone: 0 + + proton: + num_use: 20 + num_showers: 200000 + e_min: 0.004 + e_max: 600 + gen_radius: 1900 + gen_gamma: -2 + diff_cone: 10 + + electron: + num_use: 20 + num_showers: 100000 + e_min: 0.003 + e_max: 330 + gen_radius: 1900 + gen_gamma: -2 + diff_cone: 10 diff --git a/protopipe/scripts/tests/test_pipeline.py b/protopipe/scripts/tests/test_pipeline.py index dd13f069..d4498544 100644 --- a/protopipe/scripts/tests/test_pipeline.py +++ b/protopipe/scripts/tests/test_pipeline.py @@ -1,3 +1,4 @@ +from pathlib import Path from os import system from pkg_resources import resource_filename @@ -5,7 +6,10 @@ import pytest from ctapipe.utils.datasets import get_dataset_path -from protopipe.scripts import data_training, build_model, write_dl2 +from protopipe.scripts import (data_training, + build_model, + write_dl2, + make_performance_EventDisplay) # PROD 3B @@ -16,6 +20,7 @@ config_AdaBoostRegressor = resource_filename("protopipe", "scripts/tests/test_AdaBoostRegressor.yaml") config_RandomForestRegressor = resource_filename("protopipe", "scripts/tests/test_RandomForestRegressor.yaml") config_RandomForestClassifier = resource_filename("protopipe", "scripts/tests/test_RandomForestClassifier.yaml") +config_DL3_ED_prod3b = resource_filename("protopipe", "scripts/tests/test_performance_ED_prod3b.yaml") # TEST FILES @@ -298,7 +303,7 @@ def test_GET_DL2_GAMMAS(test_case, pipeline_testdir): regressor_path = pipeline_testdir / f"energy_model_{test_case}" classifier_path = pipeline_testdir / f"classification_model_{test_case}" - outpath = pipeline_testdir / f"test_gamma3_noImages_{test_case}.h5" + outpath = pipeline_testdir / f"test_DL2_tail_gamma_noImages_{test_case}.h5" command = f"python {write_dl2.__file__}\ --config_file {input_data[test_case]['config']}\ @@ -338,7 +343,7 @@ def test_GET_DL2_PROTONS(test_case, pipeline_testdir): regressor_path = pipeline_testdir / f"energy_model_{test_case}" classifier_path = pipeline_testdir / f"classification_model_{test_case}" - outpath = pipeline_testdir / f"test_gamma3_noImages_{test_case}.h5" + outpath = pipeline_testdir / f"test_DL2_tail_proton_noImages_{test_case}.h5" command = f"python {write_dl2.__file__}\ --config_file {input_data[test_case]['config']}\ @@ -378,7 +383,7 @@ def test_GET_DL2_ELECTRONS(test_case, pipeline_testdir): regressor_path = pipeline_testdir / f"energy_model_{test_case}" classifier_path = pipeline_testdir / f"classification_model_{test_case}" - outpath = pipeline_testdir / f"test_gamma3_noImages_{test_case}.h5" + outpath = pipeline_testdir / f"test_DL2_tail_electron_noImages_{test_case}.h5" command = f"python {write_dl2.__file__}\ --config_file {input_data[test_case]['config']}\ @@ -405,4 +410,45 @@ def test_GET_DL2_ELECTRONS(test_case, pipeline_testdir): # check that the produced HDF5 file is non-empty with tables.open_file(outpath) as file: - assert file.get_filesize() > 0 \ No newline at end of file + assert file.get_filesize() > 0 + + +@pytest.mark.parametrize("test_case", [ + pytest.param("PROD3B_CTA_NORTH", marks=pytest.mark.dependency(name="DL3N", + depends=["g3N", "p2N", "elN"])), + pytest.param("PROD3B_CTA_SOUTH", marks=pytest.mark.dependency(name="DL3S", + depends=["g3S", "p2S", "elS"])), +]) +def test_GET_DL3_ED_prod3b(test_case, pipeline_testdir): + + template_input_file = f"test_DL2_{{}}_{{}}_noImages_{test_case}.h5" + + command = f"python {make_performance_EventDisplay.__file__}\ + --config_file {config_DL3_ED_prod3b}\ + --indir {pipeline_testdir}\ + --outdir_path {pipeline_testdir}\ + --out_file_name 'test_DL3_{test_case}'\ + --template_input_file {template_input_file}" + + print( # only with "pytest -s" + f''' + You can reproduce this test by running the following command, + + {command} + ''' + ) + + exit_status = system(command) + + # check that the script ends without crashing + assert exit_status == 0 + + # check that the output file exists and it is not empty + path = Path(pipeline_testdir) / f"test_DL3_{test_case}.fits.gz" + assert path.exists() and (path.stat().st_size > 0) + + from astropy.io import fits + with fits.open(path) as hdul: + assert len(hdul) == 19 # check that all HDUs are there + for hdu in hdul[1:]: + assert hdu.size > 0 # check presence of data