From 78a859fb107afbf1a4f678ba623923a31db6b9bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CRobHanna-NOAA=E2=80=9D?= <“Robert.Hanna@NOAA.gov”> Date: Fri, 10 May 2024 20:31:16 +0000 Subject: [PATCH 1/9] update code --- .../ble/ble_benchmark/create_ble_benchmark.py | 105 +++++++++++++++++- src/utils/shared_validators.py | 35 ++++++ 2 files changed, 135 insertions(+), 5 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index 1bbd34bb0..f2caa6250 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -1,11 +1,14 @@ #!/usr/bin/env python3 import argparse +import datetime as dt +import logging import os from datetime import datetime from glob import glob from io import BytesIO from urllib.request import urlopen +from urllib.parse import urlsplit from zipfile import ZipFile import numpy as np @@ -13,6 +16,28 @@ from create_flow_forecast_file import create_flow_forecast_file from osgeo import gdal from preprocess_benchmark import preprocess_benchmark_static +from src.utils.shared_validators import is_valid_huc + + +def __setup_logger(output_folder_path): + start_time = datetime.now() + file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") + log_file_name = f"create_ble_benchmark-{file_dt_string}.log" + + log_file_path = os.path.join(output_folder_path, log_file_name) + + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(logging.INFO) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + + logger = logging.getLogger() + logger.addHandler(file_handler) + logger.addHandler(console_handler) + logger.setLevel(logging.DEBUG) + + logging.info(f'Started : {start_time.strftime("%m/%d/%Y %H:%M:%S")}') + logging.info("----------------") def create_ble_benchmark( @@ -59,12 +84,22 @@ def create_ble_benchmark( if not os.path.exists(save_folder): os.makedirs(save_folder) + __setup_logger(save_folder) + + start_time = dt.datetime.now(dt.timezone.utc) + + print("==================================") + logging.info("Starting load of BLE benchmark data") + logging.info(f"Start time: {start_time.strftime('%m/%d/%Y %H:%M:%S')}") + print() + + # EBFE_urls_20230608.xlsx acquired from FEMA (fethomps@usgs.gov) ext = os.path.splitext(input_file)[1] if ext == '.xlsx': - data = pd.read_excel(input_file, header=None, names=['size', 'units', 'URL']) + data = pd.read_excel(input_file, header=None, names=['Size', 'Units', 'URL']) elif ext == '.csv': - data = pd.read_csv(input_file, header=None, names=['size', 'units', 'URL']) + data = pd.read_csv(input_file, header=None, names=['Size', 'Units', 'URL']) # Subset Spatial Data URLs spatial_df = data[data['URL'].str.contains('SpatialData')] @@ -78,8 +113,13 @@ def create_ble_benchmark( spatial_df, ble_geodatabase = download_and_extract_rasters(spatial_df, save_folder) + print() + logging.info ("=======================================") for i, row in spatial_df.iterrows(): huc = row['HUC'] + logging.info("++++++++++++++++++++++++++") + logging.info(f"HUC is {huc}") + # reference_raster is used to set the metadata for benchmark_raster reference_raster = os.path.join(reference_folder, f'{huc}/branches/0/rem_zeroed_masked_0.tif') for benchmark_raster in row['rasters']: @@ -92,8 +132,10 @@ def create_ble_benchmark( out_raster_path = os.path.join(out_raster_dir, f"ble_huc_{huc}_extent_{magnitude}.tif") # Make benchmark inundation raster + logging.info(f" {huc} : magnitude : {magnitude} : 
preprocessing stats") preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path) + logging.info(f" {huc} : magnitude : {magnitude} : creating flow file") create_flow_forecast_file( huc, ble_geodatabase, @@ -104,6 +146,25 @@ def create_ble_benchmark( nwm_feature_id_field, ) + # Get time metrics + end_time = dt.datetime.now(dt.timezone.utc) + logging.info("") + print("==================================") + logging.info("Complete") + logging.info(f" End time: {end_time.strftime('%m/%d/%Y %H:%M:%S')}") + + time_delta = end_time - start_time + total_seconds = int(time_delta.total_seconds()) + + ___, rem_seconds = divmod(total_seconds, 60 * 60 * 24) + total_hours, rem_seconds = divmod(rem_seconds, 60 * 60) + total_mins, seconds = divmod(rem_seconds, 60) + + time_fmt = f"{total_hours:02d} hours {total_mins:02d} mins {seconds:02d} secs" + + logging.info(f"Duration: {time_fmt}") + print() + def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): """ @@ -131,20 +192,47 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): huc_names = [] out_files = [] out_list = [] + + logging.info ("=======================================") + logging.info ("Extracting and downloading spatial data") + print() for i, row in spatial_df.iterrows(): # Extract HUC and HUC Name from URL url = row['URL'] + + if url is None: + continue + + logging.info("++++++++++++++++++++++++++") + logging.info(f"URL is {url}") + huc, huc_name = os.path.basename(os.path.dirname(url)).split('_') + + # Check to ensure it is a valid HUC (some are not) + is_ble_huc_valid, ___ = is_valid_huc(huc) + + if not is_ble_huc_valid: + logging.info(f"Invalid huc") + continue + + # logging.info + logging.info(f"{huc} : Downloading and extracting for {huc_name}") hucs.append(huc) huc_names.append(huc_name) - # Download and unzip file + # Download file + logging.info(f" {huc} : Unzipping") save_file = os.path.join(save_folder, os.path.basename(url)) if not os.path.exists(save_file): http_response = urlopen(url) zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=save_file) + else: + logging.info(f" {huc} : zip file already exists: {save_file}") + + # Loading gdb + logging.info(f" {huc} : Loading gdb") gdb_list = glob(os.path.join(save_file, '**', '*.gdb'), recursive=True) if len(gdb_list) == 1: @@ -158,13 +246,16 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): if not os.path.exists(out_file): # Read raster data from GDB - print(f'Reading {depth_raster} for {huc}') + print(f'.. 
{huc} : Reading {depth_raster}')

                depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0]

                extract_raster(depth_raster_path, out_file)

                out_list.append(out_file)

+        else:
+            logging.info(f"  {huc} : gdb_list does not equal 1")
+
     if len(out_list) > 0:
         out_files.append(out_list)

@@ -172,6 +263,10 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str):
     spatial_df['HUC_Name'] = huc_names
     spatial_df['rasters'] = out_files

+    print()
+    logging.info ("Extracting and downloading spatial data - COMPLETE")
+    print()
+
     return spatial_df, ble_geodatabase

@@ -208,7 +303,7 @@ def extract_raster(in_raster: str, out_raster: str):
     parser = argparse.ArgumentParser(
         description='Create BLE benchmark files',
         usage='''python3 /foss_fim/data/ble/ble_benchmark/create_ble_benchmark.py
-            -i /data/inputs/ble/ble_benchmark/EBFE_urls_20230608.xlsx
+            -i /data/inputs/ble/ble_benchmark/EBFE_urls_20240416.xlsx
             -s /data/temp/ble_benchmark
             -r /data/outputs/my_run/
             -o /data/test_cases/ble_test_cases/validation_data_ble
             -n /data/inputs/nwm_hydrofabric/nwm_flows.gpkg
             -u 12030105
        ''',
    )
diff --git a/src/utils/shared_validators.py b/src/utils/shared_validators.py
index b51251283..4a5417464 100644
--- a/src/utils/shared_validators.py
+++ b/src/utils/shared_validators.py
@@ -67,3 +67,38 @@ def is_valid_crs(crs):
         return False, err_msg, ""

     return True, "", crs_number
+
+
+# -------------------------------------------------
+def is_valid_huc(huc_number):
+    """
+    Returns:
+      - First is a bool (True or False). True means it is a valid HUC.
+      - Second is a string, which may be empty; if validation failed, it holds the reason for the failure.
+
+    It is up to the calling code to decide if an exception should be raised.
+
+    Usage:
+        valid_huc, err_msg = is_valid_huc(huc_number)
+
+        (if you choose to handle it this way...)
+ if (valid_huc == False): + raise ValueError(err_msg) + + """ + + err_msg = "" + + if huc_number is None: + err_msg = "huc number not set" + return False, err_msg + + if len(str(huc_number)) != 8: + err_msg = f"huc number of {huc_number} is not eight characters in length" + return False, err_msg + + if huc_number.isnumeric() is False: + err_msg = f"huc number of {huc_number} is not a number" + return False, err_msg + + return True, "" \ No newline at end of file From 3ea81394ef348a44918f76ffd995b9558e74bb32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CRobHanna-NOAA=E2=80=9D?= <“Robert.Hanna@NOAA.gov”> Date: Fri, 10 May 2024 20:36:14 +0000 Subject: [PATCH 2/9] update benchmark code --- data/ble/ble_benchmark/create_ble_benchmark.py | 17 ++++++++--------- src/utils/shared_validators.py | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index f2caa6250..9e7db89e4 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -7,8 +7,8 @@ from datetime import datetime from glob import glob from io import BytesIO -from urllib.request import urlopen from urllib.parse import urlsplit +from urllib.request import urlopen from zipfile import ZipFile import numpy as np @@ -16,6 +16,7 @@ from create_flow_forecast_file import create_flow_forecast_file from osgeo import gdal from preprocess_benchmark import preprocess_benchmark_static + from src.utils.shared_validators import is_valid_huc @@ -93,7 +94,6 @@ def create_ble_benchmark( logging.info(f"Start time: {start_time.strftime('%m/%d/%Y %H:%M:%S')}") print() - # EBFE_urls_20230608.xlsx acquired from FEMA (fethomps@usgs.gov) ext = os.path.splitext(input_file)[1] if ext == '.xlsx': @@ -114,7 +114,7 @@ def create_ble_benchmark( spatial_df, ble_geodatabase = download_and_extract_rasters(spatial_df, save_folder) print() - logging.info ("=======================================") + logging.info("=======================================") for i, row in spatial_df.iterrows(): huc = row['HUC'] logging.info("++++++++++++++++++++++++++") @@ -193,8 +193,8 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): out_files = [] out_list = [] - logging.info ("=======================================") - logging.info ("Extracting and downloading spatial data") + logging.info("=======================================") + logging.info("Extracting and downloading spatial data") print() for i, row in spatial_df.iterrows(): # Extract HUC and HUC Name from URL @@ -202,7 +202,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): if url is None: continue - + logging.info("++++++++++++++++++++++++++") logging.info(f"URL is {url}") @@ -212,7 +212,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): is_ble_huc_valid, ___ = is_valid_huc(huc) if not is_ble_huc_valid: - logging.info(f"Invalid huc") + logging.info("Invalid huc") continue # logging.info @@ -230,7 +230,6 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): else: logging.info(f" {huc} : zip file already exists: {save_file}") - # Loading gdb logging.info(f" {huc} : Loading gdb") gdb_list = glob(os.path.join(save_file, '**', '*.gdb'), recursive=True) @@ -264,7 +263,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): spatial_df['rasters'] = out_files print() - logging.info ("Extracting and downloading spatial data 
- COMPLETE") + logging.info("Extracting and downloading spatial data - COMPLETE") print() return spatial_df, ble_geodatabase diff --git a/src/utils/shared_validators.py b/src/utils/shared_validators.py index 4a5417464..5a0bace6a 100644 --- a/src/utils/shared_validators.py +++ b/src/utils/shared_validators.py @@ -101,4 +101,4 @@ def is_valid_huc(huc_number): err_msg = f"huc number of {huc_number} is not a number" return False, err_msg - return True, "" \ No newline at end of file + return True, "" From 4e431c4e087d5395978978b37aba2341b1729a45 Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Mon, 13 May 2024 23:05:47 +0000 Subject: [PATCH 3/9] fixes to run ble reload --- .../ble/ble_benchmark/create_ble_benchmark.py | 71 +++++++++++++------ .../create_flow_forecast_file.py | 23 +++++- .../ble/ble_benchmark/preprocess_benchmark.py | 17 ++++- 3 files changed, 88 insertions(+), 23 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index 9e7db89e4..8a9d01c85 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -4,27 +4,29 @@ import datetime as dt import logging import os +import shutil from datetime import datetime from glob import glob from io import BytesIO +from pathlib import Path from urllib.parse import urlsplit from urllib.request import urlopen from zipfile import ZipFile import numpy as np import pandas as pd +import rasterio from create_flow_forecast_file import create_flow_forecast_file from osgeo import gdal from preprocess_benchmark import preprocess_benchmark_static -from src.utils.shared_validators import is_valid_huc +from utils.shared_validators import is_valid_huc def __setup_logger(output_folder_path): start_time = datetime.now() file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") log_file_name = f"create_ble_benchmark-{file_dt_string}.log" - log_file_path = os.path.join(output_folder_path, log_file_name) file_handler = logging.FileHandler(log_file_path) @@ -94,15 +96,27 @@ def create_ble_benchmark( logging.info(f"Start time: {start_time.strftime('%m/%d/%Y %H:%M:%S')}") print() - # EBFE_urls_20230608.xlsx acquired from FEMA (fethomps@usgs.gov) + # set from GDAL options, mostly to shut off the verbosity + rasterio_gdal_path = rasterio._env.get_gdal_data() + os.environ['GDAL_DATA'] = rasterio_gdal_path + gdal_new_config = {'CPL_DEBUG': 'OFF', 'GDAL_CACHEMAX': '536870912', 'GDAL_DATA': rasterio_gdal_path} + + for key, val in gdal_new_config.items(): + gdal.SetConfigOption(key, val) + + # Latest is: + # EBFE_urls_20240426.xlsx acquired from FEMA (fethomps@usgs.gov) (Florence Thompson) + + # *** Note: with each new file we get from Florence, you might have to adjust the columns below. 
ext = os.path.splitext(input_file)[1] if ext == '.xlsx': - data = pd.read_excel(input_file, header=None, names=['Size', 'Units', 'URL']) + data = pd.read_excel(input_file, header=0, names=['date', 'size', 'units', 'URL']) elif ext == '.csv': - data = pd.read_csv(input_file, header=None, names=['Size', 'Units', 'URL']) + data = pd.read_csv(input_file, header=None, names=['size', 'units', 'URL']) # Subset Spatial Data URLs spatial_df = data[data['URL'].str.contains('SpatialData')] + if huc is not None: spatial_df = spatial_df[spatial_df['URL'].str.contains(huc)] @@ -115,10 +129,15 @@ def create_ble_benchmark( print() logging.info("=======================================") + logging.info("") + logging.info("Creating flow files") + logging.info("") + num_recs = len(spatial_df) for i, row in spatial_df.iterrows(): huc = row['HUC'] logging.info("++++++++++++++++++++++++++") logging.info(f"HUC is {huc}") + print(f" Flow file {i+1} of {num_recs}") # reference_raster is used to set the metadata for benchmark_raster reference_raster = os.path.join(reference_folder, f'{huc}/branches/0/rem_zeroed_masked_0.tif') @@ -146,6 +165,13 @@ def create_ble_benchmark( nwm_feature_id_field, ) + # cleaning up temp files. + for f in Path(save_folder).glob(f"{huc}*"): + if os.path.isfile(f): + os.remove(f) + else: + shutil.rmtree(f, ignore_errors=True) + # Get time metrics end_time = dt.datetime.now(dt.timezone.utc) logging.info("") @@ -195,10 +221,15 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): logging.info("=======================================") logging.info("Extracting and downloading spatial data") + + num_recs = len(spatial_df) + logging.info(f" Number of HUC records to download is {num_recs}") print() for i, row in spatial_df.iterrows(): + # Extract HUC and HUC Name from URL url = row['URL'] + print(f" Downloading {i+1} of {num_recs}") if url is None: continue @@ -212,26 +243,32 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): is_ble_huc_valid, ___ = is_valid_huc(huc) if not is_ble_huc_valid: - logging.info("Invalid huc") + logging.info(f"{huc} : Invalid huc") continue # logging.info logging.info(f"{huc} : Downloading and extracting for {huc_name}") + + # we don't want to add a huc record if it is aleady there + if huc in hucs: + logging.info(f"{huc} : HUC is already included in the list. Skipping {url}") + continue + hucs.append(huc) huc_names.append(huc_name) # Download file - logging.info(f" {huc} : Unzipping") save_file = os.path.join(save_folder, os.path.basename(url)) if not os.path.exists(save_file): + logging.info(f"Saving download file to {save_file}") http_response = urlopen(url) zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=save_file) else: - logging.info(f" {huc} : zip file already exists: {save_file}") + logging.info(f"{huc} : zip file already exists in file system: {save_file}") # Loading gdb - logging.info(f" {huc} : Loading gdb") + logging.info(f"{huc} : Loading gdb") gdb_list = glob(os.path.join(save_file, '**', '*.gdb'), recursive=True) if len(gdb_list) == 1: @@ -245,7 +282,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): if not os.path.exists(out_file): # Read raster data from GDB - print(f'.. 
{huc} : Reading {depth_raster}') + print(f'{huc} : Reading {depth_raster}') depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0] extract_raster(depth_raster_path, out_file) @@ -253,7 +290,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): out_list.append(out_file) else: - logging.info(f" {huc} : gdb_list does not equal 1") + logging.info(f"{huc} : gdb_list does not equal 1") if len(out_list) > 0: out_files.append(out_list) @@ -304,10 +341,10 @@ def extract_raster(in_raster: str, out_raster: str): usage='''python3 /foss_fim/data/ble/ble_benchmark/create_ble_benchmark.py -i /data/inputs/ble/ble_benchmark/EBFE_urls_20240416.xlsx -s /data/temp/ble_benchmark - -r /data/outputs/my_run/ - -o /data/test_cases/ble_test_cases/validation_data_ble + -r /data/outputs/fim_test_BED/ + -o /data/test_cases/test/ble_test_cases/validation_data_ble -n /data/inputs/nwm_hydrofabric/nwm_flows.gpkg - -u 12030105 + -u 12040101 ''', ) parser.add_argument('-i', '--input-file', type=str, help='Input file', required=True) @@ -346,10 +383,4 @@ def extract_raster(in_raster: str, out_raster: str): args = vars(parser.parse_args()) - start_time = datetime.now() - print('Pulling BLE Benchmark data... \n') - create_ble_benchmark(**args) - - end_time = datetime.now() - print('\n Finished Pulling BLE Benchmark data \n', 'Total Time: ', end_time - start_time) diff --git a/data/ble/ble_benchmark/create_flow_forecast_file.py b/data/ble/ble_benchmark/create_flow_forecast_file.py index 65c22daf0..03bc18ff6 100755 --- a/data/ble/ble_benchmark/create_flow_forecast_file.py +++ b/data/ble/ble_benchmark/create_flow_forecast_file.py @@ -9,7 +9,7 @@ import pandas as pd -gpd.options.io_engine = "pyogrio" +# gpd.options.io_engine = "pyogrio" def create_flow_forecast_file( @@ -55,6 +55,9 @@ def create_flow_forecast_file( ''' + # print(locals()) + print(" ******************************************") + def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): """ This function will fill in missing flows in the forecast dataframe. @@ -135,15 +138,25 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): ble_xs_layer_name = 'XS_1D' xs_layer = gpd.read_file(ble_geodatabase, layer=ble_xs_layer_name) + # print("^^^^^^ xs_layer") + # print(xs_layer) + # print(xs_layer.crs) # Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. - nwm_river_layer = gpd.read_file(nwm_geodatabase, bbox=xs_layer, layer=nwm_stream_layer_name) + nwm_river_layer = gpd.read_file(nwm_geodatabase, mask=xs_layer, layer=nwm_stream_layer_name) + # print("^^^^^^ nwm_river_layer") + # print(nwm_river_layer) + # print(nwm_river_layer.crs) # Make sure xs_layer is in same projection as nwm_river_layer. xs_layer_proj = xs_layer.to_crs(nwm_river_layer.crs) + # print("^^^^^^ xs_layer_proj") + # print(xs_layer_proj) # Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output. 
intersection = gpd.overlay(xs_layer_proj, nwm_river_layer, how='intersection', keep_geom_type=False) + # print("^^^^^^ intersection") + # print(intersection) ## Create the flow forecast files # Define fields containing flow (typically these won't change for BLE) @@ -168,6 +181,9 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): forecast = forecast.groupby('feature_id').median() forecast = forecast.reset_index(level=0) + # print(f"forecast 2") + # print(len(forecast)) + # Convert CFS to CMS forecast['discharge'] = forecast['discharge'] * dischargeMultiplier @@ -175,6 +191,9 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): print(f'Filling in missing flows for {return_period[i]} flow.') forecast = fill_missing_flows(forecast, nwm_river_layer) + # print(f"forecast 3") + # print(len(forecast)) + # Set paths and write file output_dir = os.path.join(output_parent_dir, huc) dir_of_csv = os.path.join(output_dir, return_period[i]) diff --git a/data/ble/ble_benchmark/preprocess_benchmark.py b/data/ble/ble_benchmark/preprocess_benchmark.py index f9b1792cc..7084bcc39 100755 --- a/data/ble/ble_benchmark/preprocess_benchmark.py +++ b/data/ble/ble_benchmark/preprocess_benchmark.py @@ -1,9 +1,14 @@ #!/usr/bin/env python3 import argparse +import os +import sys import numpy as np import rasterio +import rasterio._env +import rasterio._path +import rasterio.enums import rasterio.mask from rasterio.warp import Resampling, calculate_default_transform, reproject @@ -33,6 +38,15 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p (required for writing to output dataset). ''' + + # rasterio_gdal_path = rasterio._env.get_gdal_data() + # os.environ['GDAL_DATA'] = rasterio_gdal_path + + # with warnings.catch_warnings(): + # warnings.simplefilter("ignore") + + # with rasterio.Env(CPL_DEBUG=OFF, GDAL_CACHEMAX=512000000): + # Open and read raster and benchmark rasters reference = rasterio.open(reference_raster) benchmark = rasterio.open(benchmark_raster) @@ -82,10 +96,11 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p # Write out preprocessed benchmark array to raster if path is supplied if out_raster_path is not None: - with rasterio.Env(): + with rasterio.Env(CPL_DEBUG=False): # Write out reassigned values to raster dataset with rasterio.open(out_raster_path, 'w', **profile) as dst: dst.write(boolean_benchmark.astype('int8'), 1) + return boolean_benchmark.astype('int8'), profile From 283d0827afcb75b621a8b0a56d486b15765746af Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Mon, 13 May 2024 23:23:30 +0000 Subject: [PATCH 4/9] cleanup debugging --- .../ble_benchmark/create_flow_forecast_file.py | 16 ---------------- data/ble/ble_benchmark/preprocess_benchmark.py | 10 ---------- 2 files changed, 26 deletions(-) diff --git a/data/ble/ble_benchmark/create_flow_forecast_file.py b/data/ble/ble_benchmark/create_flow_forecast_file.py index 03bc18ff6..647148157 100755 --- a/data/ble/ble_benchmark/create_flow_forecast_file.py +++ b/data/ble/ble_benchmark/create_flow_forecast_file.py @@ -138,25 +138,15 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): ble_xs_layer_name = 'XS_1D' xs_layer = gpd.read_file(ble_geodatabase, layer=ble_xs_layer_name) - # print("^^^^^^ xs_layer") - # print(xs_layer) - # print(xs_layer.crs) # Read in the NWM stream layer into a geopandas dataframe using the bounding box option based on the extents of the BLE XS layer. 
nwm_river_layer = gpd.read_file(nwm_geodatabase, mask=xs_layer, layer=nwm_stream_layer_name)
-    # print("^^^^^^ nwm_river_layer")
-    # print(nwm_river_layer)
-    # print(nwm_river_layer.crs)

     # Make sure xs_layer is in same projection as nwm_river_layer.
     xs_layer_proj = xs_layer.to_crs(nwm_river_layer.crs)
-    # print("^^^^^^ xs_layer_proj")
-    # print(xs_layer_proj)

     # Perform an intersection of the BLE layers and the NWM layers, using the keep_geom_type set to False produces a point output.
     intersection = gpd.overlay(xs_layer_proj, nwm_river_layer, how='intersection', keep_geom_type=False)
-    # print("^^^^^^ intersection")
-    # print(intersection)

     ## Create the flow forecast files
     # Define fields containing flow (typically these won't change for BLE)
@@ -181,9 +171,6 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame):
     forecast = forecast.groupby('feature_id').median()
     forecast = forecast.reset_index(level=0)

-    # print(f"forecast 2")
-    # print(len(forecast))
-
     # Convert CFS to CMS
     forecast['discharge'] = forecast['discharge'] * dischargeMultiplier

@@ -191,9 +178,6 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame):
     print(f'Filling in missing flows for {return_period[i]} flow.')
     forecast = fill_missing_flows(forecast, nwm_river_layer)

-    # print(f"forecast 3")
-    # print(len(forecast))
-
     # Set paths and write file
     output_dir = os.path.join(output_parent_dir, huc)
     dir_of_csv = os.path.join(output_dir, return_period[i])
diff --git a/data/ble/ble_benchmark/preprocess_benchmark.py b/data/ble/ble_benchmark/preprocess_benchmark.py
index 7084bcc39..b7f8a5a08 100755
--- a/data/ble/ble_benchmark/preprocess_benchmark.py
+++ b/data/ble/ble_benchmark/preprocess_benchmark.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python3

 import argparse
-import os
-import sys

 import numpy as np
 import rasterio
@@ -39,14 +37,6 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p

     '''

-    # rasterio_gdal_path = rasterio._env.get_gdal_data()
-    # os.environ['GDAL_DATA'] = rasterio_gdal_path
-
-    # with warnings.catch_warnings():
-    #     warnings.simplefilter("ignore")
-
-    # with rasterio.Env(CPL_DEBUG=OFF, GDAL_CACHEMAX=512000000):
-
     # Open and read raster and benchmark rasters
     reference = rasterio.open(reference_raster)
     benchmark = rasterio.open(benchmark_raster)

From 71c84192e278e0f163a255be56de7403dd5dde75 Mon Sep 17 00:00:00 2001
From: Rob Hanna - NOAA <90854818+RobHanna-NOAA@users.noreply.github.com>
Date: Mon, 13 May 2024 17:33:51 -0600
Subject: [PATCH 5/9] Update CHANGELOG.md

---
 docs/CHANGELOG.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index c904fef02..474c4274a 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,22 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

+## v4.5.x.x - 2024-05-15 - [PR#1155](https://github.com/NOAA-OWP/inundation-mapping/pull/1155)
+
+Download and process the latest BLE benchmark data.
+
+### Changes
+- `data/ble/ble_benchmark`:
+    - `create_ble_benchmark.py`: Added a logging system, added GDAL config options to control output messages, and added cleanup of temporary working files.
+    - `create_flow_forecast_file.py`: Added a logging system.
+    - `preprocess_benchmark.py`: Adjusted GDAL settings to control excess output.
+
+- `src/utils`:
+    - `shared_validators.py`: Added a function to validate whether a HUC number is well formed.

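+A minimal usage sketch of the new validator, mirroring its docstring (the import path matches how `create_ble_benchmark.py` imports it):
+
+```python
+from utils.shared_validators import is_valid_huc
+
+valid_huc, err_msg = is_valid_huc("12030105")
+if not valid_huc:
+    raise ValueError(err_msg)
+```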
+ + ## v4.5.0.1 - 2024-05-09 - [PR#1150](https://github.com/NOAA-OWP/inundation-mapping/pull/1150) Fixes two bugs discovered in v4.5.0.0: From c2d01be446791e4e3c7760785f5aaf6caa8beae9 Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Tue, 14 May 2024 20:44:06 +0000 Subject: [PATCH 6/9] fix raster df --- .../ble/ble_benchmark/create_ble_benchmark.py | 28 +++++++------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index 8a9d01c85..95a02a597 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -132,6 +132,7 @@ def create_ble_benchmark( logging.info("") logging.info("Creating flow files") logging.info("") + num_recs = len(spatial_df) for i, row in spatial_df.iterrows(): huc = row['HUC'] @@ -139,9 +140,9 @@ def create_ble_benchmark( logging.info(f"HUC is {huc}") print(f" Flow file {i+1} of {num_recs}") - # reference_raster is used to set the metadata for benchmark_raster + # Reference_raster is used to set the metadata for benchmark_raster reference_raster = os.path.join(reference_folder, f'{huc}/branches/0/rem_zeroed_masked_0.tif') - for benchmark_raster in row['rasters']: + for benchmark_raster in row['raster_path']: magnitude = '100yr' if 'BLE_DEP01PCT' in benchmark_raster else '500yr' out_raster_dir = os.path.join(benchmark_folder, huc, magnitude) @@ -167,10 +168,10 @@ def create_ble_benchmark( # cleaning up temp files. for f in Path(save_folder).glob(f"{huc}*"): - if os.path.isfile(f): + if os.path.isfile(f) is True: os.remove(f) else: - shutil.rmtree(f, ignore_errors=True) + shutil.rmtree(f, ignore_errors=False) # Get time metrics end_time = dt.datetime.now(dt.timezone.utc) @@ -231,9 +232,6 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): url = row['URL'] print(f" Downloading {i+1} of {num_recs}") - if url is None: - continue - logging.info("++++++++++++++++++++++++++") logging.info(f"URL is {url}") @@ -249,14 +247,6 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): # logging.info logging.info(f"{huc} : Downloading and extracting for {huc_name}") - # we don't want to add a huc record if it is aleady there - if huc in hucs: - logging.info(f"{huc} : HUC is already included in the list. 
Skipping {url}") - continue - - hucs.append(huc) - huc_names.append(huc_name) - # Download file save_file = os.path.join(save_folder, os.path.basename(url)) if not os.path.exists(save_file): @@ -286,19 +276,21 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0] extract_raster(depth_raster_path, out_file) - out_list.append(out_file) else: logging.info(f"{huc} : gdb_list does not equal 1") - if len(out_list) > 0: out_files.append(out_list) + print(f"Number of hucs for spatial df = {len(hucs)}") + print(f"Number of HUC_Name for spatial df = {len(huc_names)}") + print(f"Number of raster files for spatial df = {len(out_files)}") spatial_df['HUC'] = hucs spatial_df['HUC_Name'] = huc_names - spatial_df['rasters'] = out_files + spatial_df['raster_path'] = out_files + spatial_df.to_csv(os.path.join(save_folder, "spatial_db_w_rasters.csv"), header=True, index=False) print() logging.info("Extracting and downloading spatial data - COMPLETE") print() From 681bea244f8c35db8d839a194fb5a240496ff21f Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Thu, 16 May 2024 13:33:37 +0000 Subject: [PATCH 7/9] fundamentally starting over --- .../ble/ble_benchmark/create_ble_benchmark.py | 138 ++++++++++++++---- .../create_flow_forecast_file.py | 4 +- .../ble/ble_benchmark/preprocess_benchmark.py | 24 ++- 3 files changed, 128 insertions(+), 38 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index 95a02a597..8490b1bf5 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -4,7 +4,7 @@ import datetime as dt import logging import os -import shutil +import sys from datetime import datetime from glob import glob from io import BytesIO @@ -15,7 +15,8 @@ import numpy as np import pandas as pd -import rasterio + +# import rasterio from create_flow_forecast_file import create_flow_forecast_file from osgeo import gdal from preprocess_benchmark import preprocess_benchmark_static @@ -97,12 +98,12 @@ def create_ble_benchmark( print() # set from GDAL options, mostly to shut off the verbosity - rasterio_gdal_path = rasterio._env.get_gdal_data() - os.environ['GDAL_DATA'] = rasterio_gdal_path - gdal_new_config = {'CPL_DEBUG': 'OFF', 'GDAL_CACHEMAX': '536870912', 'GDAL_DATA': rasterio_gdal_path} + # rasterio_gdal_path = rasterio._env.get_gdal_data() + # os.environ['GDAL_DATA'] = rasterio_gdal_path + # gdal_new_config = {'CPL_DEBUG': 'OFF', 'GDAL_CACHEMAX': '536870912', 'GDAL_DATA': rasterio_gdal_path} - for key, val in gdal_new_config.items(): - gdal.SetConfigOption(key, val) + # for key, val in gdal_new_config.items(): + # gdal.SetConfigOption(key, val) # Latest is: # EBFE_urls_20240426.xlsx acquired from FEMA (fethomps@usgs.gov) (Florence Thompson) @@ -125,8 +126,17 @@ def create_ble_benchmark( # Convert size to MiB spatial_df['MiB'] = np.where(spatial_df['units'] == 'GiB', spatial_df['size'] * 1000, spatial_df['size']) + # create some new empty columns + num_cols = len(spatial_df.columns) + spatial_df.insert(num_cols - 1, "HUC", "") + spatial_df.insert(num_cols, "HUC_Name", "") + spatial_df.insert(num_cols + 1, "raster_paths", "") + spatial_df = spatial_df.reset_index() + spatial_df, ble_geodatabase = download_and_extract_rasters(spatial_df, save_folder) + print(ble_geodatabase) + print() logging.info("=======================================") logging.info("") @@ -134,6 +144,7 @@ def 
create_ble_benchmark( logging.info("") num_recs = len(spatial_df) + print(spatial_df) for i, row in spatial_df.iterrows(): huc = row['HUC'] logging.info("++++++++++++++++++++++++++") @@ -142,7 +153,11 @@ def create_ble_benchmark( # Reference_raster is used to set the metadata for benchmark_raster reference_raster = os.path.join(reference_folder, f'{huc}/branches/0/rem_zeroed_masked_0.tif') - for benchmark_raster in row['raster_path']: + print(f"number of rasters is {len(row['raster_paths'])}") + print(f"full raster path value is {row['raster_paths']}") + for benchmark_raster in row['raster_paths']: + + print(f"benchmark raster is {benchmark_raster}") magnitude = '100yr' if 'BLE_DEP01PCT' in benchmark_raster else '500yr' out_raster_dir = os.path.join(benchmark_folder, huc, magnitude) @@ -165,13 +180,7 @@ def create_ble_benchmark( nwm_stream_layer_name, nwm_feature_id_field, ) - - # cleaning up temp files. - for f in Path(save_folder).glob(f"{huc}*"): - if os.path.isfile(f) is True: - os.remove(f) - else: - shutil.rmtree(f, ignore_errors=False) + logging.info(f"HUC {huc} flowfiles complete") # Get time metrics end_time = dt.datetime.now(dt.timezone.utc) @@ -191,6 +200,7 @@ def create_ble_benchmark( logging.info(f"Duration: {time_fmt}") print() + return def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): @@ -226,11 +236,10 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): num_recs = len(spatial_df) logging.info(f" Number of HUC records to download is {num_recs}") print() - for i, row in spatial_df.iterrows(): - + for idx, row in spatial_df.iterrows(): # Extract HUC and HUC Name from URL url = row['URL'] - print(f" Downloading {i+1} of {num_recs}") + print(f" Downloading {idx+1} of {num_recs}") logging.info("++++++++++++++++++++++++++") logging.info(f"URL is {url}") @@ -244,18 +253,19 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): logging.info(f"{huc} : Invalid huc") continue + hucs.append(huc) + huc_names.append(huc_name) + # logging.info logging.info(f"{huc} : Downloading and extracting for {huc_name}") # Download file save_file = os.path.join(save_folder, os.path.basename(url)) + logging.info(f"Saving download file to {save_file}") if not os.path.exists(save_file): - logging.info(f"Saving download file to {save_file}") http_response = urlopen(url) zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=save_file) - else: - logging.info(f"{huc} : zip file already exists in file system: {save_file}") # Loading gdb logging.info(f"{huc} : Loading gdb") @@ -266,7 +276,10 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): src_ds = gdal.Open(ble_geodatabase, gdal.GA_ReadOnly) subdatasets = src_ds.GetSubDatasets() + src_ds = None + # Find depth rasters + # huc_rasters = [] for depth_raster in depth_rasters: out_file = os.path.join(save_folder, f'{huc}_{depth_raster}.tif') @@ -275,20 +288,39 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): print(f'{huc} : Reading {depth_raster}') depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0] - extract_raster(depth_raster_path, out_file) + if extract_raster(huc, depth_raster_path, out_file) is False: + continue + + # huc_rasters.append(out_file) out_list.append(out_file) - else: - logging.info(f"{huc} : gdb_list does not equal 1") + # if len(huc_rasters) > 0: + + # spatial_df.at[idx, 'HUC'] = huc + # spatial_df.at[idx, 'HUC_Name'] = huc_name + # 
spatial_df.at[idx, 'raster_paths'] = huc_rasters + + # hucs.append(huc) + # huc_names.append(huc_name) + # out_files.append(str(huc_rasters)) + if len(out_list) > 0: out_files.append(out_list) - print(f"Number of hucs for spatial df = {len(hucs)}") - print(f"Number of HUC_Name for spatial df = {len(huc_names)}") - print(f"Number of raster files for spatial df = {len(out_files)}") + if len(hucs) == 0: + logging.info("No valid rasters remain. System aborted.") + sys.exit(1) + + # this is ugly + + # print("final hucs") + # print(hucs) + # print(huc_names) + # print(out_files) + spatial_df['HUC'] = hucs spatial_df['HUC_Name'] = huc_names - spatial_df['raster_path'] = out_files + spatial_df['raster_paths'] = out_files spatial_df.to_csv(os.path.join(save_folder, "spatial_db_w_rasters.csv"), header=True, index=False) print() @@ -298,7 +330,7 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): return spatial_df, ble_geodatabase -def extract_raster(in_raster: str, out_raster: str): +def extract_raster(huc: str, in_raster: str, out_raster: str): """ Extract raster from GDB and save to out_raster @@ -311,21 +343,67 @@ def extract_raster(in_raster: str, out_raster: str): Returns ------- - None + True = raster saved """ data_ds = gdal.Open(in_raster, gdal.GA_ReadOnly) + gt = data_ds.GetGeoTransform() + + # Why are we using GDAL? becuase it is loading directliy from a gdb + # ie) in_raster is OpenFileGDB:"/data/test_cases/rob_ble_reload/ + # download_files_test_large/08040301_SpatialData.zip/Spatial/LA_LowerRed_BLE.gdb":BLE_DEP0_2PCT + + # TODO: May 15, 2024 + # Some of them come in as 5m resolution is too big and memory failes. It will load but ReadAsArray fails + # we tried some gdal warp and various things to get it work. + # For now, we will just a have to skip anyting that is not 10m. + + if gt[1] != 10.0: + # # resample + # layer_name = in_raster + # # rename the file + # resampled_file_name = file_name.replace(".tif", "_resampled.tif") + + # print(f"in : {in_raster} and resample : {resampled_file_name}") + + # print(f" --- gdal wrap : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") + # #gdal.WarpOptions(geoloc=True, format='GTiff', xRes=10.0, yRes=10.0) + # # kwargs = {'format': 'GTiff', 'geoloc': True} + # #gdal.Warp(in_raster, resampled_file_name, geoloc=True, format='GTiff', xRes=10, yRes=10) + # gdal.Warp(in_raster, resampled_file_name, format='GTiff') + + # print(f" --- gdal re-open : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") + # # reload the resampled version + # data_ds = gdal.Open(in_raster, gdal.GA_ReadOnly) + # print(f" --- gdal re-open : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") + # gt = data_ds.GetGeoTransform() + # print(gt) + + logging.info( + f"Dropping {huc} raster not resolution of 10 m. 
It is {gt[1]} : raster input = {in_raster}" + ) + return False + + # print(f" --- read as array : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") data = data_ds.ReadAsArray() + # print(f" --- GetRasterBand : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") nodata = data_ds.GetRasterBand(1).GetNoDataValue() driver = gdal.GetDriverByName('GTiff') out_ds = driver.Create(out_raster, data.shape[1], data.shape[0], 1, gdal.GDT_Float32) + # print(f" --- SetGeoTransform : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.SetGeoTransform(data_ds.GetGeoTransform()) + # print(f" --- SetProjection : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.SetProjection(data_ds.GetProjection()) + # print(f" --- WriteArray : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.GetRasterBand(1).WriteArray(data) out_ds.GetRasterBand(1).SetNoDataValue(nodata) + # print(f" --- open raster done : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") + data_ds = None out_ds = None + return True + if __name__ == '__main__': parser = argparse.ArgumentParser( diff --git a/data/ble/ble_benchmark/create_flow_forecast_file.py b/data/ble/ble_benchmark/create_flow_forecast_file.py index 647148157..0979c8b76 100755 --- a/data/ble/ble_benchmark/create_flow_forecast_file.py +++ b/data/ble/ble_benchmark/create_flow_forecast_file.py @@ -9,7 +9,7 @@ import pandas as pd -# gpd.options.io_engine = "pyogrio" +gpd.options.io_engine = "pyogrio" def create_flow_forecast_file( @@ -55,8 +55,8 @@ def create_flow_forecast_file( ''' - # print(locals()) print(" ******************************************") + # print(locals()) def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): """ diff --git a/data/ble/ble_benchmark/preprocess_benchmark.py b/data/ble/ble_benchmark/preprocess_benchmark.py index b7f8a5a08..999fef7e6 100755 --- a/data/ble/ble_benchmark/preprocess_benchmark.py +++ b/data/ble/ble_benchmark/preprocess_benchmark.py @@ -1,12 +1,14 @@ #!/usr/bin/env python3 import argparse +import logging import numpy as np import rasterio -import rasterio._env -import rasterio._path -import rasterio.enums + +# import rasterio._env +# import rasterio._path +# import rasterio.enums import rasterio.mask from rasterio.warp import Resampling, calculate_default_transform, reproject @@ -37,9 +39,19 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p ''' + # gdal.SetConfigOption('CPL_LOG', '/dev/null') + # gdal.SetConfigOption('CPL_DEBUG', 'False') + + # with rasterio.Env(CPL_DEBUG=False, + # GDAL_CACHEMAX=536870912, + # CPL_CURL_VERBOSE=False, + # GDAL_DATA=rasterio._env.get_gdal_data() ): + # Open and read raster and benchmark rasters - reference = rasterio.open(reference_raster) - benchmark = rasterio.open(benchmark_raster) + logging.info(f"--- Opening reference_raster: {reference_raster}") + reference = rasterio.open(reference_raster, "r") + logging.info(f"--- Opening benchmark_raster: {benchmark_raster}") + benchmark = rasterio.open(benchmark_raster, "r") benchmark_arr = benchmark.read(1) # Set arbitrary no data value that is not possible value of the benchmark dataset. It is reassigned later. 
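The 10 m resolution gate added in the patch above reads the pixel size from the GDAL geotransform: index 1 of the tuple returned by GetGeoTransform() is the pixel width in the raster's CRS units. A minimal standalone sketch of that check follows; the raster path is hypothetical, while the GDAL calls are the same ones the patch uses.

    from osgeo import gdal

    gdal.UseExceptions()

    # Geotransform layout: (origin_x, pixel_width, row_rotation,
    #                       origin_y, col_rotation, pixel_height)
    ds = gdal.Open("/data/example/BLE_DEP01PCT.tif", gdal.GA_ReadOnly)  # hypothetical path
    gt = ds.GetGeoTransform()

    if int(gt[1]) != 10:
        print(f"Skipping raster: pixel width is {gt[1]} m, expected 10 m")

    ds = None  # release the dataset handle
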
@@ -86,7 +98,7 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p # Write out preprocessed benchmark array to raster if path is supplied if out_raster_path is not None: - with rasterio.Env(CPL_DEBUG=False): + with rasterio.Env(): # Write out reassigned values to raster dataset with rasterio.open(out_raster_path, 'w', **profile) as dst: dst.write(boolean_benchmark.astype('int8'), 1) From e6873aff2812bd3a29aa6a169d6d20f5f4ba8849 Mon Sep 17 00:00:00 2001 From: Rob Hanna Date: Fri, 17 May 2024 01:15:13 +0000 Subject: [PATCH 8/9] working - pre MP --- .../ble/ble_benchmark/create_ble_benchmark.py | 163 ++++++++++-------- .../create_flow_forecast_file.py | 15 +- .../ble/ble_benchmark/preprocess_benchmark.py | 25 +-- 3 files changed, 121 insertions(+), 82 deletions(-) diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py index 8490b1bf5..e256c2b32 100644 --- a/data/ble/ble_benchmark/create_ble_benchmark.py +++ b/data/ble/ble_benchmark/create_ble_benchmark.py @@ -5,6 +5,7 @@ import logging import os import sys +import traceback from datetime import datetime from glob import glob from io import BytesIO @@ -16,7 +17,8 @@ import numpy as np import pandas as pd -# import rasterio +# import fiona +import rasterio from create_flow_forecast_file import create_flow_forecast_file from osgeo import gdal from preprocess_benchmark import preprocess_benchmark_static @@ -24,24 +26,33 @@ from utils.shared_validators import is_valid_huc +gdal.UseExceptions() + + def __setup_logger(output_folder_path): start_time = datetime.now() file_dt_string = start_time.strftime("%Y_%m_%d-%H_%M_%S") log_file_name = f"create_ble_benchmark-{file_dt_string}.log" log_file_path = os.path.join(output_folder_path, log_file_name) - file_handler = logging.FileHandler(log_file_path) - file_handler.setLevel(logging.INFO) - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.DEBUG) + # logging.basicConfig( + # format='%(asctime)s -- ', + # level=logging.INFO, + # datefmt='%Y-%m-%d %H:%M:%S') logger = logging.getLogger() - logger.addHandler(file_handler) - logger.addHandler(console_handler) logger.setLevel(logging.DEBUG) + fmt = logging.Formatter("%(asctime)s >> %(message)s", "%Y-%m-%d %H:%M:%S") - logging.info(f'Started : {start_time.strftime("%m/%d/%Y %H:%M:%S")}') - logging.info("----------------") + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter(fmt) + logger.addHandler(console_handler) + + file_handler = logging.FileHandler(log_file_path) + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(fmt) + logger.addHandler(file_handler) def create_ble_benchmark( @@ -97,14 +108,6 @@ def create_ble_benchmark( logging.info(f"Start time: {start_time.strftime('%m/%d/%Y %H:%M:%S')}") print() - # set from GDAL options, mostly to shut off the verbosity - # rasterio_gdal_path = rasterio._env.get_gdal_data() - # os.environ['GDAL_DATA'] = rasterio_gdal_path - # gdal_new_config = {'CPL_DEBUG': 'OFF', 'GDAL_CACHEMAX': '536870912', 'GDAL_DATA': rasterio_gdal_path} - - # for key, val in gdal_new_config.items(): - # gdal.SetConfigOption(key, val) - # Latest is: # EBFE_urls_20240426.xlsx acquired from FEMA (fethomps@usgs.gov) (Florence Thompson) @@ -121,21 +124,22 @@ def create_ble_benchmark( if huc is not None: spatial_df = spatial_df[spatial_df['URL'].str.contains(huc)] - spatial_df = spatial_df.reset_index() + spatial_df.reset_index(inplace=True) # Convert size to 
MiB spatial_df['MiB'] = np.where(spatial_df['units'] == 'GiB', spatial_df['size'] * 1000, spatial_df['size']) # create some new empty columns num_cols = len(spatial_df.columns) - spatial_df.insert(num_cols - 1, "HUC", "") - spatial_df.insert(num_cols, "HUC_Name", "") - spatial_df.insert(num_cols + 1, "raster_paths", "") - spatial_df = spatial_df.reset_index() + spatial_df.insert(num_cols, "HUC", "") + spatial_df.insert(num_cols + 1, "HUC_Name", "") + spatial_df.insert(num_cols + 2, "raster_paths", "") spatial_df, ble_geodatabase = download_and_extract_rasters(spatial_df, save_folder) - print(ble_geodatabase) + spatial_df = spatial_df[spatial_df['HUC'] != ""] + spatial_df = spatial_df[spatial_df['raster_paths'] != ""] + spatial_df.reset_index(inplace=True) print() logging.info("=======================================") @@ -144,7 +148,6 @@ def create_ble_benchmark( logging.info("") num_recs = len(spatial_df) - print(spatial_df) for i, row in spatial_df.iterrows(): huc = row['HUC'] logging.info("++++++++++++++++++++++++++") @@ -153,11 +156,15 @@ def create_ble_benchmark( # Reference_raster is used to set the metadata for benchmark_raster reference_raster = os.path.join(reference_folder, f'{huc}/branches/0/rem_zeroed_masked_0.tif') - print(f"number of rasters is {len(row['raster_paths'])}") - print(f"full raster path value is {row['raster_paths']}") + if os.path.exists(reference_raster) is False: + logging.info( + f" {huc} : Raster path of {reference_raster} does not exist." " Might be a non-applicable HUC" + ) + continue + for benchmark_raster in row['raster_paths']: - print(f"benchmark raster is {benchmark_raster}") + print(f" {huc} : benchmark raster is {benchmark_raster}") magnitude = '100yr' if 'BLE_DEP01PCT' in benchmark_raster else '500yr' out_raster_dir = os.path.join(benchmark_folder, huc, magnitude) @@ -167,19 +174,31 @@ def create_ble_benchmark( out_raster_path = os.path.join(out_raster_dir, f"ble_huc_{huc}_extent_{magnitude}.tif") # Make benchmark inundation raster - logging.info(f" {huc} : magnitude : {magnitude} : preprocessing stats") - preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path) - - logging.info(f" {huc} : magnitude : {magnitude} : creating flow file") - create_flow_forecast_file( - huc, - ble_geodatabase, - nwm_geopackage, - benchmark_folder, - ble_xs_layer_name, - nwm_stream_layer_name, - nwm_feature_id_field, - ) + logging.info(f" {huc} : magnitude : {magnitude} : preprocessing stats") + try: + preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path) + + except Exception: + logging.info(f"HUC {huc} - An error with preprocessing benchmark stats") + logging.info(traceback.format_exc()) + continue + + logging.info(f" {huc} : magnitude : {magnitude} : creating flow file") + try: + create_flow_forecast_file( + huc, + ble_geodatabase, + nwm_geopackage, + benchmark_folder, + ble_xs_layer_name, + nwm_stream_layer_name, + nwm_feature_id_field, + ) + except Exception: + logging.info(f"HUC {huc} - An error with creating flow files has occured.") + logging.info(traceback.format_exc()) + continue + logging.info(f"HUC {huc} flowfiles complete") # Get time metrics @@ -226,9 +245,9 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): # Download and unzip each file hucs = [] - huc_names = [] - out_files = [] - out_list = [] + # huc_names = [] + # out_files = [] + # out_list = [] logging.info("=======================================") logging.info("Extracting and downloading spatial data") @@ -237,35 
+256,45 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): logging.info(f" Number of HUC records to download is {num_recs}") print() for idx, row in spatial_df.iterrows(): + # Extract HUC and HUC Name from URL url = row['URL'] - print(f" Downloading {idx+1} of {num_recs}") + print(f" Downloading {idx + 1} of {num_recs}") + logging.info("") logging.info("++++++++++++++++++++++++++") logging.info(f"URL is {url}") huc, huc_name = os.path.basename(os.path.dirname(url)).split('_') + # hucs.append(huc) + # huc_names.append(huc_name) # Check to ensure it is a valid HUC (some are not) is_ble_huc_valid, ___ = is_valid_huc(huc) if not is_ble_huc_valid: logging.info(f"{huc} : Invalid huc") + # out_files.append("") continue + spatial_df.at[idx, 'HUC'] = huc + spatial_df.at[idx, 'HUC_Name'] = huc_name + hucs.append(huc) - huc_names.append(huc_name) + # huc_names.append(huc_name) # logging.info logging.info(f"{huc} : Downloading and extracting for {huc_name}") # Download file save_file = os.path.join(save_folder, os.path.basename(url)) - logging.info(f"Saving download file to {save_file}") + logging.info(f"{huc} : Saving download file to {save_file}") if not os.path.exists(save_file): http_response = urlopen(url) zipfile = ZipFile(BytesIO(http_response.read())) zipfile.extractall(path=save_file) + else: + logging.info(f"{huc} : File already exists. Skipping download.") # Loading gdb logging.info(f"{huc} : Loading gdb") @@ -279,33 +308,30 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): src_ds = None # Find depth rasters - # huc_rasters = [] + raster_paths = [] for depth_raster in depth_rasters: out_file = os.path.join(save_folder, f'{huc}_{depth_raster}.tif') if not os.path.exists(out_file): # Read raster data from GDB - print(f'{huc} : Reading {depth_raster}') + logging.info(f'{huc} : Reading {depth_raster}') depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0] if extract_raster(huc, depth_raster_path, out_file) is False: continue - # huc_rasters.append(out_file) - out_list.append(out_file) + raster_paths.append(out_file) - # if len(huc_rasters) > 0: + # out_list.append(out_file) - # spatial_df.at[idx, 'HUC'] = huc - # spatial_df.at[idx, 'HUC_Name'] = huc_name - # spatial_df.at[idx, 'raster_paths'] = huc_rasters + if len(raster_paths) > 0: + spatial_df.at[idx, 'raster_paths'] = raster_paths - # hucs.append(huc) # huc_names.append(huc_name) # out_files.append(str(huc_rasters)) - if len(out_list) > 0: - out_files.append(out_list) + # if len(out_list) > 0: + # out_files.append(out_list) if len(hucs) == 0: logging.info("No valid rasters remain. System aborted.") @@ -313,14 +339,18 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str): # this is ugly + # print(spatial_df) + # print("final hucs") # print(hucs) # print(huc_names) # print(out_files) - spatial_df['HUC'] = hucs - spatial_df['HUC_Name'] = huc_names - spatial_df['raster_paths'] = out_files + # spatial_df['HUC'] = hucs + # spatial_df['HUC_Name'] = huc_names + # spatial_df['raster_paths'] = out_files + + # print(out_files) spatial_df.to_csv(os.path.join(save_folder, "spatial_db_w_rasters.csv"), header=True, index=False) print() @@ -358,7 +388,9 @@ def extract_raster(huc: str, in_raster: str, out_raster: str): # we tried some gdal warp and various things to get it work. # For now, we will just a have to skip anyting that is not 10m. 
- if gt[1] != 10.0: + logging.info(f"{huc} : Raster reesolution is {gt[1]}") + + if int(gt[1]) != 10: # # resample # layer_name = in_raster # # rename the file @@ -380,25 +412,18 @@ def extract_raster(huc: str, in_raster: str, out_raster: str): # print(gt) logging.info( - f"Dropping {huc} raster not resolution of 10 m. It is {gt[1]} : raster input = {in_raster}" + f"{huc} dropping raster not resolution of 10 m. It is {gt[1]} : raster input = {in_raster}" ) return False - # print(f" --- read as array : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") data = data_ds.ReadAsArray() - # print(f" --- GetRasterBand : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") nodata = data_ds.GetRasterBand(1).GetNoDataValue() - driver = gdal.GetDriverByName('GTiff') out_ds = driver.Create(out_raster, data.shape[1], data.shape[0], 1, gdal.GDT_Float32) - # print(f" --- SetGeoTransform : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.SetGeoTransform(data_ds.GetGeoTransform()) - # print(f" --- SetProjection : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.SetProjection(data_ds.GetProjection()) - # print(f" --- WriteArray : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") out_ds.GetRasterBand(1).WriteArray(data) out_ds.GetRasterBand(1).SetNoDataValue(nodata) - # print(f" --- open raster done : {dt.datetime.now(dt.timezone.utc).strftime('%H:%M:%S')}") data_ds = None out_ds = None diff --git a/data/ble/ble_benchmark/create_flow_forecast_file.py b/data/ble/ble_benchmark/create_flow_forecast_file.py index 0979c8b76..72361919c 100755 --- a/data/ble/ble_benchmark/create_flow_forecast_file.py +++ b/data/ble/ble_benchmark/create_flow_forecast_file.py @@ -4,12 +4,16 @@ import os import fiona +import fiona._env +import fiona.env import geopandas as gpd import numpy as np import pandas as pd +import rasterio._env +from osgeo import gdal -gpd.options.io_engine = "pyogrio" +# gpd.options.io_engine = "pyogrio" def create_flow_forecast_file( @@ -58,6 +62,10 @@ def create_flow_forecast_file( print(" ******************************************") # print(locals()) + # gdal.SetConfigOption('GDAL_DATA', rasterio._env.get_gdal_data()) + # gdal.SetConfigOption('CPL_LOG', '/dev/null') + # gdal.SetConfigOption('CPL_DEBUG', 'OFF') + def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): """ This function will fill in missing flows in the forecast dataframe. @@ -84,7 +92,7 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): nonintersects = merged[merged['discharge'].isna()] - print(f'Iteration {n}. {len(nonintersects)} segments remaining.') + # print(f'Iteration {n}. 
{len(nonintersects)} segments remaining.') updated = False for i, row in nonintersects.iterrows(): @@ -175,7 +183,6 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): forecast['discharge'] = forecast['discharge'] * dischargeMultiplier # Assign flow to segments missing flows - print(f'Filling in missing flows for {return_period[i]} flow.') forecast = fill_missing_flows(forecast, nwm_river_layer) # Set paths and write file @@ -185,6 +192,8 @@ def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame): path_to_csv = os.path.join(dir_of_csv, "ble_huc_{}_flows_{}.csv".format(huc, return_period[i])) forecast.to_csv(path_to_csv, index=False) + return + if __name__ == '__main__': # Parse arguments diff --git a/data/ble/ble_benchmark/preprocess_benchmark.py b/data/ble/ble_benchmark/preprocess_benchmark.py index 999fef7e6..438679b3d 100755 --- a/data/ble/ble_benchmark/preprocess_benchmark.py +++ b/data/ble/ble_benchmark/preprocess_benchmark.py @@ -5,14 +5,22 @@ import numpy as np import rasterio +import rasterio._env -# import rasterio._env # import rasterio._path # import rasterio.enums import rasterio.mask +from osgeo import gdal from rasterio.warp import Resampling, calculate_default_transform, reproject +# gdal.SetConfigOption('GDAL_DATA', rasterio._env.get_gdal_data()) +# gdal.SetConfigOption('CPL_LOG', 'NUL') +# gdal.SetConfigOption('CPL_DEBUG', 'OFF') +# gdal.SetConfigOption('OLC_FASTSETNEXTBYINDEX', '1') +# gdal.SetConfigOption('OLC_FASTFEATURECOUNT', '1') + + def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path=None): ''' This function will preprocess a benchmark dataset for purposes of evaluating FIM output. @@ -39,19 +47,16 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p ''' - # gdal.SetConfigOption('CPL_LOG', '/dev/null') - # gdal.SetConfigOption('CPL_DEBUG', 'False') - - # with rasterio.Env(CPL_DEBUG=False, + # with rasterio.Env(CPL_DEBUG="OFF"): # GDAL_CACHEMAX=536870912, - # CPL_CURL_VERBOSE=False, # GDAL_DATA=rasterio._env.get_gdal_data() ): # Open and read raster and benchmark rasters - logging.info(f"--- Opening reference_raster: {reference_raster}") - reference = rasterio.open(reference_raster, "r") - logging.info(f"--- Opening benchmark_raster: {benchmark_raster}") - benchmark = rasterio.open(benchmark_raster, "r") + # logging.info(f"--- Opening reference_raster: {reference_raster}") + # with rasterio.Env(): + reference = rasterio.open(reference_raster) + # logging.info(f"--- Opening benchmark_raster: {benchmark_raster}") + benchmark = rasterio.open(benchmark_raster) benchmark_arr = benchmark.read(1) # Set arbitrary no data value that is not possible value of the benchmark dataset. It is reassigned later. 
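The logger that `__setup_logger` arrives at in the patch above fans one root logger out to two handlers, DEBUG and up to the console and INFO and up to a file, with one shared formatter. Reduced to a self-contained sketch (the log file name here is hypothetical):

    import logging

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)  # root must be at least as verbose as any handler
    fmt = logging.Formatter("%(asctime)s >> %(message)s", "%Y-%m-%d %H:%M:%S")

    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    console_handler.setFormatter(fmt)
    logger.addHandler(console_handler)

    file_handler = logging.FileHandler("example.log")  # hypothetical file name
    file_handler.setLevel(logging.INFO)  # the file keeps INFO and above only
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logging.debug("console only")
    logging.info("console and file")
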
From 6e393f90a7d046304d955727701c036eb34730f3 Mon Sep 17 00:00:00 2001
From: Rob Hanna
Date: Fri, 17 May 2024 12:47:32 +0000
Subject: [PATCH 9/9] running in full scale now

---
 .../ble/ble_benchmark/create_ble_benchmark.py | 101 ++++++++----------
 .../create_flow_forecast_file.py              |   5 -
 .../ble/ble_benchmark/preprocess_benchmark.py |  16 ---
 3 files changed, 43 insertions(+), 79 deletions(-)

diff --git a/data/ble/ble_benchmark/create_ble_benchmark.py b/data/ble/ble_benchmark/create_ble_benchmark.py
index e256c2b32..a8823fa72 100644
--- a/data/ble/ble_benchmark/create_ble_benchmark.py
+++ b/data/ble/ble_benchmark/create_ble_benchmark.py
@@ -16,8 +16,6 @@
 import numpy as np
 import pandas as pd
-
-# import fiona
 import rasterio
 from create_flow_forecast_file import create_flow_forecast_file
 from osgeo import gdal
@@ -35,11 +33,6 @@ def __setup_logger(output_folder_path):
     log_file_name = f"create_ble_benchmark-{file_dt_string}.log"

     log_file_path = os.path.join(output_folder_path, log_file_name)
-    # logging.basicConfig(
-    #     format='%(asctime)s -- ',
-    #     level=logging.INFO,
-    #     datefmt='%Y-%m-%d %H:%M:%S')
-
     logger = logging.getLogger()
     logger.setLevel(logging.DEBUG)
     fmt = logging.Formatter("%(asctime)s >> %(message)s", "%Y-%m-%d %H:%M:%S")
@@ -65,6 +58,7 @@ def create_ble_benchmark(
     nwm_stream_layer_name: str,
     nwm_feature_id_field: str,
     huc: str = None,
+    num_jobs: int = 1,
 ):
     """
     Downloads and preprocesses BLE benchmark datasets for purposes of evaluating FIM output.
@@ -148,6 +142,7 @@ def create_ble_benchmark(
     logging.info("")

     num_recs = len(spatial_df)
+
     for i, row in spatial_df.iterrows():
         huc = row['HUC']
         logging.info("++++++++++++++++++++++++++")
@@ -245,9 +240,6 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str):

     # Download and unzip each file
     hucs = []
-    # huc_names = []
-    # out_files = []
-    # out_list = []

     logging.info("=======================================")
     logging.info("Extracting and downloading spatial data")
@@ -266,8 +258,6 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str):
         logging.info(f"URL is {url}")

         huc, huc_name = os.path.basename(os.path.dirname(url)).split('_')
-        # hucs.append(huc)
-        # huc_names.append(huc_name)

         # Check to ensure it is a valid HUC (some are not)
         is_ble_huc_valid, ___ = is_valid_huc(huc)
@@ -281,77 +271,64 @@ def download_and_extract_rasters(spatial_df: pd.DataFrame, save_folder: str):
         spatial_df.at[idx, 'HUC_Name'] = huc_name

         hucs.append(huc)
-        # huc_names.append(huc_name)

         # logging.info
         logging.info(f"{huc} : Downloading and extracting for {huc_name}")

         # Download file
-        save_file = os.path.join(save_folder, os.path.basename(url))
-        logging.info(f"{huc} : Saving download file to {save_file}")
-        if not os.path.exists(save_file):
-            http_response = urlopen(url)
-            zipfile = ZipFile(BytesIO(http_response.read()))
-            zipfile.extractall(path=save_file)
-        else:
-            logging.info(f"{huc} : File already exists. Skipping download.")
+        try:
+            save_file = os.path.join(save_folder, os.path.basename(url))
+            logging.info(f"{huc} : Saving download file to {save_file}")
+            if not os.path.exists(save_file):
+                http_response = urlopen(url)
+                zipfile = ZipFile(BytesIO(http_response.read()))
+                zipfile.extractall(path=save_file)
+            else:
+                logging.info(f"{huc} : File already exists. Skipping download.")
+        except Exception:
+            logging.info(f"HUC {huc} - An error occurred while downloading and unzipping.")
+            logging.info(traceback.format_exc())
+            continue

         # Loading gdb
         logging.info(f"{huc} : Loading gdb")
         gdb_list = glob(os.path.join(save_file, '**', '*.gdb'), recursive=True)

         if len(gdb_list) == 1:
-            ble_geodatabase = gdb_list[0]
-            src_ds = gdal.Open(ble_geodatabase, gdal.GA_ReadOnly)
-            subdatasets = src_ds.GetSubDatasets()
+            raster_paths = []
+            try:
+                ble_geodatabase = gdb_list[0]
+                src_ds = gdal.Open(ble_geodatabase, gdal.GA_ReadOnly)
+                subdatasets = src_ds.GetSubDatasets()

-            src_ds = None
+                src_ds = None

-            # Find depth rasters
-            raster_paths = []
-            for depth_raster in depth_rasters:
-                out_file = os.path.join(save_folder, f'{huc}_{depth_raster}.tif')
+                # Find depth rasters
+                for depth_raster in depth_rasters:
+                    out_file = os.path.join(save_folder, f'{huc}_{depth_raster}.tif')

-                if not os.path.exists(out_file):
-                    # Read raster data from GDB
-                    logging.info(f'{huc} : Reading {depth_raster}')
-                    depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0]
+                    if not os.path.exists(out_file):
+                        # Read raster data from GDB
+                        logging.info(f'{huc} : Reading {depth_raster}')
+                        depth_raster_path = [item[0] for item in subdatasets if depth_raster in item[1]][0]

-                    if extract_raster(huc, depth_raster_path, out_file) is False:
-                        continue
+                        if extract_raster(huc, depth_raster_path, out_file) is False:
+                            continue

-                raster_paths.append(out_file)
+                    raster_paths.append(out_file)

-                # out_list.append(out_file)
+            except Exception:
+                logging.info(f"HUC {huc} - An error occurred while getting rasters.")
+                logging.info(traceback.format_exc())
+                continue

             if len(raster_paths) > 0:
                 spatial_df.at[idx, 'raster_paths'] = raster_paths

-    # huc_names.append(huc_name)
-    # out_files.append(str(huc_rasters))
-
-    # if len(out_list) > 0:
-    #     out_files.append(out_list)

     if len(hucs) == 0:
         logging.info("No valid rasters remain. System aborted.")
         sys.exit(1)

-    # this is ugly
-
-    # print(spatial_df)
-
-    # print("final hucs")
-    # print(hucs)
-    # print(huc_names)
-    # print(out_files)
-
-    # spatial_df['HUC'] = hucs
-    # spatial_df['HUC_Name'] = huc_names
-    # spatial_df['raster_paths'] = out_files
-
-    # print(out_files)
-
     spatial_df.to_csv(os.path.join(save_folder, "spatial_db_w_rasters.csv"), header=True, index=False)

     print()
     logging.info("Extracting and downloading spatial data - COMPLETE")
@@ -475,6 +452,14 @@ def extract_raster(huc: str, in_raster: str, out_raster: str):
         required=False,
         default='ID',
     )
+    parser.add_argument(
+        '-j',
+        '--num_jobs',
+        help='Number of jobs, i.e. the number of processes allowed to run at one time',
+        required=False,
+        default=1,
+        type=int,
+    )

     args = vars(parser.parse_args())

diff --git a/data/ble/ble_benchmark/create_flow_forecast_file.py b/data/ble/ble_benchmark/create_flow_forecast_file.py
index 72361919c..40de89c2f 100755
--- a/data/ble/ble_benchmark/create_flow_forecast_file.py
+++ b/data/ble/ble_benchmark/create_flow_forecast_file.py
@@ -60,11 +60,6 @@ def create_flow_forecast_file(
     '''

     print(" ******************************************")
-    # print(locals())
-
-    # gdal.SetConfigOption('GDAL_DATA', rasterio._env.get_gdal_data())
-    # gdal.SetConfigOption('CPL_LOG', '/dev/null')
-    # gdal.SetConfigOption('CPL_DEBUG', 'OFF')

     def fill_missing_flows(forecast: pd.DataFrame, nwm_river_layer: pd.DataFrame):
         """
diff --git a/data/ble/ble_benchmark/preprocess_benchmark.py b/data/ble/ble_benchmark/preprocess_benchmark.py
index 438679b3d..cbf17b59e 100755
--- a/data/ble/ble_benchmark/preprocess_benchmark.py
+++ b/data/ble/ble_benchmark/preprocess_benchmark.py
@@ -6,21 +6,11 @@
 import numpy as np
 import rasterio
 import rasterio._env
-
-# import rasterio._path
-# import rasterio.enums
 import rasterio.mask
 from osgeo import gdal
 from rasterio.warp import Resampling, calculate_default_transform, reproject

-# gdal.SetConfigOption('GDAL_DATA', rasterio._env.get_gdal_data())
-# gdal.SetConfigOption('CPL_LOG', 'NUL')
-# gdal.SetConfigOption('CPL_DEBUG', 'OFF')
-# gdal.SetConfigOption('OLC_FASTSETNEXTBYINDEX', '1')
-# gdal.SetConfigOption('OLC_FASTFEATURECOUNT', '1')
-
-
 def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_path=None):
     '''
     This function will preprocess a benchmark dataset for purposes of evaluating FIM output.
@@ -47,12 +37,6 @@ def preprocess_benchmark_static(benchmark_raster, reference_raster, out_raster_p

     '''

-    # with rasterio.Env(CPL_DEBUG="OFF"):
-    #                   GDAL_CACHEMAX=536870912,
-    #                   GDAL_DATA=rasterio._env.get_gdal_data() ):
-
-    # Open and read raster and benchmark rasters
-    # logging.info(f"--- Opening reference_raster: {reference_raster}")
     # with rasterio.Env():
     reference = rasterio.open(reference_raster)
     # logging.info(f"--- Opening benchmark_raster: {benchmark_raster}")
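A closing note on the resolution guard introduced in the first patch: extract_raster drops any BLE depth raster whose pixel size is not 10 m, reading that size from element 1 of the GDAL GeoTransform. The standalone sketch below shows just that check, assuming only a readable raster path and the stock GDAL Python bindings; pixel_size_is_10m is a hypothetical helper name, not a function in the repository.

from osgeo import gdal


def pixel_size_is_10m(raster_path: str) -> bool:
    # Open read-only, as extract_raster does.
    ds = gdal.Open(raster_path, gdal.GA_ReadOnly)
    if ds is None:
        return False  # unreadable raster: treat it like a failed check

    # GetGeoTransform() returns six affine terms:
    # (origin_x, pixel_width, row_rotation, origin_y, col_rotation, pixel_height)
    gt = ds.GetGeoTransform()
    ds = None  # release the dataset handle, mirroring the patch

    # pixel_width (gt[1]) is in CRS units; the BLE rasters here are metre-based,
    # so truncating to int matches the patch's `int(gt[1]) != 10` test.
    return int(gt[1]) == 10

Comparing int(gt[1]) against 10, as the final code does, tolerates near-10 values such as 10.0000001 that the earlier gt[1] != 10.0 float-equality test would have rejected, which appears to be the motivation for the change.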