From 06752c3dbc9d1b0157a4318efa817f46a4be0898 Mon Sep 17 00:00:00 2001 From: Jan Van den bosch Date: Mon, 25 Mar 2024 15:10:46 +0100 Subject: [PATCH 1/2] works without masking/scaling #696 --- openeogeotrellis/collections/sentinel3.py | 109 ++++++++++++++++++---- 1 file changed, 89 insertions(+), 20 deletions(-) diff --git a/openeogeotrellis/collections/sentinel3.py b/openeogeotrellis/collections/sentinel3.py index fe9602e52..2208b029c 100644 --- a/openeogeotrellis/collections/sentinel3.py +++ b/openeogeotrellis/collections/sentinel3.py @@ -20,6 +20,8 @@ SYNERGY_PRODUCT_TYPE = "SY_2_SYN___" SLSTR_PRODUCT_TYPE = "SL_2_LST___" +RIM_PIXELS = 60 + logger = logging.getLogger(__name__) @@ -125,6 +127,7 @@ def process_feature(feature: dict) -> Tuple[str, dict]: return {zoom: merged_tile_layer} + def _instant_ms_to_hour(instant: int) -> datetime: """ Convert Geotrellis SpaceTimeKey instant (Scala Long, millisecond resolution) to Python datetime object, @@ -136,7 +139,8 @@ def _instant_ms_to_hour(instant: int) -> datetime: """ return datetime(*(datetime.utcfromtimestamp(instant // 1000).timetuple()[:4])) -#@ensure_executor_logging + +@ensure_executor_logging def read_product(product, product_type, band_names, tile_size, limit_python_memory): from openeogeotrellis.collections.s1backscatter_orfeo import get_total_extent import geopyspark @@ -202,7 +206,7 @@ def read_product(product, product_type, band_names, tile_size, limit_python_memo orfeo_bands = create_s3_toa(product_type, creo_path, band_names, [layout_extent['xmin'], layout_extent['ymin'], layout_extent['xmax'], - layout_extent['ymax']],digital_numbers=digital_numbers) + layout_extent['ymax']], digital_numbers=digital_numbers) debug_mode = True @@ -244,7 +248,7 @@ def read_product(product, product_type, band_names, tile_size, limit_python_memo return tiles -def create_s3_toa(product_type, creo_path, band_names, bbox_tile, digital_numbers = True): +def create_s3_toa(product_type, creo_path, band_names, bbox_tile, digital_numbers=True): if product_type == OLCI_PRODUCT_TYPE: geofile = 'geo_coordinates.nc' lat_band = 'latitude' @@ -264,15 +268,23 @@ def create_s3_toa(product_type, creo_path, band_names, bbox_tile, digital_number raise ValueError(product_type) tile_coordinates, tile_shape = create_final_grid(bbox_tile, resolution=final_grid_resolution) - tile_coordinates.shape = tile_shape + (2,) # target =((4352, 5114, 2)) before=22256128,2 + tile_coordinates.shape = tile_shape + (2,) + + tile_coordinates_with_rim, tile_shape_with_rim = create_final_grid(bbox_tile, resolution=final_grid_resolution, rim_pixels=RIM_PIXELS) + tile_coordinates_with_rim.shape = tile_shape_with_rim + (2,) geofile = os.path.join(creo_path, geofile) - bbox_original, source_coordinates,data_mask = _read_latlonfile(geofile, lat_band, lon_band, bbox_tile) + bbox_original, source_coordinates, data_mask = _read_latlonfile(geofile, lat_band, lon_band, bbox_tile) logger.info(f"{bbox_original=} {source_coordinates=}") + angle_geofile = os.path.join(creo_path, 'geodetic_tx.nc') + _, angle_source_coordinates, angle_data_mask = _read_latlonfile(angle_geofile, lat_band='latitude_tx', lon_band='longitude_tx', bbox=bbox_tile) + reprojected_data, is_empty = do_reproject(product_type, final_grid_resolution, creo_path, band_names, - source_coordinates, tile_coordinates,data_mask,digital_numbers) + source_coordinates, tile_coordinates, data_mask, + angle_source_coordinates, tile_coordinates_with_rim, angle_data_mask, + digital_numbers) return reprojected_data @@ -310,7 +322,10 @@ def create_final_grid(final_bbox, resolution, rim_pixels=0 ): return target_coordinates, target_shape -def do_reproject(product_type, final_grid_resolution, creo_path, band_names, source_coordinates, target_coordinates,data_mask,digital_numbers=True): +def do_reproject(product_type, final_grid_resolution, creo_path, band_names, + source_coordinates, target_coordinates, data_mask, + angle_source_coordinates, target_coordinates_with_rim, angle_data_mask, + digital_numbers=True): """Create LUT for reprojecting, and reproject all possible(hard-coded) bands Parameters @@ -331,10 +346,15 @@ def do_reproject(product_type, final_grid_resolution, creo_path, band_names, sou is_empty = False logger.info(f"Reprojecting {product_type}") ### create LUT for radiances - distance, LUT = create_index_LUT(source_coordinates, + distance, LUT = create_index_LUT(source_coordinates, # TODO: ditch unused distance target_coordinates, 2 * final_grid_resolution) # indices will have the same size as flattend grid_xy referring to the index from the latlon a data arrays + ### create LUT for angle files + _, LUT_angles = create_index_LUT(angle_source_coordinates, + target_coordinates_with_rim, + 2 * final_grid_resolution) + varOut = [] for band_name in band_names: @@ -359,14 +379,21 @@ def variable_name(band_name: str): # actually, the file without the .nc extensi return band_name raise ValueError(band_name) - band_data, band_settings = read_band(in_file, in_band=variable_name(band_name),data_mask=data_mask) - - reprojected_data = apply_LUT_on_band(band_data, LUT, band_settings.get('_FillValue', None)) # result is an numpy array with reprojected data + if product_type == SLSTR_PRODUCT_TYPE and band_name in ["solar_azimuth_tn", "solar_zenith_tn", ]: + band_data, band_settings = read_band(in_file, in_band=variable_name(band_name), data_mask=angle_data_mask) + reprojected_data = apply_LUT_on_band(band_data, LUT_angles, band_settings.get('_FillValue', None)) # result is an numpy array with reprojected data + interpolated = _linearNDinterpolate(reprojected_data) + reprojected_data = interpolated[RIM_PIXELS:-RIM_PIXELS, RIM_PIXELS:-RIM_PIXELS] + else: + band_data, band_settings = read_band(in_file, in_band=variable_name(band_name), data_mask=data_mask) + reprojected_data = apply_LUT_on_band(band_data, LUT, band_settings.get('_FillValue', None)) # result is an numpy array with reprojected data + """ # TODO: reintroduce scaling if '_FillValue' in band_settings and not digital_numbers: reprojected_data[reprojected_data == band_settings['_FillValue']] = np.nan if 'add_offset' in band_settings and 'scale_factor' in band_settings and not digital_numbers: reprojected_data = reprojected_data.astype("float32") * band_settings['scale_factor'] + band_settings['add_offset'] + """ varOut.append(reprojected_data) @@ -374,7 +401,48 @@ def variable_name(band_name: str): # actually, the file without the .nc extensi return np.array(varOut), is_empty -def _read_latlonfile(latlon_file, lat_band="latitude", lon_band="longitude",bbox = None): +def _linearNDinterpolate(in_array): + """Some low resolution bands only contain values in diagonal lines. They need to be interpolated to fill the + target grid completely. nan values will be interpolated + + Parameters + ---------- + in_array : float + the array containing nans that needs to be interpolated + + Notes + ------- + If nothing was interpolated, the in_array is returned + + Returns + ------- + out_array: float + a 2D numpy array containing interpolated data + """ + from scipy.interpolate import LinearNDInterpolator + logger.info(" Start interpolation...") + rownrs = np.arange(in_array.shape[0]) # Column indices (longitude) + columnnrs = np.arange(in_array.shape[1]) # Row indices (latitude) + + X, Y = np.meshgrid(rownrs, columnnrs) # 2D grid for interpolation + ok = ~np.isnan(in_array) # ok= all valid data =True + xp = ok.nonzero()[0] # get the index of all valid pixels + if len(xp) < 10: + logger.warning("Angle files are empty here, skip the interpolation. The not interpolated result(array_in) will be reused") + return in_array + yp = ok.nonzero()[1] + fp = in_array[~np.isnan(in_array)] # get the value of all valid pixels + # interp = RegularGridInterpolator((xp, fp), in_array) + coordinates = list(zip(xp, yp)) + interp = LinearNDInterpolator(coordinates, fp, fill_value=np.nan) + # X = np.linspace(0, in_array.shape[0], num=1) + # Y = np.linspace(0, in_array.shape[1], num=1) + # X, Y = np.meshgrid(in_array) + Z = interp(X, Y) + return Z.T # we should transpose this array + + +def _read_latlonfile(latlon_file, lat_band="latitude", lon_band="longitude", bbox=None): # TODO: bbox cannot be None """Read latlon data from this netcdf file Parameters @@ -406,8 +474,8 @@ def _read_latlonfile(latlon_file, lat_band="latitude", lon_band="longitude",bbox # Create the coordinate arrays for latitude and longitude ## Coordinated referring to the CENTER of the pixel - lat_orig = lat_lon_ds[lat_band].where(data_mask,drop=True).values - lon_orig = lat_lon_ds[lon_band].where(data_mask,drop=True).values + lat_orig = lat_lon_ds[lat_band].values # TODO: reintroduce mask + lon_orig = lat_lon_ds[lon_band].values lat_lon_ds.close() extreme_right_lon = lon_orig[0,-1] # negative degrees (-170) @@ -421,8 +489,8 @@ def _read_latlonfile(latlon_file, lat_band="latitude", lon_band="longitude",bbox bbox_original = [x_min, y_min, x_max, y_max] lon_flat = lon_orig.flatten() lat_flat = lat_orig.flatten() - lon_flat = lon_flat[~np.isnan(lon_flat)] - lat_flat = lat_flat[~np.isnan(lat_flat)] + # lon_flat = lon_flat[~np.isnan(lon_flat)] TODO: reintroduce mask + # lat_flat = lat_flat[~np.isnan(lat_flat)] source_coordinates = np.column_stack((lon_flat, lat_flat)) return bbox_original, source_coordinates, data_mask @@ -458,7 +526,7 @@ def create_index_LUT(coordinates, target_coordinates, max_distance): return distances, lut_indices -def read_band(in_file, in_band, get_data_array=True,data_mask=None): +def read_band(in_file, in_band, get_data_array=True, data_mask=None): # TODO: data_mask probably cannot be None """get array and settings(metadata) out of the in_file Parameters @@ -479,7 +547,8 @@ def read_band(in_file, in_band, get_data_array=True,data_mask=None): A dict containing the bands metadata (_FillValue, name, dtype, units,...) """ # can be used to get only the band settings or together with the data - dataset = xr.open_dataset(in_file, mask_and_scale=False,cache=False) # disable autoconvert digital values + logger.debug(f"Reading {in_band} from file {in_file}") + dataset = xr.open_dataset(in_file, mask_and_scale=False) # disable autoconvert digital values settings = dataset[in_band].attrs settings['dtype'] = dataset[in_band].dtype.name settings['name'] = in_band @@ -519,7 +588,7 @@ def read_band(in_file, in_band, get_data_array=True,data_mask=None): settings["dtype"] = 'float32' if get_data_array: - data_array = dataset[in_band].where(data_mask,drop=True).data + data_array = dataset[in_band].data # TODO: reintroduce masking else: data_array = None dataset.close() @@ -545,7 +614,7 @@ def apply_LUT_on_band(in_data, LUT, nodata=None): 2D-numpy array with the size of a tile containing reprojected values """ data_flat = in_data.flatten() - data_flat = data_flat[~np.isnan(data_flat)] + # data_flat = data_flat[~np.isnan(data_flat)] # TODO: reintroduce? # if nodata is empty, we will just use the max possible value if nodata is None: From 398eb4cda30d1a7bfb20ee236403ca465f74a091 Mon Sep 17 00:00:00 2001 From: Jan Van den bosch Date: Tue, 26 Mar 2024 13:06:14 +0100 Subject: [PATCH 2/2] reintroduce scaling #696 --- openeogeotrellis/collections/sentinel3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openeogeotrellis/collections/sentinel3.py b/openeogeotrellis/collections/sentinel3.py index 2208b029c..d68b3b2ef 100644 --- a/openeogeotrellis/collections/sentinel3.py +++ b/openeogeotrellis/collections/sentinel3.py @@ -388,12 +388,12 @@ def variable_name(band_name: str): # actually, the file without the .nc extensi band_data, band_settings = read_band(in_file, in_band=variable_name(band_name), data_mask=data_mask) reprojected_data = apply_LUT_on_band(band_data, LUT, band_settings.get('_FillValue', None)) # result is an numpy array with reprojected data - """ # TODO: reintroduce scaling + """ # TODO: reintroduce masking if '_FillValue' in band_settings and not digital_numbers: reprojected_data[reprojected_data == band_settings['_FillValue']] = np.nan + """ if 'add_offset' in band_settings and 'scale_factor' in band_settings and not digital_numbers: reprojected_data = reprojected_data.astype("float32") * band_settings['scale_factor'] + band_settings['add_offset'] - """ varOut.append(reprojected_data)