Skip to content

Commit

Permalink
rebase, format and typing
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinsantana11 committed Aug 24, 2024
1 parent 64bf984 commit 30979c3
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions clouddrift/adapters/gdp/gdpsource.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -459,7 +459,7 @@ def _process(
md_df=gdp_metadata_df,
data_df=df_chunk,
use_fill_values=use_fill_values,
- tqdm={"disable": True}
+ tqdm={"disable": True},
)
ds = ra.to_xarray()

Expand Down Expand Up @@ -494,11 +494,16 @@ def to_raggedarray(
import gzip

data_files = list()
- for compressed_data_file in tqdm([dst for (_, dst) in requests], desc="Decompressing files", unit="file"):
+ for compressed_data_file in tqdm(
+ [dst for (_, dst) in requests], desc="Decompressing files", unit="file"
+ ):
decompressed_fp = compressed_data_file[:-3]
data_files.append(decompressed_fp)
if not os.path.exists(decompressed_fp):
- with gzip.open(compressed_data_file, "rb") as compr, open(decompressed_fp, "wb") as decompr:
+ with (
+ gzip.open(compressed_data_file, "rb") as compr,
+ open(decompressed_fp, "wb") as decompr,
+ ):
decompr.write(compr.read())

df = dd.read_csv(
Expand All @@ -509,13 +514,13 @@ def to_raggedarray(
engine="c",
dtype=_INPUT_COLS_PREFILTER_DTYPES,
blocksize="1GB",
- assume_missing=True
+ assume_missing=True,
)
drifter_datasets = _process(df, gdp_metadata_df, use_fill_values)

# Sort the drifters by their start date.
deploy_date_id_map = {
- ds["id"].data[0]: ds["start_date"].data[0] for ds in drifter_datasets
+ ds["id"].data[0]: ds["start_date"].data[0] for ds in drifter_datasets.values()
}
deploy_date_sort_key = np.argsort(list(deploy_date_id_map.values()))
sorted_drifter_datasets = [drifter_datasets[idx] for idx in deploy_date_sort_key]
Expand Down

0 comments on commit 30979c3

Please sign in to comment.