From c6325059dece07b7ec61bc5c7217d9eafcf42f43 Mon Sep 17 00:00:00 2001
From: Julius Busecke
Date: Mon, 1 Apr 2024 17:36:46 -0400
Subject: [PATCH 1/2] Add `disk_size_gb` option to DataflowBakery

---
 pangeo_forge_runner/bakery/dataflow.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pangeo_forge_runner/bakery/dataflow.py b/pangeo_forge_runner/bakery/dataflow.py
index c5c0949c..60e7591a 100644
--- a/pangeo_forge_runner/bakery/dataflow.py
+++ b/pangeo_forge_runner/bakery/dataflow.py
@@ -57,6 +57,18 @@ def _default_project_id(self):
         Ignored if use_dataflow_prime is set.
         """,
     )
+    
+    disk_size_gb = Integer(
+        None,
+        allow_none=True,
+        config=True,
+        help="""
+        The disk size, in gigabytes, to use on each remote Compute Engine worker instance.
+
+        Set to None (default) for default sizing
+        (see https://cloud.google.com/dataflow/docs/reference/pipeline-options#worker-level_options for details).
+        """,
+    )
 
     use_dataflow_prime = Bool(
         False,
@@ -181,6 +193,7 @@ def get_pipeline_options(
             project=self.project_id,
             job_name=job_name,
             max_num_workers=self.max_num_workers,
+            disk_size_gb=self.disk_size_gb,
             temp_location=self.temp_gcs_location,
             use_public_ips=self.use_public_ips,
             region=self.region,

From 07ae0addc3b6fae9b65add90f6c5ad6c634e20b4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Apr 2024 21:37:23 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pangeo_forge_runner/bakery/dataflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pangeo_forge_runner/bakery/dataflow.py b/pangeo_forge_runner/bakery/dataflow.py
index 60e7591a..b1bd07de 100644
--- a/pangeo_forge_runner/bakery/dataflow.py
+++ b/pangeo_forge_runner/bakery/dataflow.py
@@ -57,7 +57,7 @@ def _default_project_id(self):
         Ignored if use_dataflow_prime is set.
         """,
     )
-    
+
     disk_size_gb = Integer(
         None,
         allow_none=True,