Skip to content

Commit

Permalink
[GCP] Support H100 for GCP (#3279)
Browse files (browse the repository at this point in the history)
* Support h100

* Fix H100 from sku

* Fix H100
  • Loading branch information
Michaelvll authored Mar 5, 2024
1 parent 9460735 commit f191289
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
4 changes: 3 additions & 1 deletion sky/clouds/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def make_deploy_resources_variables(
# https://cloud.google.com/compute/docs/gpus
if acc in ('A100-80GB', 'L4'):
# A100-80GB and L4 are named 'nvidia-<acc>', without the 'tesla-' prefix.
resources_vars['gpu'] = 'nvidia-{}'.format(acc.lower())
resources_vars['gpu'] = f'nvidia-{acc.lower()}'
elif acc == 'H100':
resources_vars['gpu'] = f'nvidia-{acc.lower()}-80gb'
else:
resources_vars['gpu'] = 'nvidia-tesla-{}'.format(
acc.lower())
Expand Down
8 changes: 8 additions & 0 deletions sky/clouds/service_catalog/data_fetchers/fetch_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
# Unsupported Series: 'f1', 'm2'
SERIES_TO_DISCRIPTION = {
'a2': 'A2 Instance',
'a3': 'A3 Instance',
'c2': 'Compute optimized',
'c2d': 'C2D AMD Instance',
'c3': 'C3 Instance',
Expand Down Expand Up @@ -298,6 +299,11 @@ def _get_gpus_for_zone(zone: str) -> pd.DataFrame:
gpu_name = gpu_name.replace('nvidia-', '')
gpu_name = gpu_name.replace('tesla-', '')
gpu_name = gpu_name.upper()
if 'H100-80GB' in gpu_name:
gpu_name = 'H100'
if count != 8:
# H100 is only supported in the 8-GPU configuration; skip other counts.
continue
if 'VWS' in gpu_name:
continue
if gpu_name.startswith('TPU-'):
Expand Down Expand Up @@ -344,6 +350,8 @@ def get_gpu_price(row: pd.Series, spot: bool) -> Optional[float]:
gpu_name = row['AcceleratorName']
if gpu_name == 'A100-80GB':
gpu_name = 'A100 80GB'
if gpu_name == 'H100':
gpu_name = 'H100 80GB'
if f'{gpu_name} GPU' not in sku['description']:
continue

Expand Down
3 changes: 3 additions & 0 deletions sky/clouds/service_catalog/gcp_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@
4: ['g2-standard-48'],
8: ['g2-standard-96'],
},
'H100': {
8: ['a3-highgpu-8g'],
}
}

# Number of CPU cores per GPU based on the AWS setting.
Expand Down

0 comments on commit f191289

Please sign in to comment.