-
Notifications
You must be signed in to change notification settings - Fork 495
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'skypilot/master' into serve-sync-log
- Loading branch information
Showing
5 changed files
with
99 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,8 @@ | |
make_deploy_resources_variables(): Bug fix for specify the image_id as | ||
the ocid of the image in the task.yaml file, in this case the image_id | ||
for the node config should be set to the ocid instead of a dict. | ||
- Hysun He ([email protected]) @ Oct 13, 2024: | ||
Support more OS types additional to ubuntu for OCI resources. | ||
""" | ||
import json | ||
import logging | ||
|
@@ -295,10 +297,21 @@ def make_deploy_resources_variables( | |
cpus=None if cpus is None else float(cpus), | ||
disk_tier=resources.disk_tier) | ||
|
||
image_str = self._get_image_str(image_id=resources.image_id, | ||
instance_type=resources.instance_type, | ||
region=region.name) | ||
|
||
# pylint: disable=import-outside-toplevel | ||
from sky.clouds.service_catalog import oci_catalog | ||
os_type = oci_catalog.get_image_os_from_tag(tag=image_str, | ||
region=region.name) | ||
logger.debug(f'OS type for the image {image_str} is {os_type}') | ||
|
||
return { | ||
'instance_type': instance_type, | ||
'custom_resources': custom_resources, | ||
'region': region.name, | ||
'os_type': os_type, | ||
'cpus': str(cpus), | ||
'memory': resources.memory, | ||
'disk_size': resources.disk_size, | ||
|
@@ -501,59 +514,45 @@ def _get_image_id( | |
region_name: str, | ||
instance_type: str, | ||
) -> str: | ||
if image_id is None: | ||
return self._get_default_image(region_name=region_name, | ||
instance_type=instance_type) | ||
if None in image_id: | ||
image_id_str = image_id[None] | ||
else: | ||
assert region_name in image_id, image_id | ||
image_id_str = image_id[region_name] | ||
image_id_str = self._get_image_str(image_id=image_id, | ||
instance_type=instance_type, | ||
region=region_name) | ||
|
||
if image_id_str.startswith('skypilot:'): | ||
image_id_str = service_catalog.get_image_id_from_tag(image_id_str, | ||
region_name, | ||
clouds='oci') | ||
if image_id_str is None: | ||
logger.critical( | ||
'! Real image_id not found! - {region_name}:{image_id}') | ||
# Raise ResourcesUnavailableError to make sure the failover | ||
# in CloudVMRayBackend will be correctly triggered. | ||
# TODO(zhwu): This is a information leakage to the cloud | ||
# implementor, we need to find a better way to handle this. | ||
raise exceptions.ResourcesUnavailableError( | ||
'! ERR: No image found in catalog for region ' | ||
f'{region_name}. Try setting a valid image_id.') | ||
|
||
# Image_id should be impossible be None, except for the case when | ||
# user specify an image tag which does not exist in the image.csv | ||
# catalog file which only possible in "test" / "evaluation" phase. | ||
# Therefore, we use assert here. | ||
assert image_id_str is not None | ||
|
||
logger.debug(f'Got real image_id {image_id_str}') | ||
return image_id_str | ||
|
||
def _get_default_image(self, region_name: str, instance_type: str) -> str: | ||
def _get_image_str(self, image_id: Optional[Dict[Optional[str], str]], | ||
instance_type: str, region: str): | ||
if image_id is None: | ||
image_str = self._get_default_image_tag(instance_type) | ||
elif None in image_id: | ||
image_str = image_id[None] | ||
else: | ||
assert region in image_id, image_id | ||
image_str = image_id[region] | ||
return image_str | ||
|
||
def _get_default_image_tag(self, instance_type: str) -> str: | ||
acc = self.get_accelerators_from_instance_type(instance_type) | ||
|
||
if acc is None: | ||
image_tag = oci_utils.oci_config.get_default_image_tag() | ||
image_id_str = service_catalog.get_image_id_from_tag(image_tag, | ||
region_name, | ||
clouds='oci') | ||
else: | ||
assert len(acc) == 1, acc | ||
image_tag = oci_utils.oci_config.get_default_gpu_image_tag() | ||
image_id_str = service_catalog.get_image_id_from_tag(image_tag, | ||
region_name, | ||
clouds='oci') | ||
|
||
if image_id_str is not None: | ||
logger.debug( | ||
f'Got default image_id {image_id_str} from tag {image_tag}') | ||
return image_id_str | ||
|
||
# Raise ResourcesUnavailableError to make sure the failover in | ||
# CloudVMRayBackend will be correctly triggered. | ||
# TODO(zhwu): This is a information leakage to the cloud implementor, | ||
# we need to find a better way to handle this. | ||
raise exceptions.ResourcesUnavailableError( | ||
'ERR: No image found in catalog for region ' | ||
f'{region_name}. Try update your default image_id settings.') | ||
return image_tag | ||
|
||
def get_vpu_from_disktier( | ||
self, cpus: Optional[float], | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,8 @@ | |
- Hysun He ([email protected]) @ Apr, 2023: Initial implementation | ||
- Hysun He ([email protected]) @ Jun, 2023: Reduce retry times by | ||
excluding those unsubscribed regions. | ||
- Hysun He ([email protected]) @ Oct 14, 2024: Bug fix for validation | ||
of the Marketplace images | ||
""" | ||
|
||
import logging | ||
|
@@ -206,4 +208,24 @@ def get_image_id_from_tag(tag: str, region: Optional[str]) -> Optional[str]: | |
|
||
def is_image_tag_valid(tag: str, region: Optional[str]) -> bool: | ||
"""Returns whether the image tag is valid.""" | ||
# Oct.14, 2024 by Hysun He: Marketplace images are region neutral, so don't | ||
# check with region for the Marketplace images. | ||
df = _image_df[_image_df['Tag'].str.fullmatch(tag)] | ||
if df.empty: | ||
return False | ||
app_catalog_listing_id = df['AppCatalogListingId'].iloc[0] | ||
if app_catalog_listing_id: | ||
return True | ||
return common.is_image_tag_valid_impl(_image_df, tag, region) | ||
|
||
|
||
def get_image_os_from_tag(tag: str, region: Optional[str]) -> Optional[str]: | ||
del region | ||
df = _image_df[_image_df['Tag'].str.fullmatch(tag)] | ||
if df.empty: | ||
os_type = oci_utils.oci_config.get_default_image_os() | ||
else: | ||
os_type = df['OS'].iloc[0] | ||
|
||
logger.debug(f'Operation system for the image {tag} is {os_type}') | ||
return os_type |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
"""OCI Configuration. | ||
History: | ||
- Zhanghao Wu @ Oct 2023: Formatting and refactoring | ||
- Hysun He ([email protected]) @ Apr, 2023: Initial implementation | ||
- Zhanghao Wu @ Oct 2023: Formatting and refactoring | ||
- Hysun He ([email protected]) @ Oct, 2024: Add default image OS | ||
configuration. | ||
""" | ||
import logging | ||
import os | ||
|
@@ -121,5 +123,13 @@ def get_profile(cls) -> str: | |
return skypilot_config.get_nested( | ||
('oci', 'default', 'oci_config_profile'), 'DEFAULT') | ||
|
||
@classmethod | ||
def get_default_image_os(cls) -> str: | ||
# Get the default image OS. Instead of hardcoding, we give a choice to | ||
# set the default image OS type in the sky's user-config file. (if not | ||
# specified, use the hardcode one at last) | ||
return skypilot_config.get_nested(('oci', 'default', 'image_os_type'), | ||
'ubuntu') | ||
|
||
|
||
oci_config = OCIConfig() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters