From eae8fc5740c0563b458fcb2cec6df5a7f0d0f9d1 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu
Date: Thu, 16 May 2024 10:29:09 -0700
Subject: [PATCH] [UX] Error out for null env var (#3557)

* [UX] Error out for null env var
* format
* Fix examples for env, including HF_TOKEN and WANDB_API_KEY
* fix
* Add test
* format
* fix
* type
* fix
* remove print
* add doc
* fix comment
* minor fix
---
 .../running-jobs/environment-variables.rst     | 14 ++++++++++++++
 examples/serve/llama2/llama2.yaml              |  2 +-
 examples/spot_pipeline/bert_qa_train_eval.yaml |  4 ++--
 llm/axolotl/axolotl-spot.yaml                  |  4 ++--
 llm/axolotl/axolotl.yaml                       |  2 +-
 llm/dbrx/README.md                             |  2 +-
 llm/dbrx/dbrx.yaml                             |  2 +-
 llm/falcon/falcon.yaml                         |  4 ++--
 llm/gemma/serve.yaml                           |  2 +-
 llm/llama-2/README.md                          |  2 +-
 llm/llama-2/chatbot-hf.yaml                    |  2 +-
 llm/llama-2/chatbot-meta.yaml                  |  2 +-
 llm/llama-3/README.md                          |  2 +-
 llm/llama-3/llama3.yaml                        |  2 +-
 llm/sglang/llama2.yaml                         |  2 +-
 llm/vicuna-llama-2/README.md                   |  2 +-
 llm/vicuna-llama-2/train.yaml                  |  6 +++---
 llm/vicuna/train.yaml                          | 14 +++++++-------
 llm/vllm/serve-openai-api.yaml                 |  2 +-
 llm/vllm/service.yaml                          |  2 +-
 sky/task.py                                    | 18 ++++++++++++++++--
 sky/utils/schemas.py                           |  2 +-
 tests/test_yaml_parser.py                      | 12 ++++++++++++
 23 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/docs/source/running-jobs/environment-variables.rst b/docs/source/running-jobs/environment-variables.rst
index 16502f70818..2f3427c1bf5 100644
--- a/docs/source/running-jobs/environment-variables.rst
+++ b/docs/source/running-jobs/environment-variables.rst
@@ -12,6 +12,20 @@ You can specify environment variables to be made available to a task in two ways
 
 - The ``envs`` field (dict) in a :ref:`task YAML `
 - The ``--env`` flag in the ``sky launch/exec`` :ref:`CLI ` (takes precedence over the above)
 
+.. tip::
+
+  If an environment variable must be specified with ``--env`` during
+  ``sky launch/exec``, set it to ``null`` in the task YAML so that an error
+  is raised whenever the variable is left unspecified. For example,
+  ``WANDB_API_KEY`` and ``HF_TOKEN`` in the following task YAML:
+
+  .. code-block:: yaml
+
+    envs:
+      WANDB_API_KEY:
+      HF_TOKEN: null
+      MYVAR: val
+
 The ``file_mounts``, ``setup``, and ``run`` sections of a task YAML can access the variables via the ``${MYVAR}`` syntax.
 
 Using in ``file_mounts``
diff --git a/examples/serve/llama2/llama2.yaml b/examples/serve/llama2/llama2.yaml
index 5eaaea449d0..42c82ea0cc9 100644
--- a/examples/serve/llama2/llama2.yaml
+++ b/examples/serve/llama2/llama2.yaml
@@ -25,7 +25,7 @@ resources:
 
 envs:
   MODEL_SIZE: 7
-  HF_TOKEN: # TODO: Replace with huggingface token
+  HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 
 setup: |
   conda activate chatbot
diff --git a/examples/spot_pipeline/bert_qa_train_eval.yaml b/examples/spot_pipeline/bert_qa_train_eval.yaml
index 32fb526ca91..62bd34c3b76 100644
--- a/examples/spot_pipeline/bert_qa_train_eval.yaml
+++ b/examples/spot_pipeline/bert_qa_train_eval.yaml
@@ -42,7 +42,7 @@ run: |
   echo Model saved to /checkpoint/bert_qa/$SKYPILOT_TASK_ID
 
 envs:
-  WANDB_API_KEY: # NOTE: Fill in your wandb key
+  WANDB_API_KEY: # TODO: Fill with your own WANDB_API_KEY, or use --env to pass.
 
 ---
 
@@ -84,4 +84,4 @@ run: |
     --save_steps 1000
 
 envs:
-  WANDB_API_KEY: # NOTE: Fill in your wandb key
+  WANDB_API_KEY: # TODO: Fill with your own WANDB_API_KEY, or use --env to pass.
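Before the remaining example hunks, it may help to see the behavior this patch introduces in isolation: any env var that is still null after CLI overrides are merged aborts the task load. A minimal standalone sketch of that check (not SkyPilot code; the file name `task.yaml` and the use of PyYAML here are illustrative assumptions):

```python
# Illustrative sketch only, not part of this patch: the null-env check
# that sky/task.py (below) performs at task-load time, applied here to a
# task YAML on disk. 'task.yaml' is a hypothetical path; PyYAML assumed.
import yaml

with open('task.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

for key, value in (config.get('envs') or {}).items():
    # Both `KEY:` and `KEY: null` parse to Python None.
    if value is None:
        raise ValueError(
            f'Environment variable {key!r} is None. Set a value in the '
            f'task YAML or pass --env {key}=<value>; use an empty string '
            f'({key}: "") if it should be intentionally empty.')
print('All environment variables are set.')
```

Note the deliberate distinction the patch draws: `KEY: ""` is a valid empty value, while `KEY:` (YAML null) means "must be provided".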
diff --git a/llm/axolotl/axolotl-spot.yaml b/llm/axolotl/axolotl-spot.yaml index b6c81b742c9..942f4ccc4ba 100644 --- a/llm/axolotl/axolotl-spot.yaml +++ b/llm/axolotl/axolotl-spot.yaml @@ -38,8 +38,8 @@ run: | accelerate launch -m axolotl.cli.train /sky_workdir/qlora-checkpoint.yaml envs: - HF_TOKEN: # TODO: Replace with huggingface token - BUCKET: + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. + BUCKET: # TODO: Fill with your unique bucket name, or use --env to pass. diff --git a/llm/axolotl/axolotl.yaml b/llm/axolotl/axolotl.yaml index d9cfd91aa6d..9cec1d1f331 100644 --- a/llm/axolotl/axolotl.yaml +++ b/llm/axolotl/axolotl.yaml @@ -26,7 +26,7 @@ run: | accelerate launch -m axolotl.cli.train /sky_workdir/qlora.yaml envs: - HF_TOKEN: # TODO: Replace with huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. diff --git a/llm/dbrx/README.md b/llm/dbrx/README.md index e0ad216e92c..3011af9d4e6 100644 --- a/llm/dbrx/README.md +++ b/llm/dbrx/README.md @@ -22,7 +22,7 @@ In this recipe, you will serve `databricks/dbrx-instruct` on your own infra -- ```yaml envs: MODEL_NAME: databricks/dbrx-instruct - HF_TOKEN: # Change to your own huggingface token, or use --env to pass. + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. service: replicas: 2 diff --git a/llm/dbrx/dbrx.yaml b/llm/dbrx/dbrx.yaml index ffa777ab86d..0c9abd06d30 100644 --- a/llm/dbrx/dbrx.yaml +++ b/llm/dbrx/dbrx.yaml @@ -31,7 +31,7 @@ envs: MODEL_NAME: databricks/dbrx-instruct - HF_TOKEN: # Change to your own huggingface token, or use --env to pass. + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. service: replicas: 2 diff --git a/llm/falcon/falcon.yaml b/llm/falcon/falcon.yaml index 256d936d61b..b752db5256b 100644 --- a/llm/falcon/falcon.yaml +++ b/llm/falcon/falcon.yaml @@ -7,7 +7,7 @@ workdir: . envs: MODEL_NAME: tiiuae/falcon-7b # [ybelkada/falcon-7b-sharded-bf16, tiiuae/falcon-7b, tiiuae/falcon-40b] - WANDB_API_KEY: $WANDB_KEY # Change to your own wandb key + WANDB_API_KEY: # TODO: Fill with your own WANDB_API_KEY, or use --env to pass. OUTPUT_BUCKET_NAME: # Set a unique name for the bucket which will store model weights file_mounts: @@ -39,4 +39,4 @@ run: | --bnb_4bit_compute_dtype bfloat16 \ --max_steps 500 \ --dataset_name timdettmers/openassistant-guanaco \ - --output_dir /results \ No newline at end of file + --output_dir /results diff --git a/llm/gemma/serve.yaml b/llm/gemma/serve.yaml index 73f5b9c2b5d..4c5a2c984c5 100644 --- a/llm/gemma/serve.yaml +++ b/llm/gemma/serve.yaml @@ -17,7 +17,7 @@ service: envs: MODEL_NAME: google/gemma-7b-it - HF_TOKEN: # TODO: Replace with huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. resources: accelerators: {L4, A10g, A10, L40, A40, A100, A100-80GB} diff --git a/llm/llama-2/README.md b/llm/llama-2/README.md index 7b20ea4aed7..d8f8151572e 100644 --- a/llm/llama-2/README.md +++ b/llm/llama-2/README.md @@ -33,7 +33,7 @@ Fill the access token in the [chatbot-hf.yaml](https://github.com/skypilot-org/s ```yaml envs: MODEL_SIZE: 7 - HF_TOKEN: + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. 
``` diff --git a/llm/llama-2/chatbot-hf.yaml b/llm/llama-2/chatbot-hf.yaml index 992c01346e6..ee9d0281296 100644 --- a/llm/llama-2/chatbot-hf.yaml +++ b/llm/llama-2/chatbot-hf.yaml @@ -6,7 +6,7 @@ resources: envs: MODEL_SIZE: 7 - HF_TOKEN: # TODO: Replace with huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. setup: | conda activate chatbot diff --git a/llm/llama-2/chatbot-meta.yaml b/llm/llama-2/chatbot-meta.yaml index a0481fe760f..733a2a867d2 100644 --- a/llm/llama-2/chatbot-meta.yaml +++ b/llm/llama-2/chatbot-meta.yaml @@ -6,7 +6,7 @@ resources: envs: MODEL_SIZE: 7 - HF_TOKEN: # TODO: Replace with huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. setup: | set -ex diff --git a/llm/llama-3/README.md b/llm/llama-3/README.md index 7b3b6cb56e5..decff6054bf 100644 --- a/llm/llama-3/README.md +++ b/llm/llama-3/README.md @@ -44,7 +44,7 @@ envs: MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct # MODEL_NAME: meta-llama/Meta-Llama-3-8B-Instruct - HF_TOKEN: # Change to your own huggingface token, or use --env to pass. + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. service: replicas: 2 diff --git a/llm/llama-3/llama3.yaml b/llm/llama-3/llama3.yaml index 0974d4db51b..1e9b236efd4 100644 --- a/llm/llama-3/llama3.yaml +++ b/llm/llama-3/llama3.yaml @@ -59,7 +59,7 @@ envs: MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct # MODEL_NAME: meta-llama/Meta-Llama-3-8B-Instruct - HF_TOKEN: # Change to your own huggingface token, or use --env to pass. + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. service: replicas: 2 diff --git a/llm/sglang/llama2.yaml b/llm/sglang/llama2.yaml index 08427ab2001..8b58c4365d6 100644 --- a/llm/sglang/llama2.yaml +++ b/llm/sglang/llama2.yaml @@ -6,7 +6,7 @@ service: envs: MODEL_NAME: meta-llama/Llama-2-7b-chat-hf - HF_TOKEN: # Change to your own huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. resources: accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1} diff --git a/llm/vicuna-llama-2/README.md b/llm/vicuna-llama-2/README.md index 0fc5da6c4ba..899792c299d 100644 --- a/llm/vicuna-llama-2/README.md +++ b/llm/vicuna-llama-2/README.md @@ -31,7 +31,7 @@ cd skypilot/llm/vicuna-llama-2 Paste the access token into [train.yaml](https://github.com/skypilot-org/skypilot/tree/master/llm/vicuna-llama-2/train.yaml): ```yaml envs: - HF_TOKEN: # Change to your own huggingface token + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. ``` ## Train your own Vicuna on Llama-2 diff --git a/llm/vicuna-llama-2/train.yaml b/llm/vicuna-llama-2/train.yaml index e23d5797e76..8d35c2dff85 100644 --- a/llm/vicuna-llama-2/train.yaml +++ b/llm/vicuna-llama-2/train.yaml @@ -1,7 +1,7 @@ envs: - HF_TOKEN: # Change to your own huggingface token - ARTIFACT_BUCKET_NAME: YOUR_OWN_BUCKET_NAME # Change to your own bucket name - WANDB_API_KEY: "" # Change to your own wandb api key + HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass. + ARTIFACT_BUCKET_NAME: # TODO: Fill with your unique bucket name, or use --env to pass. + WANDB_API_KEY: # TODO: Fill with your own WANDB_API_KEY, or use --env to pass. 
   MODEL_SIZE: 7
   USE_XFORMERS: 1
diff --git a/llm/vicuna/train.yaml b/llm/vicuna/train.yaml
index c577561e858..a2121aaf8fd 100644
--- a/llm/vicuna/train.yaml
+++ b/llm/vicuna/train.yaml
@@ -1,3 +1,10 @@
+envs:
+  MODEL_SIZE: 7
+  SEQ_LEN: 2048
+  GC_SCALE: 4
+  USE_FLASH_ATTN: 0
+  WANDB_API_KEY: # TODO: Fill with your own WANDB_API_KEY, or use --env to pass.
+
 resources:
   accelerators: A100-80GB:8
   disk_size: 1000
@@ -109,10 +116,3 @@ run: |
     gsutil -m rsync -r -x 'checkpoint-*' $LOCAL_CKPT_PATH/ $CKPT_PATH/
 
   exit $returncode
-
-envs:
-  MODEL_SIZE: 7
-  SEQ_LEN: 2048
-  GC_SCALE: 4
-  USE_FLASH_ATTN: 0
-  WANDB_API_KEY: ""
diff --git a/llm/vllm/serve-openai-api.yaml b/llm/vllm/serve-openai-api.yaml
index 9ddf7b280ba..a68f476edc7 100644
--- a/llm/vllm/serve-openai-api.yaml
+++ b/llm/vllm/serve-openai-api.yaml
@@ -1,6 +1,6 @@
 envs:
   MODEL_NAME: meta-llama/Llama-2-7b-chat-hf
-  HF_TOKEN: # Change to your own huggingface token
+  HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 
 resources:
   accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1}
diff --git a/llm/vllm/service.yaml b/llm/vllm/service.yaml
index 335f8a50650..1e5d92a60e5 100644
--- a/llm/vllm/service.yaml
+++ b/llm/vllm/service.yaml
@@ -9,7 +9,7 @@ service:
 # Fields below are the same with `serve-openai-api.yaml`.
 envs:
   MODEL_NAME: meta-llama/Llama-2-7b-chat-hf
-  HF_TOKEN: # Change to your own huggingface token
+  HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
 
 resources:
   accelerators: {L4:1, A10G:1, A10:1, A100:1, A100-80GB:1}
diff --git a/sky/task.py b/sky/task.py
index b6a71581a15..3dd254838f0 100644
--- a/sky/task.py
+++ b/sky/task.py
@@ -353,8 +353,13 @@ def from_yaml_config(
         # as int causing validate_schema() to fail.
         envs = config.get('envs')
         if envs is not None and isinstance(envs, dict):
-            config['envs'] = {str(k): str(v) for k, v in envs.items()}
-
+            new_envs: Dict[str, Optional[str]] = {}
+            for k, v in envs.items():
+                if v is not None:
+                    new_envs[str(k)] = str(v)
+                else:
+                    new_envs[str(k)] = None
+            config['envs'] = new_envs
         common_utils.validate_schema(config, schemas.get_task_schema(),
                                      'Invalid task YAML: ')
         if env_overrides is not None:
@@ -368,6 +373,15 @@ def from_yaml_config(
             new_envs.update(env_overrides)
             config['envs'] = new_envs
 
+        for k, v in config.get('envs', {}).items():
+            if v is None:
+                with ux_utils.print_exception_no_traceback():
+                    raise ValueError(
+                        f'Environment variable {k!r} is None. Please set a '
+                        'value for it in the task YAML or with the --env flag. '
+                        'To set it to an empty value, use an empty string '
+                        f'({k}: "" in the task YAML or --env {k}="" in the CLI).')
+
         # Fill in any Task.envs into file_mounts (src/dst paths, storage
         # name/source).
         if config.get('file_mounts') is not None:
diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py
index c50e15185a3..878fe67178e 100644
--- a/sky/utils/schemas.py
+++ b/sky/utils/schemas.py
@@ -402,7 +402,7 @@ def get_task_schema():
             'patternProperties': {
                 # Checks env keys are valid env var names.
                 '^[a-zA-Z_][a-zA-Z0-9_]*$': {
-                    'type': 'string'
+                    'type': ['string', 'null']
                 }
             },
             'additionalProperties': False,
diff --git a/tests/test_yaml_parser.py b/tests/test_yaml_parser.py
index 0338084925e..1453cfe1620 100644
--- a/tests/test_yaml_parser.py
+++ b/tests/test_yaml_parser.py
@@ -134,3 +134,15 @@ def test_invalid_envs_type(tmp_path):
     with pytest.raises(ValueError) as e:
         Task.from_yaml(config_path)
     assert 'is not of type \'dict\'' in e.value.args[0]
+
+
+def test_invalid_null_envs(tmp_path):
+    config_path = _create_config_file(
+        textwrap.dedent("""\
+            envs:
+              env_key1: abc
+              env_key2:
+            """), tmp_path)
+    with pytest.raises(ValueError) as e:
+        Task.from_yaml(config_path)
+    assert 'Environment variable \'env_key2\' is None.' in e.value.args[0]
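As a complement to the negative test above, a hedged sketch of the happy path: a null env var passes once an override (what `--env` supplies) is merged in, since overrides are applied before the None check runs. This assumes `Task.from_yaml_config` accepts `env_overrides` as key/value pairs, as the `sky/task.py` hunk suggests:

```python
# Hedged sketch, not part of the patch. A null env var fails validation on
# its own, but passes once an override (the CLI's --env) supplies a value,
# because overrides are merged before the None check in from_yaml_config.
# Assumes env_overrides takes (key, value) pairs, per the hunk above.
import pytest

from sky.task import Task

config = {'run': 'echo "$HF_TOKEN"', 'envs': {'HF_TOKEN': None}}

with pytest.raises(ValueError, match='HF_TOKEN'):
    Task.from_yaml_config(config)

task = Task.from_yaml_config(config, env_overrides=[('HF_TOKEN', 'hf_xxx')])
assert task.envs['HF_TOKEN'] == 'hf_xxx'
```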