Skip to content

Commit

Permalink
Adding Phi-3-medium TPU configs to Colab deployment notebook
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 687337803
  • Loading branch information
vertex-mg-bot authored and copybara-github committed Oct 18, 2024
1 parent fb1871d commit 00c35f4
Showing 1 changed file with 29 additions and 21 deletions.
50 changes: 29 additions & 21 deletions notebooks/community/model_garden/model_garden_phi3_deployment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,18 @@
"source": [
"# Vertex AI Model Garden - Phi-3 (Deployment)\n",
"\n",
"<table><tbody><tr>\n",
" <td style=\"text-align: center\">\n",
" <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_phi3_deployment.ipynb\">\n",
" <img alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"><br> Run in Colab Enterprise\n",
" </a>\n",
" </td>\n",
" <td style=\"text-align: center\">\n",
" <a href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb\">\n",
" <img alt=\"GitHub logo\" src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" width=\"32px\"><br> View on GitHub\n",
" </a>\n",
" </td>\n",
"</tr></tbody></table>"
"\u003ctable\u003e\u003ctbody\u003e\u003ctr\u003e\n",
" \u003ctd style=\"text-align: center\"\u003e\n",
" \u003ca href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_phi3_deployment.ipynb\"\u003e\n",
" \u003cimg alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"\u003e\u003cbr\u003e Run in Colab Enterprise\n",
" \u003c/a\u003e\n",
" \u003c/td\u003e\n",
" \u003ctd style=\"text-align: center\"\u003e\n",
" \u003ca href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb\"\u003e\n",
" \u003cimg alt=\"GitHub logo\" src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" width=\"32px\"\u003e\u003cbr\u003e View on GitHub\n",
" \u003c/a\u003e\n",
" \u003c/td\u003e\n",
"\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e"
]
},
{
Expand Down Expand Up @@ -103,7 +103,7 @@
"REGION = \"\" # @param {type:\"string\"}\n",
"\n",
"# Upgrade Vertex AI SDK.\n",
"! pip3 install --upgrade --quiet 'google-cloud-aiplatform>=1.64.0'\n",
"! pip3 install --upgrade --quiet 'google-cloud-aiplatform\u003e=1.64.0'\n",
"\n",
"# Import the necessary packages\n",
"import datetime\n",
Expand Down Expand Up @@ -366,7 +366,7 @@
" use_dedicated_endpoint: bool = False,\n",
" max_num_seqs: int = 256,\n",
" model_type: str = None,\n",
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
") -\u003e Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
" \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n",
" endpoint = aiplatform.Endpoint.create(\n",
" display_name=f\"{model_name}-endpoint\",\n",
Expand Down Expand Up @@ -551,7 +551,7 @@
"# @markdown This section uploads prebuilt Phi-3 models to Model Registry and deploys it to a Vertex AI Endpoint. It takes 15 minutes to 1 hour to finish depending on the size of the model.\n",
"\n",
"# @markdown Select one of the four model variations.\n",
"MODEL_ID = \"Phi-3-mini-4k-instruct\" # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\"] {isTemplate: true}\n",
"MODEL_ID = \"Phi-3-mini-4k-instruct\" # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\", \"Phi-3-medium-4k-instruct\", \"Phi-3-medium-128k-instruct\"] {isTemplate: true}\n",
"TPU_DEPLOYMENT_REGION = \"us-west1\" # @param [\"us-west1\"] {isTemplate:true}\n",
"model_id = os.path.join(MODEL_BUCKET, MODEL_ID)\n",
"hf_model_id = \"microsoft/\" + MODEL_ID\n",
Expand All @@ -564,8 +564,10 @@
"\n",
"# @markdown | Model Version | Default Max Model Length | Default TPU configuration |\n",
"# @markdown |----------------------------|------------------|-----------------------------|\n",
"# @markdown | Phi-3-mini-4k-instruct | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n",
"# @markdown | Phi-3-mini-4k-instruct | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n",
"# @markdown | Phi-3-mini-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n",
"# @markdown | Phi-3-medium-4k-instruct | 4096 | 4 TPU_V5e ct5lp-hightpu-4t |\n",
"# @markdown | Phi-3-medium-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n",
"\n",
"\n",
"# Note: 1 TPU V5 chip has only one core.\n",
Expand All @@ -576,7 +578,12 @@
" tpu_topo = \"1x4\"\n",
" max_model_len = 4096\n",
" machine_type = \"ct5lp-hightpu-1t\"\n",
"elif \"mini-128k\" in MODEL_ID:\n",
"elif \"medium-4k\" in MODEL_ID:\n",
" tpu_count = 4\n",
" tpu_topo = \"4x4\"\n",
" max_model_len = 4096\n",
" machine_type = \"ct5lp-hightpu-4t\"\n",
"elif \"mini-128k\" or \"medium-128k\" in MODEL_ID:\n",
" tpu_count = 4\n",
" max_model_len = 131072\n",
" tpu_topo = \"4x4\"\n",
Expand Down Expand Up @@ -621,7 +628,7 @@
" min_replica_count: int = 1,\n",
" max_replica_count: int = 1,\n",
" use_dedicated_endpoint: bool = False,\n",
") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
") -\u003e Tuple[aiplatform.Model, aiplatform.Endpoint]:\n",
" \"\"\"Deploys models with Hex-LLM on TPU in Vertex AI.\"\"\"\n",
" if endpoint_id:\n",
" aip_endpoint_name = (\n",
Expand Down Expand Up @@ -684,7 +691,7 @@
" model.deploy(\n",
" endpoint=endpoint,\n",
" machine_type=machine_type,\n",
" tpu_topology=tpu_topology if num_hosts > 1 else None,\n",
" tpu_topology=tpu_topology if num_hosts \u003e 1 else None,\n",
" deploy_request_timeout=1800,\n",
" service_account=service_account,\n",
" min_replica_count=min_replica_count,\n",
Expand Down Expand Up @@ -728,8 +735,8 @@
"# @markdown Example:\n",
"\n",
"# @markdown ```\n",
"# @markdown > What is a car?\n",
"# @markdown > A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n",
"# @markdown \u003e What is a car?\n",
"# @markdown \u003e A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n",
"# @markdown ```\n",
"\n",
"# @markdown Additionally, you can moderate the generated text with Vertex AI. See [Moderate text documentation](https://cloud.google.com/natural-language/docs/moderating-text) for more details.\n",
Expand Down Expand Up @@ -813,6 +820,7 @@
"metadata": {
"colab": {
"name": "model_garden_phi3_deployment.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
Expand Down

0 comments on commit 00c35f4

Please sign in to comment.