From 00c35f49fa4a0560486d3bb7868378544891adcb Mon Sep 17 00:00:00 2001 From: Vertex MG Team Date: Fri, 18 Oct 2024 09:55:15 -0700 Subject: [PATCH] Adding Phi-3-medium TPU configs to Colab deployment notebook PiperOrigin-RevId: 687337803 --- .../model_garden_phi3_deployment.ipynb | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb b/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb index e604ce7ae1..82a6bbbcc7 100644 --- a/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb @@ -32,18 +32,18 @@ "source": [ "# Vertex AI Model Garden - Phi-3 (Deployment)\n", "\n", - "\n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Run in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
" + "\u003ctable\u003e\u003ctbody\u003e\u003ctr\u003e\n", + " \u003ctd style=\"text-align: center\"\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_phi3_deployment.ipynb\"\u003e\n", + " \u003cimg alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"\u003e\u003cbr\u003e Run in Colab Enterprise\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd style=\"text-align: center\"\u003e\n", + " \u003ca href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb\"\u003e\n", + " \u003cimg alt=\"GitHub logo\" src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" width=\"32px\"\u003e\u003cbr\u003e View on GitHub\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e" ] }, { @@ -103,7 +103,7 @@ "REGION = \"\" # @param {type:\"string\"}\n", "\n", "# Upgrade Vertex AI SDK.\n", - "! pip3 install --upgrade --quiet 'google-cloud-aiplatform>=1.64.0'\n", + "! pip3 install --upgrade --quiet 'google-cloud-aiplatform\u003e=1.64.0'\n", "\n", "# Import the necessary packages\n", "import datetime\n", @@ -366,7 +366,7 @@ " use_dedicated_endpoint: bool = False,\n", " max_num_seqs: int = 256,\n", " model_type: str = None,\n", - ") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", + ") -\u003e Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", " \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n", " endpoint = aiplatform.Endpoint.create(\n", " display_name=f\"{model_name}-endpoint\",\n", @@ -551,7 +551,7 @@ "# @markdown This section uploads prebuilt Phi-3 models to Model Registry and deploys it to a Vertex AI Endpoint. 
It takes 15 minutes to 1 hour to finish depending on the size of the model.\n", "\n", "# @markdown Select one of the four model variations.\n", - "MODEL_ID = \"Phi-3-mini-4k-instruct\"  # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\"] {isTemplate: true}\n", + "MODEL_ID = \"Phi-3-mini-4k-instruct\"  # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\", \"Phi-3-medium-4k-instruct\", \"Phi-3-medium-128k-instruct\"] {isTemplate: true}\n", "TPU_DEPLOYMENT_REGION = \"us-west1\"  # @param [\"us-west1\"] {isTemplate:true}\n", "model_id = os.path.join(MODEL_BUCKET, MODEL_ID)\n", "hf_model_id = \"microsoft/\" + MODEL_ID\n", @@ -564,8 +564,10 @@ "\n", "# @markdown | Model Version | Default Max Model Length | Default TPU configuration |\n", "# @markdown |----------------------------|------------------|-----------------------------|\n", - "# @markdown | Phi-3-mini-4k-instruct | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n", + "# @markdown | Phi-3-mini-4k-instruct     | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n", "# @markdown | Phi-3-mini-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n", + "# @markdown | Phi-3-medium-4k-instruct   | 4096 | 4 TPU_V5e ct5lp-hightpu-4t |\n", + "# @markdown | Phi-3-medium-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n", "\n", "\n", "# Note: 1 TPU V5 chip has only one core.\n", @@ -576,7 +578,12 @@ " tpu_topo = \"1x4\"\n", " max_model_len = 4096\n", " machine_type = \"ct5lp-hightpu-1t\"\n", - "elif \"mini-128k\" in MODEL_ID:\n", + "elif \"medium-4k\" in MODEL_ID:\n", + " tpu_count = 4\n", + " tpu_topo = \"4x4\"\n", + " max_model_len = 4096\n", + " machine_type = \"ct5lp-hightpu-4t\"\n", + "elif \"mini-128k\" in MODEL_ID or \"medium-128k\" in MODEL_ID:\n", " tpu_count = 4\n", " max_model_len = 131072\n", " tpu_topo = \"4x4\"\n", " machine_type = \"ct5lp-hightpu-4t\"\n", @@ -621,7 +628,7 @@ " min_replica_count: int = 1,\n", " max_replica_count: int = 1,\n", " use_dedicated_endpoint: bool = False,\n", - ") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", + ") -\u003e 
Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", " \"\"\"Deploys models with Hex-LLM on TPU in Vertex AI.\"\"\"\n", " if endpoint_id:\n", " aip_endpoint_name = (\n", @@ -684,7 +691,7 @@ " model.deploy(\n", " endpoint=endpoint,\n", " machine_type=machine_type,\n", - " tpu_topology=tpu_topology if num_hosts > 1 else None,\n", + " tpu_topology=tpu_topology if num_hosts \u003e 1 else None,\n", " deploy_request_timeout=1800,\n", " service_account=service_account,\n", " min_replica_count=min_replica_count,\n", @@ -728,8 +735,8 @@ "# @markdown Example:\n", "\n", "# @markdown ```\n", - "# @markdown > What is a car?\n", - "# @markdown > A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n", + "# @markdown \u003e What is a car?\n", + "# @markdown \u003e A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n", "# @markdown ```\n", "\n", "# @markdown Additionally, you can moderate the generated text with Vertex AI. See [Moderate text documentation](https://cloud.google.com/natural-language/docs/moderating-text) for more details.\n", @@ -813,6 +820,7 @@ "metadata": { "colab": { "name": "model_garden_phi3_deployment.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": {