From 00c35f49fa4a0560486d3bb7868378544891adcb Mon Sep 17 00:00:00 2001 From: Vertex MG Team Date: Fri, 18 Oct 2024 09:55:15 -0700 Subject: [PATCH] Adding Phi-3-medium TPU configs to Colab deployment notebook PiperOrigin-RevId: 687337803 --- .../model_garden_phi3_deployment.ipynb | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb b/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb index e604ce7ae1..82a6bbbcc7 100644 --- a/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb +++ b/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb @@ -32,18 +32,18 @@ "source": [ "# Vertex AI Model Garden - Phi-3 (Deployment)\n", "\n", - "\n", - " \n", - " \n", - "
\n", - " \n", - " \"Google
Run in Colab Enterprise\n", - "
\n", - "
\n", - " \n", - " \"GitHub
View on GitHub\n", - "
\n", - "
" + "\u003ctable\u003e\u003ctbody\u003e\u003ctr\u003e\n", + " \u003ctd style=\"text-align: center\"\u003e\n", + " \u003ca href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_phi3_deployment.ipynb\"\u003e\n", + " \u003cimg alt=\"Google Cloud Colab Enterprise logo\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" width=\"32px\"\u003e\u003cbr\u003e Run in Colab Enterprise\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd style=\"text-align: center\"\u003e\n", + " \u003ca href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_phi3_deployment.ipynb\"\u003e\n", + " \u003cimg alt=\"GitHub logo\" src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" width=\"32px\"\u003e\u003cbr\u003e View on GitHub\n", + " \u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/tr\u003e\u003c/tbody\u003e\u003c/table\u003e" ] }, { @@ -103,7 +103,7 @@ "REGION = \"\" # @param {type:\"string\"}\n", "\n", "# Upgrade Vertex AI SDK.\n", - "! pip3 install --upgrade --quiet 'google-cloud-aiplatform>=1.64.0'\n", + "! pip3 install --upgrade --quiet 'google-cloud-aiplatform\u003e=1.64.0'\n", "\n", "# Import the necessary packages\n", "import datetime\n", @@ -366,7 +366,7 @@ " use_dedicated_endpoint: bool = False,\n", " max_num_seqs: int = 256,\n", " model_type: str = None,\n", - ") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", + ") -\u003e Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", " \"\"\"Deploys trained models with vLLM into Vertex AI.\"\"\"\n", " endpoint = aiplatform.Endpoint.create(\n", " display_name=f\"{model_name}-endpoint\",\n", @@ -551,7 +551,7 @@ "# @markdown This section uploads prebuilt Phi-3 models to Model Registry and deploys it to a Vertex AI Endpoint. 
It takes 15 minutes to 1 hour to finish depending on the size of the model.\n", "\n", "# @markdown Select one of the four model variations.\n", - "MODEL_ID = \"Phi-3-mini-4k-instruct\"  # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\"] {isTemplate: true}\n", + "MODEL_ID = \"Phi-3-mini-4k-instruct\"  # @param [\"Phi-3-mini-4k-instruct\", \"Phi-3-mini-128k-instruct\", \"Phi-3-medium-4k-instruct\", \"Phi-3-medium-128k-instruct\"] {isTemplate: true}\n", "TPU_DEPLOYMENT_REGION = \"us-west1\"  # @param [\"us-west1\"] {isTemplate:true}\n", "model_id = os.path.join(MODEL_BUCKET, MODEL_ID)\n", "hf_model_id = \"microsoft/\" + MODEL_ID\n", @@ -564,8 +564,10 @@ "\n", "# @markdown | Model Version | Default Max Model Length | Default TPU configuration |\n", "# @markdown |----------------------------|------------------|-----------------------------|\n", - "# @markdown | Phi-3-mini-4k-instruct | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n", + "# @markdown | Phi-3-mini-4k-instruct     | 4096 | 1 TPU_V5e ct5lp-hightpu-1t |\n", "# @markdown | Phi-3-mini-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n", + "# @markdown | Phi-3-medium-4k-instruct   | 4096 | 4 TPU_V5e ct5lp-hightpu-4t |\n", + "# @markdown | Phi-3-medium-128k-instruct | 131072 | 4 TPU_V5e ct5lp-hightpu-4t |\n", "\n", "\n", "# Note: 1 TPU V5 chip has only one core.\n", @@ -576,7 +578,12 @@ " tpu_topo = \"1x4\"\n", " max_model_len = 4096\n", " machine_type = \"ct5lp-hightpu-1t\"\n", - "elif \"mini-128k\" in MODEL_ID:\n", + "elif \"medium-4k\" in MODEL_ID:\n", + " tpu_count = 4\n", + " tpu_topo = \"4x4\"\n", + " max_model_len = 4096\n", + " machine_type = \"ct5lp-hightpu-4t\"\n", + "elif \"mini-128k\" in MODEL_ID or \"medium-128k\" in MODEL_ID:\n", " tpu_count = 4\n", " max_model_len = 131072\n", " tpu_topo = \"4x4\"\n", " machine_type = \"ct5lp-hightpu-4t\"\n", @@ -621,7 +628,7 @@ " min_replica_count: int = 1,\n", " max_replica_count: int = 1,\n", " use_dedicated_endpoint: bool = False,\n", - ") -> Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", + ") -\u003e 
Tuple[aiplatform.Model, aiplatform.Endpoint]:\n", " \"\"\"Deploys models with Hex-LLM on TPU in Vertex AI.\"\"\"\n", " if endpoint_id:\n", " aip_endpoint_name = (\n", @@ -684,7 +691,7 @@ " model.deploy(\n", " endpoint=endpoint,\n", " machine_type=machine_type,\n", - " tpu_topology=tpu_topology if num_hosts > 1 else None,\n", + " tpu_topology=tpu_topology if num_hosts \u003e 1 else None,\n", " deploy_request_timeout=1800,\n", " service_account=service_account,\n", " min_replica_count=min_replica_count,\n", @@ -728,8 +735,8 @@ "# @markdown Example:\n", "\n", "# @markdown ```\n", - "# @markdown > What is a car?\n", - "# @markdown > A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n", + "# @markdown \u003e What is a car?\n", + "# @markdown \u003e A car is a four-wheeled vehicle designed for the transportation of passengers and their belongings.\n", "# @markdown ```\n", "\n", "# @markdown Additionally, you can moderate the generated text with Vertex AI. See [Moderate text documentation](https://cloud.google.com/natural-language/docs/moderating-text) for more details.\n", @@ -813,6 +820,7 @@ "metadata": { "colab": { "name": "model_garden_phi3_deployment.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": {