Skip to content

Commit

Permalink
Add Gaudi2 Support to TGI Chart (#414)
Browse files Browse the repository at this point in the history
Signed-off-by: tylertitsworth <[email protected]>
  • Loading branch information
Tyler Titsworth authored Sep 26, 2024
1 parent 7bc90d1 commit 600e196
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 31 deletions.
2 changes: 1 addition & 1 deletion workflows/charts/tgi/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.2.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
9 changes: 5 additions & 4 deletions workflows/charts/tgi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,24 @@ For more information about how to use Huggingface text-generation-inference with
> [!TIP]
> For Gaudi-related documentation, check out [tgi-gaudi](https://github.com/huggingface/tgi-gaudi).
![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)
![Version: 0.2.0](https://img.shields.io/badge/Version-0.2.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.16.0](https://img.shields.io/badge/AppVersion-1.16.0-informational?style=flat-square)

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| deploy.configMapName | string | `"intel-proxy-config"` | ConfigMap of Environment Variables |
| deploy.configMap | object | `{"enabled":true,"name":"tgi-config"}` | ConfigMap of Environment Variables |
| deploy.image | string | `"ghcr.io/huggingface/text-generation-inference:latest-intel"` | Intel TGI Image |
| deploy.model | string | `"HuggingFaceTB/SmolLM-135M"` | Model to be loaded |
| deploy.quantize | string | `""` | Enable Quantization (ex: bitsandbytes-nf4) |
| deploy.replicaCount | int | `1` | Number of pods |
| deploy.resources | object | `{"limits":{"cpu":"4000m","gpu.intel.com/i915":1},"requests":{"cpu":"1000m","memory":"1Gi"}}` | Resource configuration |
| deploy.resources.limits."gpu.intel.com/i915" | int | `1` | Intel GPU Device Configuration |
| fullnameOverride | string | `""` | Full qualified Domain Name |
| ingress | object | `{"annotations":{},"className":"","enabled":false,"hosts":[{"host":"chart-example.local","paths":[{"path":"/","pathType":"ImplementationSpecific"}]}],"tls":[]}` | Ingress configuration |
| nameOverride | string | `""` | Name of the serving service |
| pvc.size | string | `"15Gi"` | |
| pvc.storageClassName | string | `"nil"` | |
| secret.encodedToken | string | `""` | Base64 Encoded Huggingface Hub API Token |
| securityContext | object | `{}` | Security Context Configuration |
| service | object | `{"port":80,"type":"NodePort"}` | Service configuration |

----------------------------------------------
Expand Down
1 change: 0 additions & 1 deletion workflows/charts/tgi/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@
{{- else if contains "ClusterIP" .Values.service.type }}
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "tgi.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
48 changes: 28 additions & 20 deletions workflows/charts/tgi/templates/deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,54 +28,62 @@ spec:
labels:
{{- include "tgi.selectorLabels" . | nindent 8 }}
spec:
securityContext:
fsGroup: 1000
runAsUser: 1000
hostIPC: true
containers:
- name: {{ .Chart.Name }}
args:
- '--model-id'
- {{ .Values.deploy.model | quote }}
{{- if index .Values.deploy.resources.limits "gpu.intel.com/i915" }}
- '--num-shard'
- {{ index .Values.deploy.resources.limits "gpu.intel.com/i915" | quote }}
{{- end }}
- '-p'
- {{ .Values.service.port | quote }}
{{- if .Values.deploy.quantize }}
- '--quantize'
- {{ .Values.deploy.quantize | quote }}
{{- end }}
- '--cuda-graphs=0'
envFrom:
{{- if eq .Values.deploy.configMap.enabled true }}
- configMapRef:
name: {{ .Values.deploy.configMapName }}
name: {{ .Values.deploy.configMap.name }}
{{- end }}
- secretRef:
name: {{ .Release.Name }}-hf-token
env:
- name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
value: /data/numba_cache
# env:
# - name: NUMBA_CACHE_DIR # https://github.com/huggingface/text-generation-inference/pull/2443
# value: /data/numba_cache
image: {{ .Values.deploy.image }}
livenessProbe:
httpGet:
path: /health
port: {{ .Values.service.port }}
failureThreshold: 10
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
readinessProbe:
initialDelaySeconds: 5
periodSeconds: 5
tcpSocket:
port: http
startupProbe:
failureThreshold: 120
initialDelaySeconds: 20
periodSeconds: 5
tcpSocket:
port: http
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
resources:
{{- toYaml .Values.deploy.resources | nindent 12 }}
securityContext:
{{ toYaml .Values.securityContext | nindent 12 }}
volumeMounts:
- mountPath: /dev/shm
name: dshm
- mountPath: /data
name: hf-data
- mountPath: /tmp
name: tmp
volumes:
- name: dshm
emptyDir:
medium: Memory
- name: hf-data
persistentVolumeClaim:
claimName: {{ include "tgi.fullname" . }}-cache
- name: tmp
emptyDir: {}
29 changes: 29 additions & 0 deletions workflows/charts/tgi/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# PersistentVolumeClaim backing the TGI Huggingface model cache
# (mounted at /data by deploy.yaml in this chart).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  # Release/chart full name with a "-cache" suffix; deploy.yaml references
  # the same `{{ include "tgi.fullname" . }}-cache` name for its volume.
  name: {{ include "tgi.fullname" . }}-cache
  labels:
    {{- include "tgi.labels" . | nindent 4 }}
spec:
  {{- if .Values.pvc.storageClassName }}
  # Only rendered when a storage class is set; otherwise the field is
  # omitted and the cluster default storage class applies.
  # NOTE(review): the chart default `storageClassName: nil` in values.yaml
  # is the YAML *string* "nil", which is truthy here and would render
  # `storageClassName: nil` — confirm intent (use null to omit the field).
  storageClassName: {{ .Values.pvc.storageClassName }}
  {{- end }}
  # ReadWriteMany so multiple replicas can share the downloaded model
  # cache — requires a storage class supporting RWX; verify on the
  # target cluster.
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: {{ .Values.pvc.size }}
1 change: 1 addition & 0 deletions workflows/charts/tgi/templates/secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

{{- $name := .Values.secret.encodedToken | required ".Values.secret.encodedToken is required in Base64 Format." -}}
---
apiVersion: v1
kind: Secret
metadata:
Expand Down
13 changes: 8 additions & 5 deletions workflows/charts/tgi/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ nameOverride: ""
fullnameOverride: ""
deploy:
# -- ConfigMap of Environment Variables
configMapName: intel-proxy-config
configMap:
enabled: true
name: tgi-config
# -- Intel TGI Image
image: ghcr.io/huggingface/text-generation-inference:latest-intel
# -- Model to be loaded
model: HuggingFaceTB/SmolLM-135M
# -- Enable Quantization (ex: bitsandbytes-nf4)
quantize: ""
# -- Number of pods
replicaCount: 1
# -- Resource configuration
Expand All @@ -39,6 +37,8 @@ deploy:
requests:
cpu: 1000m
memory: "1Gi"
# -- Security Context Configuration
securityContext: {}
secret:
# -- Base64 Encoded Huggingface Hub API Token
encodedToken: ""
Expand All @@ -62,3 +62,6 @@ ingress:
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
pvc:
# null (not the string "nil") so the pvc.yaml if-guard omits the field
# and the cluster default storage class is used
storageClassName: null
size: 15Gi

0 comments on commit 600e196

Please sign in to comment.