forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into libsodium-flags
- Loading branch information
Showing
2 changed files
with
293 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
## Global Args ################################################################# | ||
ARG BASE_UBI_IMAGE_TAG=9.4 | ||
ARG PYTHON_VERSION=3.11 | ||
# Default ROCm ARCHes to build vLLM for. | ||
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" | ||
|
||
## Base Layer ################################################################## | ||
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as rocm-base | ||
|
||
# Max jobs for parallel build | ||
ARG MAX_JOBS=12 | ||
|
||
ENV BUILD_TARGET='rocm' | ||
|
||
USER root | ||
|
||
ENV ROCM_VERSION=6.1.2 | ||
|
||
# Set up ROCm repository and install necessary packages | ||
|
||
RUN echo "[amdgpu]" > /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "name=amdgpu" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.4/main/x86_64/" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "[ROCm-${ROCM_VERSION}]" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "name=ROCm${ROCM_VERSION}" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \ | ||
echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo | ||
|
||
RUN microdnf -y update && \ | ||
microdnf -y install rocm hipcc git which && \ | ||
microdnf clean all | ||
|
||
WORKDIR /workspace | ||
|
||
################################################################################################## | ||
|
||
FROM rocm-base as python-install | ||
ARG PYTHON_VERSION | ||
|
||
ENV VIRTUAL_ENV=/opt/vllm | ||
ENV PATH="$VIRTUAL_ENV/bin:$PATH" | ||
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs \ | ||
python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \ | ||
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && \ | ||
$VIRTUAL_ENV/bin/pip install --no-cache -U pip wheel && \ | ||
microdnf clean all | ||
|
||
################################################################################################## | ||
|
||
FROM python-install as python-rocm-base | ||
|
||
# install common dependencies | ||
RUN --mount=type=cache,target=/root/.cache/pip \ | ||
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt,readonly \ | ||
--mount=type=bind,source=requirements-rocm.txt,target=requirements-rocm.txt,readonly \ | ||
pip install -r requirements-rocm.txt | ||
|
||
################################################################################################## | ||
|
||
FROM python-rocm-base as base | ||
|
||
# Set the application mount point | ||
ARG APP_MOUNT=/vllm-workspace | ||
WORKDIR ${APP_MOUNT} | ||
|
||
# Upgrade pip and remove unnecessary packages | ||
RUN python3 -m pip install --upgrade --no-cache-dir pip && \ | ||
microdnf -y remove sccache || true && \ | ||
python3 -m pip uninstall -y sccache || true && \ | ||
rm -f "$(which sccache)" && \ | ||
microdnf clean all && \ | ||
rm -rf /var/cache/yum /var/cache/dnf | ||
|
||
# Install torch == 2.5.0 on ROCm | ||
RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \ | ||
*"rocm-6.1"*) \ | ||
python3 -m pip uninstall -y torch torchvision \ | ||
&& python3 -m pip install --no-cache-dir --pre \ | ||
torch==2.5.0.dev20240726 \ | ||
torchvision==0.20.0.dev20240726 \ | ||
--index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \ | ||
*) ;; esac | ||
|
||
# Set environment variables | ||
ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer | ||
ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin | ||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/libtorch/lib | ||
ENV CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/libtorch/include:/libtorch/include/torch/csrc/api/include:/opt/rocm/include | ||
ENV PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" | ||
ENV CCACHE_DIR=/root/.cache/ccache | ||
|
||
################################################################################################## | ||
|
||
FROM base as build_base | ||
|
||
RUN python3 -m pip install --upgrade --no-cache-dir ninja cmake>=3.26 | ||
|
||
################################################################################################## | ||
|
||
################################################################################################## | ||
|
||
### AMD-SMI build stage | ||
FROM build_base AS build_amdsmi | ||
|
||
# Build AMD SMI wheel | ||
RUN cd /opt/rocm/share/amd_smi && \ | ||
python3 -m pip wheel . --wheel-dir=/install | ||
|
||
################################################################################################## | ||
|
||
### Flash-Attention wheel build stage | ||
FROM build_base AS build_fa | ||
|
||
# Whether to install CK-based flash-attention | ||
ARG BUILD_FA="1" | ||
ARG TRY_FA_WHEEL="1" | ||
ARG FA_WHEEL_URL="https://github.com/ROCm/flash-attention/releases/download/v2.5.9post1-cktile-vllm/flash_attn-2.5.9.post1-cp39-cp39-linux_x86_64.whl" | ||
ARG FA_GFX_ARCHS="gfx90a;gfx942" | ||
ARG FA_BRANCH="23a2b1c2" | ||
|
||
# Ensure necessary tools are installed | ||
RUN microdnf install -y wget git && microdnf clean all | ||
|
||
# Build ROCm flash-attention wheel if `BUILD_FA` is set to 1 | ||
RUN --mount=type=cache,target=${CCACHE_DIR} \ | ||
if [ "$BUILD_FA" = "1" ]; then \ | ||
if [ "$TRY_FA_WHEEL" = "1" ] && python3 -m pip install "${FA_WHEEL_URL}"; then \ | ||
# If a suitable wheel exists, download it instead of building FA | ||
mkdir -p /install && wget -N "${FA_WHEEL_URL}" -P /install; \ | ||
else \ | ||
mkdir -p /libs && \ | ||
cd /libs && \ | ||
git clone https://github.com/ROCm/flash-attention.git && \ | ||
cd flash-attention && \ | ||
git checkout "${FA_BRANCH}" && \ | ||
git submodule update --init && \ | ||
GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \ | ||
fi; \ | ||
else \ | ||
# Create an empty directory otherwise as later build stages expect one | ||
mkdir -p /install; \ | ||
fi | ||
|
||
################################################################################################## | ||
|
||
### Triton wheel build stage | ||
FROM build_base AS build_triton | ||
|
||
# Whether to build triton on rocm | ||
ARG BUILD_TRITON="1" | ||
ARG TRITON_BRANCH="e0fc12c" | ||
|
||
# Build triton wheel if `BUILD_TRITON` is set to 1 | ||
RUN --mount=type=cache,target=${CCACHE_DIR} \ | ||
if [ "$BUILD_TRITON" = "1" ]; then \ | ||
mkdir -p /libs && cd /libs && \ | ||
git clone https://github.com/OpenAI/triton.git && \ | ||
cd triton && \ | ||
git checkout "${TRITON_BRANCH}" && \ | ||
cd python && \ | ||
python3 setup.py bdist_wheel --dist-dir=/install; \ | ||
else \ | ||
# Create an empty directory otherwise as later build stages expect one | ||
mkdir -p /install; \ | ||
fi | ||
|
||
################################################################################################## | ||
|
||
### Final vLLM build stage | ||
FROM build_base AS final | ||
|
||
# Import the vLLM development directory from the build context | ||
COPY . . | ||
|
||
# Install wget only if it is needed | ||
RUN microdnf -y install wget && microdnf clean all | ||
|
||
# Package upgrades to avoid dependency issues and add functionality | ||
RUN --mount=type=cache,target=/root/.cache/pip \ | ||
python3 -m pip install --upgrade numba scipy huggingface-hub[cli] && \ | ||
microdnf clean all | ||
|
||
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100" | ||
|
||
# Set environment variables for runtime | ||
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 | ||
# Silences the HF Tokenizers warning | ||
ENV TOKENIZERS_PARALLELISM=false | ||
|
||
# Install dependencies from requirements file and apply ROCm specific patches | ||
RUN --mount=type=cache,target=${CCACHE_DIR} \ | ||
--mount=type=cache,target=/root/.cache/pip \ | ||
python3 -m pip install -Ur requirements-rocm.txt && \ | ||
ROCM_VERSION=$(ls /opt | grep -Po 'rocm-[0-9]+\.[0-9]+') && \ | ||
case "$ROCM_VERSION" in \ | ||
"rocm-6.1") \ | ||
# Apply patch for ROCm 6.1 | ||
wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib && \ | ||
# Remove potentially conflicting HIP runtime from torch | ||
rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true ;; \ | ||
*) \ | ||
echo "ROCm version $ROCM_VERSION is not supported for patching." ;; \ | ||
esac && \ | ||
python3 setup.py clean --all && \ | ||
python3 setup.py bdist_wheel --dist-dir=dist | ||
|
||
################################################################################################## | ||
|
||
FROM base AS vllm-openai | ||
|
||
WORKDIR /workspace | ||
|
||
# Set up the virtual environment and update PATH | ||
ENV VIRTUAL_ENV=/opt/vllm | ||
ENV PATH=$VIRTUAL_ENV/bin:$PATH | ||
|
||
# Install necessary build tools | ||
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc && \ | ||
microdnf clean all | ||
|
||
# Copy amdsmi wheel into final image | ||
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \ | ||
mkdir -p libs \ | ||
&& cp /install/*.whl libs \ | ||
# Preemptively uninstall to avoid same-version no-installs | ||
&& python3 -m pip uninstall -y amdsmi; | ||
|
||
# Copy triton wheel(s) into final image if they were built | ||
RUN --mount=type=bind,from=build_triton,src=/install,target=/install \ | ||
mkdir -p libs \ | ||
&& if ls /install/*.whl; then \ | ||
cp /install/*.whl libs \ | ||
# Preemptively uninstall to avoid same-version no-installs | ||
&& python3 -m pip uninstall -y triton; fi | ||
|
||
# Copy flash-attn wheel(s) into final image if they were built | ||
RUN --mount=type=bind,from=build_fa,src=/install,target=/install \ | ||
mkdir -p libs \ | ||
&& if ls /install/*.whl; then \ | ||
cp /install/*.whl libs \ | ||
# Preemptively uninstall to avoid same-version no-installs | ||
&& python3 -m pip uninstall -y flash-attn; fi | ||
|
||
# Copy vLLM wheel(s) into the final image | ||
RUN --mount=type=bind,from=final,src=/vllm-workspace/dist,target=/dist \ | ||
--mount=type=cache,target=/root/.cache/pip \ | ||
cp /dist/*.whl libs \ | ||
# Preemptively uninstall to avoid same-version no-installs | ||
&& python3 -m pip uninstall -y vllm | ||
|
||
# Install wheels that were built to the final image | ||
RUN --mount=type=cache,target=/root/.cache/pip \ | ||
if ls libs/*.whl; then \ | ||
python3 -m pip install libs/*.whl; fi | ||
|
||
# Environment variables for runtime configuration | ||
ENV HF_HUB_OFFLINE=1 \ | ||
PORT=8000 \ | ||
HOME=/home/vllm \ | ||
VLLM_USAGE_SOURCE=production-docker-image | ||
|
||
# Set up a non-root user for OpenShift | ||
RUN umask 002 && \ | ||
useradd --uid 2000 --gid 0 vllm && \ | ||
mkdir -p /licenses && \ | ||
chmod g+rwx $HOME /usr/src /workspace | ||
|
||
COPY LICENSE /licenses/vllm.md | ||
|
||
ENV HF_HUB_OFFLINE=1 \ | ||
HOME=/home/vllm \ | ||
# Allow requested max length to exceed what is extracted from the | ||
# config.json | ||
# see: https://github.com/vllm-project/vllm/pull/7080 | ||
VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ | ||
VLLM_USAGE_SOURCE=production-docker-image \ | ||
VLLM_WORKER_MULTIPROC_METHOD=fork \ | ||
VLLM_NO_USAGE_STATS=1 | ||
|
||
# Switch to the non-root user | ||
USER 2000 | ||
|
||
# Set the entrypoint | ||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters