Merge branch 'main' into libsodium-flags

opendatahub-io · Sep 6, 2024 · f5b7004 · f5b7004
2 parents fa8c8c2 + 341a6b5
commit f5b7004
Show file tree

Hide file tree

Showing 2 changed files with 293 additions and 1 deletion.
diff --git a/Dockerfile.rocm.ubi b/Dockerfile.rocm.ubi
@@ -0,0 +1,291 @@
+## Global Args #################################################################
+ARG BASE_UBI_IMAGE_TAG=9.4
+ARG PYTHON_VERSION=3.11
+# Default ROCm ARCHes to build vLLM for.
+ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
+
+## Base Layer ##################################################################
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as rocm-base
+
+# Max jobs for parallel build
+ARG MAX_JOBS=12
+
+ENV BUILD_TARGET='rocm'
+
+USER root
+
+ENV ROCM_VERSION=6.1.2
+
+# Set up ROCm repository and install necessary packages
+
+RUN echo "[amdgpu]" > /etc/yum.repos.d/amdgpu.repo && \
+echo "name=amdgpu" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.4/main/x86_64/" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "[ROCm-${ROCM_VERSION}]" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "name=ROCm${ROCM_VERSION}" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "priority=50" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo && \
+echo "gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
+
+RUN microdnf -y update && \
+    microdnf -y install rocm hipcc git which && \
+    microdnf clean all
+
+WORKDIR /workspace
+
+##################################################################################################
+
+FROM rocm-base as python-install
+ARG PYTHON_VERSION
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs \
+    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
+    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV --system-site-packages && \
+    $VIRTUAL_ENV/bin/pip install --no-cache -U pip wheel && \
+    microdnf clean all
+
+##################################################################################################
+
+FROM python-install as python-rocm-base
+
+# install common dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt,readonly \
+    --mount=type=bind,source=requirements-rocm.txt,target=requirements-rocm.txt,readonly \
+    pip install -r requirements-rocm.txt
+
+##################################################################################################
+
+FROM python-rocm-base as base
+
+# Set the application mount point
+ARG APP_MOUNT=/vllm-workspace
+WORKDIR ${APP_MOUNT}
+
+# Upgrade pip and remove unnecessary packages
+RUN python3 -m pip install --upgrade --no-cache-dir pip && \
+    microdnf -y remove sccache || true && \
+    python3 -m pip uninstall -y sccache || true && \
+    rm -f "$(which sccache)" && \
+    microdnf clean all && \
+    rm -rf /var/cache/yum /var/cache/dnf
+
+# Install torch == 2.5.0 on ROCm
+RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
+    *"rocm-6.1"*) \
+    python3 -m pip uninstall -y torch torchvision \
+    && python3 -m pip install --no-cache-dir --pre \
+    torch==2.5.0.dev20240726 \
+    torchvision==0.20.0.dev20240726 \
+    --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
+    *) ;; esac
+
+# Set environment variables
+ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
+ENV PATH=$PATH:/opt/rocm/bin:/libtorch/bin
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/libtorch/lib
+ENV CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/libtorch/include:/libtorch/include/torch/csrc/api/include:/opt/rocm/include
+ENV PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
+ENV CCACHE_DIR=/root/.cache/ccache
+
+##################################################################################################
+
+FROM base as build_base
+
+RUN python3 -m pip install --upgrade --no-cache-dir ninja cmake>=3.26
+
+##################################################################################################
+
+##################################################################################################
+
+### AMD-SMI build stage
+FROM build_base AS build_amdsmi
+
+# Build AMD SMI wheel
+RUN cd /opt/rocm/share/amd_smi && \
+    python3 -m pip wheel . --wheel-dir=/install
+
+##################################################################################################
+
+### Flash-Attention wheel build stage
+FROM build_base AS build_fa
+
+# Whether to install CK-based flash-attention
+ARG BUILD_FA="1"
+ARG TRY_FA_WHEEL="1"
+ARG FA_WHEEL_URL="https://github.com/ROCm/flash-attention/releases/download/v2.5.9post1-cktile-vllm/flash_attn-2.5.9.post1-cp39-cp39-linux_x86_64.whl"
+ARG FA_GFX_ARCHS="gfx90a;gfx942"
+ARG FA_BRANCH="23a2b1c2"
+
+# Ensure necessary tools are installed
+RUN microdnf install -y wget git && microdnf clean all
+
+# Build ROCm flash-attention wheel if `BUILD_FA` is set to 1
+RUN --mount=type=cache,target=${CCACHE_DIR} \
+    if [ "$BUILD_FA" = "1" ]; then \
+        if [ "$TRY_FA_WHEEL" = "1" ] && python3 -m pip install "${FA_WHEEL_URL}"; then \
+	    # If a suitable wheel exists, download it instead of building FA
+            mkdir -p /install && wget -N "${FA_WHEEL_URL}" -P /install; \
+        else \
+            mkdir -p /libs && \
+            cd /libs && \
+            git clone https://github.com/ROCm/flash-attention.git && \
+            cd flash-attention && \
+            git checkout "${FA_BRANCH}" && \
+            git submodule update --init && \
+            GPU_ARCHS="${FA_GFX_ARCHS}" python3 setup.py bdist_wheel --dist-dir=/install; \
+        fi; \
+    else \
+        # Create an empty directory otherwise as later build stages expect one
+        mkdir -p /install; \
+    fi
+
+##################################################################################################
+
+### Triton wheel build stage
+FROM build_base AS build_triton
+
+# Whether to build triton on rocm
+ARG BUILD_TRITON="1"
+ARG TRITON_BRANCH="e0fc12c"
+
+# Build triton wheel if `BUILD_TRITON` is set to 1
+RUN --mount=type=cache,target=${CCACHE_DIR} \
+    if [ "$BUILD_TRITON" = "1" ]; then \
+        mkdir -p /libs && cd /libs && \
+        git clone https://github.com/OpenAI/triton.git && \
+        cd triton && \
+        git checkout "${TRITON_BRANCH}" && \
+        cd python && \
+        python3 setup.py bdist_wheel --dist-dir=/install; \
+    else \
+        # Create an empty directory otherwise as later build stages expect one
+        mkdir -p /install; \
+    fi
+
+##################################################################################################
+
+### Final vLLM build stage
+FROM build_base AS final
+
+# Import the vLLM development directory from the build context
+COPY . .
+
+# Install wget only if it is needed
+RUN microdnf -y install wget && microdnf clean all
+
+# Package upgrades to avoid dependency issues and add functionality
+RUN --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install --upgrade numba scipy huggingface-hub[cli] && \
+    microdnf clean all
+
+ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
+
+# Set environment variables for runtime
+ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
+# Silences the HF Tokenizers warning
+ENV TOKENIZERS_PARALLELISM=false
+
+# Install dependencies from requirements file and apply ROCm specific patches
+RUN --mount=type=cache,target=${CCACHE_DIR} \
+    --mount=type=cache,target=/root/.cache/pip \
+    python3 -m pip install -Ur requirements-rocm.txt && \
+    ROCM_VERSION=$(ls /opt | grep -Po 'rocm-[0-9]+\.[0-9]+') && \
+    case "$ROCM_VERSION" in \
+        "rocm-6.1") \
+            # Apply patch for ROCm 6.1
+            wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib && \
+            # Remove potentially conflicting HIP runtime from torch
+            rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so* || true ;; \
+        *) \
+            echo "ROCm version $ROCM_VERSION is not supported for patching." ;; \
+    esac && \
+    python3 setup.py clean --all && \
+    python3 setup.py bdist_wheel --dist-dir=dist
+
+##################################################################################################
+
+FROM base AS vllm-openai
+
+WORKDIR /workspace
+
+# Set up the virtual environment and update PATH
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=$VIRTUAL_ENV/bin:$PATH
+
+# Install necessary build tools
+RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc && \
+    microdnf clean all
+
+# Copy amdsmi wheel into final image
+RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install \
+    mkdir -p libs \
+    && cp /install/*.whl libs \
+    # Preemptively uninstall to avoid same-version no-installs
+    && python3 -m pip uninstall -y amdsmi;
+
+# Copy triton wheel(s) into final image if they were built
+RUN --mount=type=bind,from=build_triton,src=/install,target=/install \
+    mkdir -p libs \
+    && if ls /install/*.whl; then \
+    cp /install/*.whl libs \
+    # Preemptively uninstall to avoid same-version no-installs
+    && python3 -m pip uninstall -y triton; fi
+
+# Copy flash-attn wheel(s) into final image if they were built
+RUN --mount=type=bind,from=build_fa,src=/install,target=/install \
+    mkdir -p libs \
+    && if ls /install/*.whl; then \
+    cp /install/*.whl libs \
+    # Preemptively uninstall to avoid same-version no-installs
+    && python3 -m pip uninstall -y flash-attn; fi
+
+# Copy vLLM wheel(s) into the final image
+RUN --mount=type=bind,from=final,src=/vllm-workspace/dist,target=/dist \
+    --mount=type=cache,target=/root/.cache/pip \
+    cp /dist/*.whl libs \
+    # Preemptively uninstall to avoid same-version no-installs
+    && python3 -m pip uninstall -y vllm
+
+# Install wheels that were built to the final image
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if ls libs/*.whl; then \
+    python3 -m pip install libs/*.whl; fi
+
+# Environment variables for runtime configuration
+ENV HF_HUB_OFFLINE=1 \
+    PORT=8000 \
+    HOME=/home/vllm \
+    VLLM_USAGE_SOURCE=production-docker-image
+
+# Set up a non-root user for OpenShift
+RUN umask 002 && \
+    useradd --uid 2000 --gid 0 vllm && \
+    mkdir -p /licenses && \
+    chmod g+rwx $HOME /usr/src /workspace
+
+COPY LICENSE /licenses/vllm.md
+
+ENV HF_HUB_OFFLINE=1 \
+    HOME=/home/vllm \
+    # Allow requested max length to exceed what is extracted from the
+    # config.json
+    # see: https://github.com/vllm-project/vllm/pull/7080
+    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
+    VLLM_USAGE_SOURCE=production-docker-image \
+    VLLM_WORKER_MULTIPROC_METHOD=fork \
+    VLLM_NO_USAGE_STATS=1
+
+# Switch to the non-root user
+USER 2000
+
+# Set the entrypoint
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/Dockerfile.ubi b/Dockerfile.ubi
@@ -8,7 +8,8 @@ ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
 ARG PYTHON_VERSION
 
-RUN microdnf install -y \
+RUN microdnf -y update && \
+    microdnf install -y \
     python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
     && microdnf clean all