Add a GitHub Actions workflow that builds and pushes the OneFlow manylinux images.

Summary of the changes in this patch:
  * .github/workflows/build-oneflow.yml (new): one matrix job per base image
    (CUDA 12.2 / 12.1 / 11.8 and CPU); logs in to ACR and Docker Hub, runs
    ./build.sh, then builds and pushes with docker/build-push-action.
  * build.sh: BASEIMAGE may be overridden through CUDA_BASE_IMAGE; the buildx
    and buildkit build commands, the test run and the local cache rotation are
    removed; the resolved build arguments are appended to $GITHUB_ENV instead.
  * docker/Dockerfile: installs Bazel 3.4.1, extra yum packages and a
    clang-tidy AppImage.
  * docker/build_scripts/install-runtime-packages.sh: version-locks the cuDNN,
    CUDA and NCCL packages.
  * .gitignore: ignores .vscode/.

NOTE(review): whitespace was reconstructed from a collapsed copy of this patch;
tab indentation in the shell-script context lines is assumed and should be
confirmed against the target tree. The "index" blob hashes are carried over
unchanged from the original patch.

diff --git a/.github/workflows/build-oneflow.yml b/.github/workflows/build-oneflow.yml
new file mode 100644
index 000000000..a36b8a163
--- /dev/null
+++ b/.github/workflows/build-oneflow.yml
@@ -0,0 +1,115 @@
+name: Build (OneFlow)
+
+on:
+  workflow_dispatch:
+    inputs:
+      useCache:  # NOTE(review): not referenced by any step in this workflow
+        description: Use GHA cache
+        type: boolean
+        required: false
+        default: true
+  push:
+    branches-ignore:
+      - "update-dependencies-pr"
+    paths:
+      - ".github/workflows/**"
+      - "docker/**"
+      - "*.sh"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-v2
+  cancel-in-progress: true
+
+env:
+  REGION_ID: cn-beijing
+  ACR_REGISTRY: registry.cn-beijing.aliyuncs.com
+  ACR_NAMESPACE: oneflow
+  DOCKER_HUB_NAMESPACE: oneflowinc
+
+jobs:
+  build_manylinux:
+    name: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - tag-suffix: "cuda12.2"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.2.2-cudnn8-devel-centos7"
+          - tag-suffix: "cuda12.1"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:12.1.1-cudnn8-devel-centos7"
+          - tag-suffix: "cuda11.8"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: "nvidia/cuda:11.8.0-cudnn8-devel-centos7"
+          - tag-suffix: "cpu"
+            policy: "manylinux2014"
+            platform: "x86_64"
+            CUDA_BASE_IMAGE: ""  # empty: build.sh falls back to its default base image
+
+    env:
+      POLICY: ${{ matrix.policy }}
+      PLATFORM: ${{ matrix.platform }}
+      COMMIT_SHA: ${{ github.sha }}
+      DOCKER_REPO: "${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}"
+      TEST_TAG: ${{ matrix.policy }}_${{ matrix.platform }}_${{ matrix.tag-suffix }}:${{ github.sha }}  # NOTE(review): unused in this workflow
+      CUDA_BASE_IMAGE: ${{ matrix.CUDA_BASE_IMAGE }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      # 1.1 Login to ACR
+      - name: Login to ACR with the AccessKey pair
+        uses: aliyun/acr-login@v1
+        with:
+          login-server: https://registry.${{env.REGION_ID}}.aliyuncs.com
+          username: "${{ secrets.ACR_USERNAME }}"
+          password: "${{ secrets.ACR_PASSWORD }}"
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Set up emulation
+        if: matrix.platform != 'i686' && matrix.platform != 'x86_64'
+        uses: docker/setup-qemu-action@v2
+        with:
+          platforms: ${{ matrix.platform }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      # build.sh appends POLICY, PLATFORM, BASEIMAGE, ... to $GITHUB_ENV; the
+      # build-args of the next step list the same names without values.
+      - name: Build
+        run: ./build.sh
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          push: true
+          tags: |
+            ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+            ${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }}
+            ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+            ${{ env.ACR_REGISTRY }}/${{ env.ACR_NAMESPACE }}/${{ env.DOCKER_REPO }}:${{ env.COMMIT_SHA }}
+          cache-from: type=registry,ref=${{ env.DOCKER_HUB_NAMESPACE }}/${{ env.DOCKER_REPO }}:latest
+          cache-to: type=inline
+          context: ./docker/
+          build-args: |
+            POLICY
+            PLATFORM
+            BASEIMAGE
+            DEVTOOLSET_ROOTPATH
+            PREPEND_PATH
+            LD_LIBRARY_PATH_ARG
+
+  all_passed:
+    needs: [build_manylinux]
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "All jobs passed"
diff --git a/.gitignore b/.gitignore
index a6a86cf99..dc2ec39a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,4 @@ docker/sources
 
 # buildx cache
 .buildx-cache-*/
+.vscode/
diff --git a/build.sh b/build.sh
index aed53fe52..b2c2c2f49 100755
--- a/build.sh
+++ b/build.sh
@@ -32,7 +32,8 @@ if [ "${POLICY}" == "manylinux2014" ]; then
 	if [ "${PLATFORM}" == "s390x" ]; then
 		BASEIMAGE="s390x/clefos:7"
 	else
-		BASEIMAGE="${MULTIARCH_PREFIX}centos:7"
+		DEFAULT_BASEIMAGE="${MULTIARCH_PREFIX}centos:7"
+		BASEIMAGE="${CUDA_BASE_IMAGE:-${DEFAULT_BASEIMAGE}}"
 	fi
 	DEVTOOLSET_ROOTPATH="/opt/rh/devtoolset-10/root"
 	PREPEND_PATH="${DEVTOOLSET_ROOTPATH}/usr/bin:"
@@ -84,31 +85,18 @@ fi
 if [ "${MANYLINUX_BUILD_FRONTEND}" == "docker" ]; then
 	docker build ${BUILD_ARGS_COMMON}
 elif [ "${MANYLINUX_BUILD_FRONTEND}" == "docker-buildx" ]; then
-	docker buildx build \
-		--load \
-		--cache-from=type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \
-		--cache-to=type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \
-		${BUILD_ARGS_COMMON}
+	env  # NOTE(review): only prints the environment; no image is built in this branch
 elif [ "${MANYLINUX_BUILD_FRONTEND}" == "buildkit" ]; then
-	buildctl build \
-		--frontend=dockerfile.v0 \
-		--local context=./docker/ \
-		--local dockerfile=./docker/ \
-		--import-cache type=local,src=$(pwd)/.buildx-cache-${POLICY}_${PLATFORM} \
-		--export-cache type=local,dest=$(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} \
-		--opt build-arg:POLICY=${POLICY} --opt build-arg:PLATFORM=${PLATFORM} --opt build-arg:BASEIMAGE=${BASEIMAGE} \
-		--opt "build-arg:DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" --opt "build-arg:PREPEND_PATH=${PREPEND_PATH}" --opt "build-arg:LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" \
-		--output type=docker,name=quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} | docker load
+	echo "Unsupported build frontend: buildkit"
+	exit 1
 else
 	echo "Unsupported build frontend: '${MANYLINUX_BUILD_FRONTEND}'"
 	exit 1
 fi
 
-docker run --rm -v $(pwd)/tests:/tests:ro quay.io/pypa/${POLICY}_${PLATFORM}:${COMMIT_SHA} /tests/run_tests.sh
-
-if [ "${MANYLINUX_BUILD_FRONTEND}" != "docker" ]; then
-	if [ -d $(pwd)/.buildx-cache-${POLICY}_${PLATFORM} ]; then
-		rm -rf $(pwd)/.buildx-cache-${POLICY}_${PLATFORM}
-	fi
-	mv $(pwd)/.buildx-cache-staging-${POLICY}_${PLATFORM} $(pwd)/.buildx-cache-${POLICY}_${PLATFORM}
-fi
+echo "POLICY=${POLICY}" >> "${GITHUB_ENV}"
+echo "PLATFORM=${PLATFORM}" >> "${GITHUB_ENV}"
+echo "BASEIMAGE=${BASEIMAGE}" >> "${GITHUB_ENV}"
+echo "DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}" >> "${GITHUB_ENV}"
+echo "PREPEND_PATH=${PREPEND_PATH}" >> "${GITHUB_ENV}"
+echo "LD_LIBRARY_PATH_ARG=${LD_LIBRARY_PATH_ARG}" >> "${GITHUB_ENV}"
diff --git a/docker/Dockerfile b/docker/Dockerfile
index c3e64a63e..0b6eee303 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -168,4 +168,15 @@ RUN --mount=type=bind,target=/all_cpython,from=all_cpython \
 
 ENV SSL_CERT_FILE=/opt/_internal/certs.pem
 
+ARG BAZEL_URL="https://github.com/bazelbuild/bazel/releases/download/3.4.1/bazel-3.4.1-linux-x86_64"
+# -f: fail the build on an HTTP error instead of saving the error page as the bazel binary.
+RUN curl -fL "$BAZEL_URL" -o /usr/local/bin/bazel \
+    && chmod +x /usr/local/bin/bazel
+RUN yum install -y wget nasm rdma-core-devel rsync gdb ninja-build openblas-static devtoolset-7-gcc* vim ccache htop fuse-devel
+RUN yum install -y devtoolset-10-libasan-devel devtoolset-10-libubsan-devel devtoolset-10-libtsan-devel
+
+# NOTE(review): the downloaded asset is clang-tidy-14.AppImage but it is installed as "clangd" - confirm the intended name.
+RUN wget https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage -O /usr/local/bin/clangd && \
+    chmod +x /usr/local/bin/clangd
+
 CMD ["/bin/bash"]
diff --git a/docker/build_scripts/install-runtime-packages.sh b/docker/build_scripts/install-runtime-packages.sh
index 469fd5635..92952701d 100755
--- a/docker/build_scripts/install-runtime-packages.sh
+++ b/docker/build_scripts/install-runtime-packages.sh
@@ -34,6 +34,14 @@ source $MY_DIR/build_utils.sh
 # MANYLINUX_DEPS: Install development packages (except for libgcc which is provided by gcc install)
 if [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ] || [ "${AUDITWHEEL_POLICY}" == "manylinux_2_28" ]; then
 	MANYLINUX_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel zlib-devel expat-devel"
+	yum -y install yum-versionlock
+	yum versionlock add libcudnn8-devel
+	yum versionlock add libcudnn8
+	yum versionlock add 'cuda-*'  # quoted so the shell passes the pattern to yum unexpanded
+	yum versionlock add libnccl
+	yum versionlock add libnccl-devel
+	yum versionlock list libcudnn8-devel
+	yum versionlock list libcudnn8
 elif [ "${BASE_POLICY}" == "musllinux" ]; then
 	MANYLINUX_DEPS="musl-dev libstdc++ glib-dev libx11-dev libxext-dev libxrender-dev mesa-dev libice-dev libsm-dev zlib-dev expat-dev"
 else