Skip to content

Parquet dataloader with fairseq2 primitives (#162) #4

Parquet dataloader with fairseq2 primitives (#162)

Parquet dataloader with fairseq2 primitives (#162) #4

# Copyright (c) Meta Platforms, Inc. and affiliates.

Check failure on line 1 in .github/workflows/_build_wheel-linux.yaml

View workflow run for this annotation

GitHub Actions / .github/workflows/_build_wheel-linux.yaml

Invalid workflow file

You have an error in your yaml syntax on line 1
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# Reusable workflow: it is only ever started by another workflow through
# `workflow_call`, which supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      # PyTorch version; installed as `torch==<torch>`.
      torch:
        type: string
        required: true

      # Python version suffix; the interpreter is invoked as `python<py>`.
      py:
        type: string
        required: true

      # Build variant. Used as the container image tag suffix, as the PyTorch
      # wheel index (`download.pytorch.org/whl/<variant>`), and as the local
      # version label. Values starting with `cu` enable the CUDA build.
      variant:
        type: string
        required: true

      # Target architecture; selects the container image and the
      # `manylinux2014_<arch>` platform tag.
      arch:
        type: string
        default: 'x86_64'

      # Either 'nosan' or an underscore-separated list of sanitizers
      # (the workflow itself special-cases `asan` and `tsan`).
      sanitizers:
        type: string
        default: 'nosan'

      # Prefix of the staging artifact name.
      artifacts_name:
        type: string
        default: 'wheels'

      # When non-empty, passed to `tools/set-project-version.sh` before the
      # build.
      version_override:
        type: string
        default: ''

      # When true, the variant is appended to the version as a local label.
      label_version_with_variant:
        type: boolean
        default: true

      # When true, pytest is run with `--integration`.
      run_integration_tests:
        type: boolean
        default: false

      # When true, pytest runs with `--device cuda:0` instead of `cpu`.
      run_on_device:
        type: boolean
        default: false
jobs:
  # Builds the fairseq2n native library, packages the fairseq2n and fairseq2
  # wheels, and stages them (together with the native test binary) as a
  # short-lived workflow artifact for the `test` job.
  build:
    name: Build
    runs-on:
      labels: 4-core-ubuntu
    container:
      image: ghcr.io/facebookresearch/fairseq2-ci-manylinux_${{ inputs.arch }}:2-${{ inputs.variant }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Check-out the repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Create the Python virtual environment
        run: |
          python${{ inputs.py }} -m venv ~/venv

          # Make the virtual environment's executables visible to later steps.
          echo ~/venv/bin >> "$GITHUB_PATH"

      - name: Install PyTorch
        run: |
          pip install --extra-index-url https://download.pytorch.org/whl/${{ inputs.variant }} \
            torch==${{ inputs.torch }}

      - name: Install requirements
        run: |
          pip install --requirement fairseq2n/python/requirements-build.txt

      - name: Override project version
        if: inputs.version_override
        run: |
          tools/set-project-version.sh ${{ inputs.version_override }}

      - name: Set build variant as local version label
        if: inputs.label_version_with_variant
        env:
          VARIANT: ${{ inputs.variant }}
        run: |
          version=$(cat VERSION)

          # If the version has already a local label, append the variant.
          if [[ $version == *+* ]]; then
            tools/set-project-version.sh "$version.$VARIANT"
          else
            tools/set-project-version.sh "$version+$VARIANT"
          fi

      - name: Build fairseq2n
        working-directory: fairseq2n
        env:
          VARIANT: ${{ inputs.variant }}
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          # We build our CUDA kernels for Volta and Ampere.
          if [[ $VARIANT == cu* ]]; then
            cuda_archs="70-real;80-real;80-virtual"

            cuda=ON
          else
            cuda=OFF
          fi

          # Since they do not play well together, we perform LTO only if no
          # sanitizer is enabled.
          if [[ $SANITIZERS == nosan ]]; then
            build_type=Release

            lto=ON
          else
            build_type=Debug

            lto=OFF
          fi

          # SANITIZERS is underscore-separated; CMake expects a
          # semicolon-separated list, so replace every underscore (`//`), not
          # just the first one.
          cmake \
            -GNinja \
            -DCMAKE_BUILD_TYPE=$build_type \
            -DCMAKE_CUDA_ARCHITECTURES=$cuda_archs \
            -DFAIRSEQ2N_PERFORM_LTO=$lto \
            -DFAIRSEQ2N_SANITIZERS="${SANITIZERS//_/;}" \
            -DFAIRSEQ2N_TREAT_WARNINGS_AS_ERRORS=ON \
            -DFAIRSEQ2N_USE_CUDA=$cuda \
            -DFAIRSEQ2N_PYTHON_DEVEL=OFF \
            -B build

          cmake --build build

      - name: Package fairseq2n
        working-directory: fairseq2n/python
        run: |
          pip wheel \
            --use-pep517 \
            --no-build-isolation \
            --no-deps \
            --config-settings "--build-option=--plat-name" \
            --config-settings "--build-option=manylinux2014_${{ inputs.arch }}" \
            --wheel-dir build/wheelhouse \
            .

      - name: Package fairseq2
        run: |
          pip wheel --no-deps --wheel-dir build/wheelhouse .

      - name: Upload wheels and native tests to staging
        uses: actions/upload-artifact@v3
        with:
          name: ${{ inputs.artifacts_name }}-pt${{ inputs.torch }}-py${{ inputs.py }}-linux_${{ inputs.arch }}-${{ inputs.variant }}-${{ inputs.sanitizers }}
          path: |
            build/wheelhouse/*.whl
            fairseq2n/build/tests/run-tests
            fairseq2n/python/build/wheelhouse/*.whl
          retention-days: 1

  # Installs the staged wheels into a fresh virtual environment and runs the
  # native and Python test suites against them.
  test:
    name: Test
    needs: build
    runs-on:
      labels: 4-core-ubuntu
    container:
      image: ghcr.io/facebookresearch/fairseq2-ci-manylinux_${{ inputs.arch }}:2-${{ inputs.variant }}
    defaults:
      run:
        shell: bash
    steps:
      - name: Download wheels and native tests from staging
        uses: actions/download-artifact@v3
        with:
          # Must match the artifact name used by the `build` job.
          name: ${{ inputs.artifacts_name }}-pt${{ inputs.torch }}-py${{ inputs.py }}-linux_${{ inputs.arch }}-${{ inputs.variant }}-${{ inputs.sanitizers }}
          path: ~/artifacts/

      - name: Check-out the repository
        uses: actions/checkout@v3

      - name: Create the Python virtual environment
        run: |
          python${{ inputs.py }} -m venv ~/venv

          # Make the virtual environment's executables visible to later steps.
          echo ~/venv/bin >> "$GITHUB_PATH"

      - name: Install PyTorch
        run: |
          pip install --extra-index-url https://download.pytorch.org/whl/${{ inputs.variant }} \
            torch==${{ inputs.torch }}

      - name: Install requirements
        env:
          ARCH: ${{ inputs.arch }}
        run: |
          pip install --requirement requirements-devel.txt

          if [[ $ARCH == x86_64 ]]; then
            # Our Docker images have glibc 2.17, but tbb versions later than
            # 2021.9 are built against a newer glibc.
            pip install tbb==2021.8
          fi

      - name: Install fairseq2n
        run: |
          pip install --no-cache-dir ~/artifacts/fairseq2n/python/build/wheelhouse/*.whl

      - name: Install fairseq2
        run: |
          # Install each staged wheel once, together with its `arrow` extra.
          # pip accepts `<path-to-wheel>[extra]`, so no distribution name has
          # to be spelled out here.
          for whl in ~/artifacts/build/wheelhouse/*.whl; do
            pip install --no-cache-dir "${whl}[arrow]"
          done

      - name: Set the sanitizer variables
        if: inputs.sanitizers != 'nosan'
        env:
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          # Everything echoed inside the group is appended to GITHUB_ENV and
          # becomes an environment variable of the subsequent steps.
          {
            # We can't enable container overflow checks due to false positives
            # in non-instrumented dependencies.
            echo ASAN_OPTIONS=detect_container_overflow=0

            echo LSAN_OPTIONS=suppressions=fairseq2n/LSan.supp,exitcode=0,log_path=$HOME/lsan.out

            for sanitizer in ${SANITIZERS//_/ }; do
              # LIBASAN and LIBTSAN environment variables are defined in the
              # container image.
              case $sanitizer in
                asan)
                  echo "SANITIZER_LIB=$LIBASAN"
                  break
                  ;;
                tsan)
                  echo "SANITIZER_LIB=$LIBTSAN"
                  break
                  ;;
              esac
            done
          } >> "$GITHUB_ENV"

      - name: Run native tests
        run: |
          chmod 755 ~/artifacts/fairseq2n/build/tests/run-tests

          site_packages=~/venv/lib/python${{ inputs.py }}/site-packages

          LD_LIBRARY_PATH=~/venv/lib:$site_packages/torch/lib:$site_packages/fairseq2n/lib:$LD_LIBRARY_PATH \
            ~/artifacts/fairseq2n/build/tests/run-tests

      - name: Run Python tests
        env:
          RUN_INTEGRATION_TESTS: ${{ inputs.run_integration_tests }}
          RUN_ON_DEVICE: ${{ inputs.run_on_device }}
        run: |
          if [[ $RUN_INTEGRATION_TESTS == true ]]; then
            integration=--integration
          fi

          if [[ $RUN_ON_DEVICE != true ]]; then
            dev=cpu
          else
            dev=cuda:0
          fi

          # $integration is intentionally unquoted: when it is unset it must
          # expand to nothing rather than to an empty argument.
          LD_PRELOAD=${SANITIZER_LIB} pytest -rP --device $dev $integration --verbose

      - name: Check the output of the leak sanitizer
        if: env.LSAN_OPTIONS != ''
        run: |
          # Without nullglob an unmatched pattern would be handed to `cat`
          # literally and fail the step even though no leak was reported.
          shopt -s nullglob

          # Unfortunately Python leaks quite a bit of memory, so we cannot rely
          # on the raw output of LSan. As a rudimentary workaround, we check if
          # any stack frame has a symbol containing the word 'fairseq2'.
          for f in ~/lsan.out.*; do
            cat "$f"

            if grep --silent 'fairseq2' "$f"; then
              exit 1
            fi
          done