Skip to content

Commit

Permalink
Add retry functionality
Browse files Browse the repository at this point in the history
Add a retry function for all packages that can support it.
  • Loading branch information
milljm committed Oct 17, 2024
1 parent a095220 commit a7c9f4c
Show file tree
Hide file tree
Showing 19 changed files with 371 additions and 318 deletions.
36 changes: 36 additions & 0 deletions conda/functions/retry_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# Scan a build log for known transient failure signatures.
#
# Arguments: $1 - path to the build log to inspect
# Returns:   1 when the log contains one of the TRY_AGAIN_REASON signatures,
#            0 when none is present (also when the log cannot be read).
# NOTE: the status is inverted relative to the function name; retry_build
#       treats a 0 status as "unknown failure, give up".
function can_retry(){
    local -a TRY_AGAIN_REASON=('Library not loaded: @rpath/')
    # keep the loop variable out of the caller's namespace
    local reason
    for reason in "${TRY_AGAIN_REASON[@]}"; do
        # -F: signatures are literal text, not regexes; -q: stop at first hit
        if grep -qF -- "${reason}" "${1}"; then
            return 1
        fi
    done
    return 0
}
# Run do_build with errexit switched off so that a failed build hands its
# status back to the caller instead of terminating the script.
#
# Globals:  SRC_DIR (read) - directory holding output.log
# Outputs:  combined stdout/stderr of do_build, echoed and appended to
#           ${SRC_DIR}/output.log
# Returns:  status of the do_build | tee pipeline, evaluated with pipefail
# NOTE: errexit stays disabled in the calling shell after this returns.
function no_exit_failure(){
    set +e
    # the subshell confines pipefail to this single pipeline
    ( set -o pipefail; do_build 2>&1 | tee -a "${SRC_DIR}/output.log" )
}
# Run the build (via no_exit_failure) until it succeeds, retrying only when
# the log shows a known transient failure.
#
# Globals:  SRC_DIR (read)   - directory holding output.log
#           try_count (read) - optional starting value for the retry counter
# Outputs:  tail of the build log plus a status line on every failure
# Returns:  0 once a build attempt succeeds (errexit is re-enabled first).
# Exits:    1 on a failure without a known signature, or once the retry
#           budget is spent (third consecutive failure when starting at 0).
function retry_build(){
    local build_log="${SRC_DIR:?}/output.log"
    # Start at 0 unless a value was supplied; without this the comparison
    # below aborts under `set -u` the first time a retry is considered.
    local try_count="${try_count:-0}"
    while true; do
        if no_exit_failure; then
            set -e
            break
        elif can_retry "${build_log}"; then
            # can_retry returns 0 when NO known transient signature is present.
            # Exit unconditionally: a failing tail must not turn an unknown
            # failure into another retry.
            tail -600 "${build_log}"
            exit 1
        elif [[ ${try_count} -gt 1 ]]; then
            tail -50 "${build_log}"
            (( try_count++ )) || true
            printf "Exhausted retry attempts: %s\n" "${try_count}"
            exit 1
        fi
        (( try_count++ )) || true
        tail -100 "${build_log}"
        printf "\n\nRoutine error caught, trying again...\n"
        # start the next attempt with an empty log
        true > "${build_log}"
    done
}
62 changes: 20 additions & 42 deletions conda/libmesh-vtk/build.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
#!/bin/bash
set -eu
export PATH=/bin:$PATH
export HYDRA_LAUNCHER=fork
export CC=mpicc CXX=mpicxx
export VTK_PREFIX=${PREFIX}/libmesh-vtk

# set in meta.yaml env package
# shellcheck disable=SC2154
export VTK_VER=${vtk_friendly_version}
function do_build(){
export HYDRA_LAUNCHER=fork
export CC=mpicc CXX=mpicxx
export VTK_PREFIX="${PREFIX:?}/libmesh-vtk"

# Tired of failing on build events that can be fixed by an invalidation on Civet.
function build_vtk() {
# Settings guide: https://docs.vtk.org/en/latest/build_instructions/build_settings.html
cmake .. -G "Ninja" \
# shellcheck disable=SC2154 # set in meta.yaml env package
export VTK_VER=${vtk_friendly_version}

rm -rf "${VTK_PREFIX:?}" "${SRC_DIR:?}/build"
mkdir -p "${SRC_DIR:?}/build"; cd "${SRC_DIR:?}/build"

# Settings guide: https://docs.vtk.org/en/latest/build_instructions/build_settings.html
cmake .. -G "Ninja" \
-Wno-dev \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_PREFIX_PATH:PATH="${VTK_PREFIX}" \
-DCMAKE_INSTALL_PREFIX:PATH="${VTK_PREFIX}" \
-DCMAKE_INSTALL_RPATH:PATH="${VTK_PREFIX}"/lib \
-DCMAKE_OSX_SYSROOT="${CONDA_BUILD_SYSROOT}" \
-DCMAKE_OSX_SYSROOT="${CONDA_BUILD_SYSROOT:?}" \
-DCMAKE_INSTALL_LIBDIR=lib \
-DBUILD_SHARED_LIBS:BOOL=ON \
-DVTK_INSTALL_SDK:BOOL=ON \
Expand All @@ -40,41 +42,17 @@ function build_vtk() {
-DVTK_MODULE_ENABLE_VTK_IOParallelNetCDF:STRING=YES \
-DVTK_MODULE_ENABLE_VTK_IOXML:STRING=YES \
-DVTK_MODULE_ENABLE_VTK_IOXMLParser:STRING=YES \
-DVTK_MODULE_ENABLE_VTK_IOImage:STRING=YES
-DVTK_MODULE_ENABLE_VTK_IOImage:STRING=YES || return 1

ninja install -v -j "${MOOSE_JOBS:-2}"
ninja install -v -j "${MOOSE_JOBS:-2}" || return 1
}

function no_exit_failure(){
set +e
(
set -o pipefail
build_vtk 2>&1 | tee -a "${SRC_DIR}"/output.log
)
}
# shellcheck disable=SC1091 # made available through meta.yaml src path
source retry_build.sh

# Handle retries for this one step so as to not need an entire 4 hour build target redo.
TRY_AGAIN_REASON='Library not loaded: @rpath/'
while true; do
mkdir -p "${SRC_DIR}"/build
cd "${SRC_DIR}"/build
if no_exit_failure; then
set -e
break
elif [[ $("< ${SRC_DIR}"/output.log | grep -c "${TRY_AGAIN_REASON}") -eq 0 ]]; then
tail -600 "${SRC_DIR}"/output.log && exit 1
elif [[ ${try_count} -gt 2 ]]; then
tail -100 "${SRC_DIR}"/output.log
printf "Exhausted retry attempts: %s\n" "${try_count}"
exit 1
fi
(( try_count++ )) || true
tail -100 "${SRC_DIR}"/output.log
printf "\n\nLibrary not loaded Conda bug. YUCK. Trying again.\n\n"
# Start anew, clean.
rm -rf "${SRC_DIR}"/build
true > "${SRC_DIR}"/output.log
done
# Sets up retry functions and calls do_build. Blocking until success
# or 3 failed attempts, or 1 unknown/unhandled failure
retry_build

# Set VTK environment variables for those that need it
mkdir -p "${PREFIX}/etc/conda/activate.d" "${PREFIX}/etc/conda/deactivate.d"
Expand Down
1 change: 1 addition & 0 deletions conda/libmesh-vtk/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ source:
- url: https://www.vtk.org/files/release/{{ vtk_friendly_version }}/VTK-{{ vtk_version }}.tar.gz
- vtk_friendly_version: {{ vtk_friendly_version }}
- sha256: {{ sha256 }}
- path: ../functions/retry_build.sh

build:
number: {{ build }}
Expand Down
131 changes: 55 additions & 76 deletions conda/libmesh/build.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,55 @@
#!/bin/bash
set -eu
export PATH=/bin:$PATH
export PKG_CONFIG_PATH=${BUILD_PREFIX:?}/lib/pkgconfig:${PKG_CONFIG_PATH}

export PKG_CONFIG_PATH=$BUILD_PREFIX/lib/pkgconfig:$PKG_CONFIG_PATH
PETSC_DIR="$(pkg-config PETSc --variable=prefix)"
export PETSC_DIR
# Reset and export the compiler/linker environment for the libMesh build.
#
# Globals read:    HOST, PREFIX (arm64 macOS only)
# Globals written: CTUNING, PETSC_DIR, F90, F77, FC, CC, CXX, CFLAGS,
#                  CXXFLAGS, HYDRA_LAUNCHER, LDFLAGS, LIBRARY_PATH (arm64 macOS)
# NOTE: LIBMESH_DIR is unset here along with the flag variables, so a caller
#       that needs it must (re)assign it after this function returns.
function set_libmesh_env(){
    unset LIBMESH_DIR CFLAGS CPPFLAGS CXXFLAGS FFLAGS LIBS \
          LDFLAGS DEBUG_CPPFLAGS DEBUG_CFLAGS DEBUG_CXXFLAGS \
          FORTRANFLAGS DEBUG_FFLAGS DEBUG_FORTRANFLAGS

    # ${HOST:-} keeps an unset HOST from aborting under `set -u`; it then
    # simply selects the non-arm64 settings.
    if [[ "$(uname)" == Darwin ]]; then
        if [[ ${HOST:-} == arm64-apple-darwin20.0.0 ]]; then
            CTUNING="-march=armv8.3-a -I${PREFIX:?}/include"
            export LIBRARY_PATH="${PREFIX:?}/lib"
        else
            CTUNING="-march=core2 -mtune=haswell"
        fi
    else
        CTUNING="-march=nocona -mtune=haswell"
    fi

    PETSC_DIR="$(pkg-config PETSc --variable=prefix)"
    export PETSC_DIR
    export F90=mpifort
    export F77=mpifort
    export FC=mpifort
    export CC=mpicc
    export CXX=mpicxx
    export CFLAGS="${CTUNING}"
    export CXXFLAGS="${CTUNING}"
    export HYDRA_LAUNCHER=fork

    # LDFLAGS was unset above, which also drops its export attribute; export
    # it in both branches so child processes actually receive the value.
    if [[ ${HOST:-} == arm64-apple-darwin20.0.0 ]]; then
        export LDFLAGS="-L${PREFIX:?}/lib -Wl,-S,-rpath,${PREFIX:?}/lib"
    else
        export LDFLAGS="-Wl,-S"
    fi
}

# Configure, build and install libMesh into ${PREFIX}/libmesh.
#
# Globals read:    PREFIX, SRC_DIR, BUILD_PREFIX, VTK_VERSION, MOOSE_JOBS
# Globals written: LIBMESH_DIR, INSTALL_BINARY (both exported)
# Returns:         0 on success, 1 as soon as any step fails
function do_build(){
    local cores="${MOOSE_JOBS:-6}"

    # set_libmesh_env unsets LIBMESH_DIR, so it has to run before the export
    # below or configure_libmesh would be called without it.
    set_libmesh_env || return 1

    export LIBMESH_DIR="${PREFIX:?}/libmesh"
    # drop any partial install left by a previous attempt
    rm -rf "${LIBMESH_DIR:?}"
    mkdir -p "${SRC_DIR:?}/build" || return 1
    cd "${SRC_DIR:?}/build" || return 1
    export INSTALL_BINARY="${SRC_DIR:?}/build-aux/install-sh -C"

    # This function runs with errexit disabled, so each step must report its
    # own failure instead of letting the next step run.
    configure_libmesh --with-vtk-lib="${BUILD_PREFIX}"/libmesh-vtk/lib \
                      --with-vtk-include="${BUILD_PREFIX}"/libmesh-vtk/include/vtk-"${VTK_VERSION}" \
        || return 1

    make -j "${cores}" || return 1
    make install -j "${cores}" || return 1
}

function sed_replace(){
if [ "$(uname)" = "Darwin" ]; then
Expand All @@ -19,81 +64,15 @@ function sed_replace(){
fi
}

mkdir -p build; cd build
# shellcheck disable=SC1091 # made available through meta.yaml src path
source "${SRC_DIR:?}/configure_libmesh.sh"

if [[ "$(uname)" == Darwin ]]; then
if [[ $HOST == arm64-apple-darwin20.0.0 ]]; then
CTUNING="-march=armv8.3-a -I$PREFIX/include"
export LIBRARY_PATH="$PREFIX/lib"
else
CTUNING="-march=core2 -mtune=haswell"
fi
else
CTUNING="-march=nocona -mtune=haswell"
fi

unset LIBMESH_DIR CFLAGS CPPFLAGS CXXFLAGS FFLAGS LIBS \
LDFLAGS DEBUG_CPPFLAGS DEBUG_CFLAGS DEBUG_CXXFLAGS \
FORTRANFLAGS DEBUG_FFLAGS DEBUG_FORTRANFLAGS
export F90=mpifort
export F77=mpifort
export FC=mpifort
export CC=mpicc
export CXX=mpicxx
export CFLAGS="${CTUNING}"
export CXXFLAGS="${CTUNING}"
if [[ $HOST == arm64-apple-darwin20.0.0 ]]; then
LDFLAGS="-L$PREFIX/lib -Wl,-S,-rpath,$PREFIX/lib"
else
export LDFLAGS="-Wl,-S"
fi

export HYDRA_LAUNCHER=fork

# made available by contents of meta.yaml (source: path ../../scripts)
# shellcheck disable=SC1091
source "$SRC_DIR"/configure_libmesh.sh
export INSTALL_BINARY="${SRC_DIR}/build-aux/install-sh -C"

# Tired of failing on build events that can be fixed by an invalidation on Civet.
function build_libmesh() {
export LIBMESH_DIR="${PREFIX}"/libmesh
configure_libmesh --with-vtk-lib="${BUILD_PREFIX}"/libmesh-vtk/lib \
--with-vtk-include="${BUILD_PREFIX}"/libmesh-vtk/include/vtk-"${VTK_VERSION}"

CORES=${MOOSE_JOBS:-6}
make -j "$CORES"
make install -j "$CORES"
}

function no_exit_failure(){
set +e
(
set -o pipefail
build_libmesh 2>&1 | tee -a "${SRC_DIR}"/output.log
)
}
# shellcheck disable=SC1091 # made available through meta.yaml src path
source "${SRC_DIR:?}/retry_build.sh"

# Handle retries for this one step so as to not need an entire 4 hour build target redo.
TRY_AGAIN_REASON='Library not loaded: @rpath/'
while true; do
if no_exit_failure; then
set -e
break
elif [[ $(grep -c "${TRY_AGAIN_REASON}" "${SRC_DIR}"/output.log) -eq 0 ]]; then
tail -600 "${SRC_DIR}"/output.log && exit 1
elif [[ ${try_count} -gt 2 ]]; then
tail -100 "${SRC_DIR}"/output.log
printf "Exhausted retry attempts: %s\n" "${try_count}"
exit 1
fi
(( try_count++ )) || true
tail -100 output.log
printf "\n\nLibrary not loaded Conda bug. YUCK. Trying again.\n"
# Start anew, clean.
rm -rf "${PREFIX}"/libmesh
true > "${SRC_DIR}"/output.log
done
# Sets up retry functions and calls do_build. Blocking until success
# or 3 failed attempts, or 1 unknown/unhandled failure
retry_build

sed_replace

Expand Down
1 change: 1 addition & 0 deletions conda/libmesh/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ package:
source:
- path: ../../libmesh
- path: ../../scripts/configure_libmesh.sh
- path: ../functions/retry_build.sh

build:
number: {{ build }}
Expand Down
60 changes: 36 additions & 24 deletions conda/moose/build.sh
Original file line number Diff line number Diff line change
@@ -1,35 +1,47 @@
#!/bin/bash

set -eu
if [ "$(echo $SKIP_DOCS | tr '[:lower:]' '[:upper:]')" == "TRUE" ]; then
export MOOSE_SKIP_DOCS=True
fi
./configure --prefix=${PREFIX}/moose ${MOOSE_OPTIONS:-''}
CORES=${MOOSE_JOBS:-2}

# moose_test-opt
cd test
make -j $CORES
make install -j $CORES

# combined-opt
cd ../modules/combined
make -j $CORES
make install -j $CORES

cd ${PREFIX}/moose/bin

# Configure MOOSE, then build and install the test application followed by
# the combined application, into ${PREFIX}/moose.
# Must be invoked from the top of the MOOSE source tree (it runs ./configure
# and descends into test/ and modules/combined/).
#
# Globals read:    PREFIX, SKIP_DOCS, MOOSE_OPTIONS, MOOSE_JOBS
# Globals written: MOOSE_SKIP_DOCS (exported when SKIP_DOCS is "true",
#                  compared case-insensitively)
# Returns:         0 on success, 1 as soon as any step fails
function do_build(){
    local cores="${MOOSE_JOBS:-2}"

    # drop any partial install left by a previous attempt
    rm -rf "${PREFIX:?}/moose"
    # ${SKIP_DOCS:-}: an unset SKIP_DOCS means "build docs", not an abort
    if [[ "$(echo "${SKIP_DOCS:-}" | tr '[:lower:]' '[:upper:]')" == "TRUE" ]]; then
        export MOOSE_SKIP_DOCS=True
    fi
    # shellcheck disable=SC2086 # we want word splitting when passing arguments
    ./configure --prefix="${PREFIX:?}/moose" ${MOOSE_OPTIONS:-''} || return 1

    # moose_test-opt
    cd test || return 1
    make -j "${cores}" || return 1
    make install -j "${cores}" || return 1

    # combined-opt
    cd ../modules/combined || return 1
    make -j "${cores}" || return 1
    make install -j "${cores}" || return 1
}

# shellcheck disable=SC1091 # made available through meta.yaml src path
source "${SRC_DIR:?}/retry_build.sh"

# Sets up retry functions and calls do_build. Blocking until success
# or 3 failed attempts, or 1 unknown/unhandled failure
retry_build

cd "${PREFIX:?}/moose/bin"
ln -s combined-opt moose-opt
ln -s combined-opt moose

# Fix (hack) for moose -> moose symlink collision binary/copy inputs
cd ${PREFIX}/moose/share/moose
for sdir in `ls ../combined`; do
if [ -d ../combined/$sdir ] && [ ! -d $sdir ] && [ ! -f $sdir ] && [ ! -L $sdir ]; then
ln -s ../combined/$sdir .
fi
cd "${PREFIX:?}/moose/share/moose"
# Symlink every directory of ../combined into the current directory unless an
# entry of the same name (directory, file or symlink) already exists here.
# Works relative to the current working directory.
function link_combined_inputs(){
    local entry name
    for entry in ../combined/*; do
        # Skips non-directories, and the literal pattern when nothing matched
        [[ -d ${entry} ]] || continue
        # The glob yields "../combined/<name>"; test the bare name locally
        name="${entry##*/}"
        if [[ ! -d ${name} ]] && [[ ! -f ${name} ]] && [[ ! -L ${name} ]]; then
            ln -s "../combined/${name}" .
        fi
    done
}
link_combined_inputs

mkdir -p "${PREFIX}/etc/conda/activate.d" "${PREFIX}/etc/conda/deactivate.d"
mkdir -p "${PREFIX:?}/etc/conda/activate.d" "${PREFIX:?}/etc/conda/deactivate.d"
cat <<EOF > "${PREFIX}/etc/conda/activate.d/activate_${PKG_NAME}.sh"
export PATH=\${PATH}:${PREFIX}/moose/bin
export MOOSE_BIN=${PREFIX}/moose/bin/moose
Expand Down
1 change: 1 addition & 0 deletions conda/moose/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ package:

source:
- path: ../../../moose
- path: ../functions/retry_build.sh

build:
number: 0
Expand Down
Loading

0 comments on commit a7c9f4c

Please sign in to comment.