Skip to content

Commit

Permalink
adding more error checking to script
Browse files Browse the repository at this point in the history
  • Loading branch information
ruck314 committed Feb 29, 2024
1 parent 95d2f0d commit 2281e11
Showing 1 changed file with 41 additions and 17 deletions.
58 changes: 41 additions & 17 deletions data_gpu/driver/comp_and_load_drivers.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
#!/bin/bash

# Function to find the latest Nvidia version directory
get_latest_nvidia_version() {
# Get a list of all directories in /usr/src starting with "nvidia-"
nvidia_dirs=($(ls -d /usr/src/nvidia-*))
# Sort the directories by version number using natural sorting
IFS=$'\n' sorted_dirs=($(sort -V <<< "${nvidia_dirs[@]}"))
# Return the last element (assuming it's the latest version)
echo "${sorted_dirs[${#sorted_dirs[@]} - 1]}"
# Verify that the gcc-12 compiler can be found on PATH.
# The Nvidia module build later in this script runs `make CC=gcc-12`.
# Outputs: an error message on stderr when gcc-12 is not found.
# Exits:   terminates the shell with status 1 when gcc-12 is not found;
#          returns 0 otherwise.
check_gcc_12_installed() {
  # Guard clause: nothing to do when the compiler is available.
  command -v gcc-12 >/dev/null 2>&1 && return 0

  echo "Error: gcc-12 is not installed. Please install gcc-12 and try again." >&2
  exit 1
}

# Define Nvidia path
NVIDIA_PATH=$(get_latest_nvidia_version)
echo "Using Nvidia path: $NVIDIA_PATH"
# Call the gcc-12 check function early in the script to ensure it's available
check_gcc_12_installed

# Function to find the latest Nvidia version directory
#
# Arguments: $1 - (optional) directory to search; defaults to /usr/src
# Outputs:   full path of the highest-versioned "nvidia-*" directory on
#            stdout, or an error message on stderr when none exists
# Exits:     status 1 when no matching directory is found. When called
#            inside $(...) this only ends the subshell, so the caller must
#            check the exit status of the command substitution.
get_latest_nvidia_path() {
  local src_dir=${1:-/usr/src}
  local latest_nvidia_path=""
  local candidate
  local -a nvidia_dirs=()

  # Glob instead of parsing `ls`, and do not `cd`, so the caller's working
  # directory is never changed. An unmatched glob stays literal and is
  # rejected by the -d test, as are plain files named nvidia-*.
  for candidate in "$src_dir"/nvidia-*; do
    if [[ -d "$candidate" ]]; then
      nvidia_dirs+=("${candidate##*/}")
    fi
  done

  # Version-aware sort; the last entry is the latest version.
  if (( ${#nvidia_dirs[@]} > 0 )); then
    latest_nvidia_path=$(printf '%s\n' "${nvidia_dirs[@]}" | sort -V | tail -n 1)
  fi

  # Check if no NVIDIA directory was found
  if [[ -z "$latest_nvidia_path" ]]; then
    echo "Error: No NVIDIA directory found in $src_dir" >&2
    exit 1
  fi

  # Print the full path of the latest NVIDIA directory
  echo "$src_dir/$latest_nvidia_path"
}

# Return directory
RET_DIR=$PWD
echo "Using RET_DIR: $RET_DIR"

# Remove existing Nvidia modules (if any)
/usr/sbin/rmmod datagpu 2>/dev/null
Expand All @@ -24,17 +40,25 @@ RET_DIR=$PWD
/usr/sbin/rmmod nvidia-modeset 2>/dev/null
/usr/sbin/rmmod nvidia 2>/dev/null

# Define Nvidia path
# get_latest_nvidia_path runs inside a command substitution, so its `exit 1`
# only ends that subshell; the status has to be checked here, otherwise the
# script would carry on with an empty NVIDIA_PATH.
NVIDIA_PATH=$(get_latest_nvidia_path) || exit 1
echo "Using Nvidia path: $NVIDIA_PATH"

# Quote the path and stop if the directory cannot be entered, so the build
# never runs in the wrong directory.
cd "$NVIDIA_PATH" || { echo "Error: Failed to change directory to $NVIDIA_PATH." >&2; exit 1; }

# Build the Nvidia kernel modules with gcc-12; abort on failure so modules
# that were not (re)built are not inserted afterwards.
make CC=gcc-12 || { echo "Error: Failed to build the Nvidia kernel modules." >&2; exit 1; }

modprobe ecc
/usr/sbin/insmod nvidia.ko NVreg_OpenRmEnableUnsupportedGpus=1 NVreg_EnableStreamMemOPs=1
/usr/sbin/insmod nvidia-modeset.ko
/usr/sbin/insmod nvidia-uvm.ko
/usr/sbin/insmod nvidia-drm.ko modeset=1
modprobe ecc || { echo "Error: Failed to insert ecc module."; exit 1; }

/usr/sbin/insmod nvidia.ko NVreg_OpenRmEnableUnsupportedGpus=1 NVreg_EnableStreamMemOPs=1 || { echo "Error: Failed to insert nvidia.ko."; exit 1; }

/usr/sbin/insmod nvidia-modeset.ko || { echo "Error: Failed to insert nvidia-modeset.ko."; exit 1; }

/usr/sbin/insmod nvidia-uvm.ko || { echo "Error: Failed to insert nvidia-uvm.ko."; exit 1; }

/usr/sbin/insmod nvidia-drm.ko modeset=1 || { echo "Error: Failed to insert nvidia-drm.ko."; exit 1; }

# Return to the directory the script was started from (quoted in case the
# path contains spaces) and stop if that fails.
cd "$RET_DIR" || { echo "Error: Failed to change directory to $RET_DIR." >&2; exit 1; }

# Build the datagpu module against the Nvidia driver sources; abort on
# failure so a missing or stale datagpu.ko is not inserted.
make NVIDIA_DRIVERS="$NVIDIA_PATH" || { echo "Error: Failed to build datagpu.ko." >&2; exit 1; }
/usr/sbin/insmod datagpu.ko
/usr/sbin/insmod datagpu.ko || { echo "Error: Failed to insert datagpu.ko."; exit 1; }

0 comments on commit 2281e11

Please sign in to comment.