From 9130a4aa131ef41d537e2885d4b3eb274e13f333 Mon Sep 17 00:00:00 2001 From: stellarpower Date: Fri, 13 Aug 2021 20:14:35 +0100 Subject: [PATCH 1/3] First draft bumping dependencies to use CUDA 11.1 (to support RTX 30xx series cards) - Fri 13 Aug 20:14:35 BST 2021 --- docker/Dockerfile | 62 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 8aabe341..00815bde 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,48 @@ # Copyright 2020 Toyota Research Institute. All rights reserved. -FROM nvidia/cuda:10.2-devel-ubuntu18.04 +# CUDA_VERSION is defined in the image, so add prefix +ARG PSFM_CUDA_MAJOR_VERSION=11 +ARG PSFM_CUDA_MINOR_VERSION=1 +ARG PSFM_CUDA_VERSION=${PSFM_CUDA_MAJOR_VERSION}.${PSFM_CUDA_MINOR_VERSION} + +ARG SOURCE_IMAGE=nvidia/cuda:${PSFM_CUDA_VERSION}-devel-ubuntu18.04 +######################################## +FROM $SOURCE_IMAGE as base +# Reïntroduce variables from before: +ARG PSFM_CUDA_MAJOR_VERSION +ARG PSFM_CUDA_MINOR_VERSION +ARG PSFM_CUDA_VERSION +ARG SOURCE_IMAGE ENV PROJECT=packnet-sfm -ENV PYTORCH_VERSION=1.8.1 -ENV TORCHVISION_VERSION=0.9.1 -ENV CUDNN_VERSION=7.6.5.32-1+cuda10.2 -ENV NCCL_VERSION=2.7.8-1+cuda10.2 + +# https://lambdalabs.com/blog/install-tensorflow-and-pytorch-on-rtx-30-series/ +# 3090 => CUDA 11.1 OK +# => cuDNN 8.2.1 + +# From https://github.com/pytorch/vision: +# Torch 1.9.0 <=> Torchvision 0.10.0 + +# From https://pytorch.org/get-started/locally/ +# Torch 1.9.0 => CUDA 11.1 || CUDA 10.2 + +#ENV CUDNN_VERSION=7.6.5.32-1+cuda10.2 +#ENV NCCL_VERSION=2.7.8-1+cuda10.2 + +ENV CUDNN_VERSION=8.0.5.39-1+cuda${PSFM_CUDA_VERSION} +# Use the variable as a sanity check that we're using the right CUDA +ARG CUDNN_PACKAGE=libcudnn8=${CUDNN_VERSION} +ENV NCCL_VERSION=2.7.8-1+cuda${PSFM_CUDA_VERSION} +ARG NCCL_PACKAGE=libnccl2=${NCCL_VERSION} + +ENV PYTORCH_VERSION=1.9.0 +ENV TORCHVISION_VERSION=0.10.0 + + +RUN echo "Using " ${SOURCE_IMAGE} " with CUDA version " ${PSFM_CUDA_VERSION} \ + " and Pytorch/torchvision " ${PYTORCH_VERSION}/${TORCHVISION_VERSION} >&2 + + ENV HOROVOD_VERSION=65de4c961d1e5ad2828f2f6c4329072834f27661 ENV TRT_VERSION=6.0.1.5 ENV LC_ALL=C.UTF-8 @@ -29,8 +65,8 @@ RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held- vim \ wget \ ca-certificates \ - libcudnn7=${CUDNN_VERSION} \ - libnccl2=${NCCL_VERSION} \ + ${CUDNN_PACKAGE} \ + ${NCCL_PACKAGE} \ libnccl-dev=${NCCL_VERSION} \ libjpeg-dev \ libpng-dev \ @@ -85,8 +121,14 @@ RUN pip install future typing numpy pandas matplotlib jupyter h5py \ mpi4py onnx onnxruntime pycuda yacs cython==0.29.10 # Install PyTorch -RUN pip install torch==${PYTORCH_VERSION} \ - torchvision==${TORCHVISION_VERSION} && ldconfig +#RUN pip install torch==${PYTORCH_VERSION} \ +# torchvision==${TORCHVISION_VERSION} && ldconfig +RUN pip3 install \ + torch==${PYTORCH_VERSION}+cu${PSFM_CUDA_MAJOR_VERSION}${PSFM_CUDA_MINOR_VERSION} \ + torchvision==${TORCHVISION_VERSION}+cu${PSFM_CUDA_MAJOR_VERSION}${PSFM_CUDA_MINOR_VERSION} \ + -f https://download.pytorch.org/whl/torch_stable.html \ + && ldconfig + # Install apex RUN mkdir /workspace @@ -141,4 +183,4 @@ WORKDIR /workspace/${PROJECT} # Copy project source last (to avoid cache busting) WORKDIR /workspace/${PROJECT} COPY . /workspace/${PROJECT} -ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH" \ No newline at end of file +ENV PYTHONPATH="/workspace/${PROJECT}:$PYTHONPATH" From 93cbffe9b48bf3c8a138ac3ed7971efd3982bfa1 Mon Sep 17 00:00:00 2001 From: stellarpower <5004545+stellarpower@users.noreply.github.com> Date: Wed, 25 Aug 2021 16:35:07 +0000 Subject: [PATCH 2/3] Rename image. --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b7445ba4..643a1bd4 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Handy commands: # - `make docker-build`: builds DOCKERIMAGE (default: `packnet-sfm:latest`) -PROJECT ?= packnet-sfm +PROJECT ?= stellarpower/packnet-sfm WORKSPACE ?= /workspace/$(PROJECT) DOCKER_IMAGE ?= ${PROJECT}:latest @@ -79,4 +79,4 @@ docker-run: docker-build docker-run-mpi: docker-build nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \ - bash -c "${MPI_CMD} ${COMMAND}" \ No newline at end of file + bash -c "${MPI_CMD} ${COMMAND}" From 76692aaaf57659563b9a2cb06d8c456fa6ed35ff Mon Sep 17 00:00:00 2001 From: stellarpower <5004545+stellarpower@users.noreply.github.com> Date: Wed, 25 Aug 2021 16:36:45 +0000 Subject: [PATCH 3/3] Fix for #157 --- packnet_sfm/models/model_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packnet_sfm/models/model_wrapper.py b/packnet_sfm/models/model_wrapper.py index 4ac27246..b29251bd 100644 --- a/packnet_sfm/models/model_wrapper.py +++ b/packnet_sfm/models/model_wrapper.py @@ -292,14 +292,14 @@ def evaluate_depth(self, batch): """Evaluate batch to produce depth metrics.""" # Get predicted depth inv_depths = self.model(batch)['inv_depths'] - depth = inv2depth(inv_depths[0]) + depth = inv2depth(inv_depths) # Post-process predicted depth batch['rgb'] = flip_lr(batch['rgb']) if 'input_depth' in batch: batch['input_depth'] = flip_lr(batch['input_depth']) inv_depths_flipped = self.model(batch)['inv_depths'] inv_depth_pp = post_process_inv_depth( - inv_depths[0], inv_depths_flipped[0], method='mean') + inv_depths, inv_depths_flipped, method='mean') depth_pp = inv2depth(inv_depth_pp) batch['rgb'] = flip_lr(batch['rgb']) # Calculate predicted metrics