From e212340e4519fd0dbccaceab67a1556ca206b503 Mon Sep 17 00:00:00 2001 From: LAURENDEAU Matthieu Date: Wed, 14 Feb 2024 09:02:15 +0100 Subject: [PATCH] BUG: Remove CudaContextManager class and use cudaSetDevice Use the primary context with cudaSetDevice() introduced by Cuda 7 (https://developer.download.nvidia.com/compute/cuda/7_0/Prod/doc/CUDA_Toolkit_Release_Notes.pdf) instead of a new one. cudaSetDevice is called before every memory transfer between the CPU and GPU to be sure that the context is set for the current thread, see https://developer.nvidia.com/blog/cuda-pro-tip-always-set-current-device-avoid-multithreading-bugs/. --- include/itkCudaContextManager.h | 66 ----------------- include/itkCudaDataManager.h | 5 +- include/itkCudaImageDataManager.h | 1 - include/itkCudaImageDataManager.hxx | 8 +- src/CMakeLists.txt | 1 - src/itkCudaContextManager.cxx | 109 ---------------------------- src/itkCudaDataManager.cxx | 28 ++----- 7 files changed, 9 insertions(+), 209 deletions(-) delete mode 100644 include/itkCudaContextManager.h delete mode 100644 src/itkCudaContextManager.cxx diff --git a/include/itkCudaContextManager.h b/include/itkCudaContextManager.h deleted file mode 100644 index fa3b7d3..0000000 --- a/include/itkCudaContextManager.h +++ /dev/null @@ -1,66 +0,0 @@ -/*========================================================================= - * - * Copyright NumFOCUS - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0.txt - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - *=========================================================================*/ -#ifndef itkCudaContextManager_h -#define itkCudaContextManager_h - -#include "itkCudaUtil.h" -#include -#include "CudaCommonExport.h" - -// -// Singleton class for CudaContextManager -// - -/** \class CudaContextManager - * - * \brief Class to store the Cuda context. - * - * \ingroup ITKCudaCommon - */ -namespace itk -{ -class CudaCommon_EXPORT CudaContextManager : public LightObject -{ -public: - static CudaContextManager * - GetInstance(); - - static void - DestroyInstance(); - - CUcontext * - GetCurrentContext(); - - int - GetCurrentDevice(); - -private: - CudaContextManager(); - ~CudaContextManager() override; - - CUcontext m_Context; - int m_Device; - int m_DeviceIdx; - int m_NumberOfDevices; - - static CudaContextManager * m_Instance; - static bool m_Initialized; -}; -} // namespace itk - -#endif diff --git a/include/itkCudaDataManager.h b/include/itkCudaDataManager.h index e6f4649..c5cca65 100644 --- a/include/itkCudaDataManager.h +++ b/include/itkCudaDataManager.h @@ -22,7 +22,6 @@ #include "itkDataObject.h" #include "itkObjectFactory.h" #include "itkCudaUtil.h" -#include "itkCudaContextManager.h" #include "CudaCommonExport.h" #include @@ -232,6 +231,8 @@ class CudaCommon_EXPORT CudaDataManager : public Object void PrintSelf(std::ostream & os, Indent indent) const override; + int m_Device; + private: CudaDataManager(const Self &) = delete; // purposely not implemented void @@ -240,8 +241,6 @@ class CudaCommon_EXPORT CudaDataManager : public Object protected: size_t m_BufferSize; // # of bytes - CudaContextManager * m_ContextManager; - /** buffer type */ int m_MemFlags; diff --git a/include/itkCudaImageDataManager.h b/include/itkCudaImageDataManager.h index eb62557..c6c817f 100644 --- a/include/itkCudaImageDataManager.h +++ b/include/itkCudaImageDataManager.h @@ -23,7 +23,6 @@ #include #include "itkCudaUtil.h" #include "itkCudaDataManager.h" -#include "itkCudaContextManager.h" namespace itk { diff --git a/include/itkCudaImageDataManager.hxx b/include/itkCudaImageDataManager.hxx index 57fecec..670f39b 100644 --- a/include/itkCudaImageDataManager.hxx +++ b/include/itkCudaImageDataManager.hxx @@ -76,9 +76,7 @@ CudaImageDataManager::MakeCPUBufferUpToDate() std::cout << this << ": GPU->CPU data copy" << std::endl; #endif - CUDA_CHECK(cuCtxSetCurrent( - *(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host - // CPU thread to the right context + CUDA_CHECK(cudaSetDevice(m_Device)); errid = cudaMemcpy(m_CPUBuffer, m_GPUBuffer->GetPointer(), m_BufferSize, cudaMemcpyDeviceToHost); CudaCheckError(errid, __FILE__, __LINE__, ITK_LOCATION); @@ -117,9 +115,7 @@ CudaImageDataManager::MakeGPUBufferUpToDate() std::cout << "CPU->GPU data copy" << std::endl; #endif - CUDA_CHECK(cuCtxSetCurrent( - *(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host - // CPU thread to the right context + CUDA_CHECK(cudaSetDevice(m_Device)); errid = cudaMemcpy(m_GPUBuffer->GetPointer(), m_CPUBuffer, m_BufferSize, cudaMemcpyHostToDevice); CudaCheckError(errid, __FILE__, __LINE__, ITK_LOCATION); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b936a79..416ff0f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,4 @@ set(CudaCommon_SRCS - itkCudaContextManager.cxx itkCudaDataManager.cxx itkCudaUtil.cxx itkCudaMemoryProbe.cxx diff --git a/src/itkCudaContextManager.cxx b/src/itkCudaContextManager.cxx deleted file mode 100644 index 4c80479..0000000 --- a/src/itkCudaContextManager.cxx +++ /dev/null @@ -1,109 +0,0 @@ -/*========================================================================= - * - * Copyright NumFOCUS - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0.txt - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - *=========================================================================*/ -#include -#include "itkCudaContextManager.h" -#include "cuda.h" -#include "cuda_runtime_api.h" - -namespace itk -{ -// static variable initialization -CudaContextManager * CudaContextManager::m_Instance = nullptr; -bool CudaContextManager::m_Initialized = false; - - -CudaContextManager * -CudaContextManager::GetInstance() -{ - if (m_Instance == nullptr) - { - m_Instance = new CudaContextManager(); - } - m_Instance->Register(); - return m_Instance; -} - -void -CudaContextManager::DestroyInstance() -{ - m_Instance->UnRegister(); - if (m_Instance->GetReferenceCount() == 1) - { - m_Instance->Delete(); - m_Instance = nullptr; - } -} - -CudaContextManager::CudaContextManager() -{ - m_DeviceIdx = -1; - m_Device = 0; - - if (!m_Initialized) - { - cuInit(0); - m_Initialized = true; - } - - std::vector devices; - m_NumberOfDevices = itk::CudaGetAvailableDevices(devices); - - if (m_NumberOfDevices) - { - CUdevice device = 0; - m_DeviceIdx = itk::CudaGetMaxFlopsDev(); - CUDA_CHECK(cuDeviceGet(&device, m_DeviceIdx)); - - CUDA_CHECK(cuCtxCreate(&m_Context, CU_CTX_SCHED_AUTO, device)); - - CUDA_CHECK(cuCtxSetCurrent(m_Context)); - - m_Device = device; - } - else - { - m_Context = nullptr; - m_Device = 0; - m_DeviceIdx = 0; - } -} - -CudaContextManager::~CudaContextManager() -{ - if (m_Context) - { - CUDA_CHECK(cuCtxDestroy(m_Context)); - } - cudaDeviceReset(); -} - -int -CudaContextManager::GetCurrentDevice() -{ - int device = -1; - CUDA_CHECK(cudaGetDevice(&device)); - return device; -} - -CUcontext * -CudaContextManager::GetCurrentContext() -{ - return &m_Context; -} - -} // namespace itk diff --git a/src/itkCudaDataManager.cxx b/src/itkCudaDataManager.cxx index 6db4870..1d83101 100644 --- a/src/itkCudaDataManager.cxx +++ b/src/itkCudaDataManager.cxx @@ -24,18 +24,8 @@ namespace itk // constructor CudaDataManager::CudaDataManager() { - m_ContextManager = CudaContextManager::GetInstance(); - - // Creating the context in the constructor allows avoiding a memory leak. - // However, the cuda data manager is created even if there is no use of CUDA - // software and sometimes one compiles RTK with CUDA but wants to use it - // without CUDA. So if the context pointer is nullptr, which indicates that there - // is no CUDA device available, we just do not set the context (SR). This fixes - // the problem reported here: - // https://www.creatis.insa-lyon.fr/pipermail/rtk-users/2015-July/000570.html - CUcontext * ctx = m_ContextManager->GetCurrentContext(); - if (ctx) - CUDA_CHECK(cuCtxSetCurrent(*ctx)); + m_Device = 0; + CUDA_CHECK(cudaSetDevice(m_Device)); m_CPUBuffer = nullptr; m_GPUBuffer = GPUMemPointer::New(); @@ -56,7 +46,6 @@ CudaDataManager::CudaDataManager() CudaDataManager::~CudaDataManager() { m_GPUBuffer = nullptr; - CudaContextManager::DestroyInstance(); } void @@ -91,9 +80,7 @@ CudaDataManager::Free() { try { - CUDA_CHECK(cuCtxSetCurrent( - *(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host - // CPU thread to the right context + CUDA_CHECK(cudaSetDevice(m_Device)); m_GPUBuffer->Free(); } catch (itk::ExceptionObject & e) @@ -171,9 +158,7 @@ CudaDataManager::UpdateCPUBuffer() std::cout << this << "::UpdateCPUBuffer GPU->CPU data copy " << m_GPUBuffer->GetPointer() << "->" << m_CPUBuffer << " : " << m_BufferSize << std::endl; #endif - CUDA_CHECK(cuCtxSetCurrent( - *(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host - // CPU thread to the right context + CUDA_CHECK(cudaSetDevice(m_Device)); CUDA_CHECK(cudaMemcpy(m_CPUBuffer, m_GPUBuffer->GetPointer(), m_BufferSize, cudaMemcpyDeviceToHost)); m_IsCPUBufferDirty = false; } @@ -212,9 +197,7 @@ CudaDataManager::UpdateGPUBuffer() std::cout << this << "::UpdateGPUBuffer CPU->GPU data copy " << m_CPUBuffer << "->" << m_GPUBuffer->GetPointer() << " : " << m_BufferSize << std::endl; #endif - CUDA_CHECK(cuCtxSetCurrent( - *(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host - // CPU thread to the right context + CUDA_CHECK(cudaSetDevice(m_Device)); CUDA_CHECK(cudaMemcpy(m_GPUBuffer->GetPointer(), m_CPUBuffer, m_BufferSize, cudaMemcpyHostToDevice)); } m_IsGPUBufferDirty = false; @@ -259,7 +242,6 @@ CudaDataManager::Graft(const CudaDataManager * data) if (data) { m_BufferSize = data->m_BufferSize; - m_ContextManager = data->m_ContextManager; m_GPUBuffer = data->m_GPUBuffer; m_CPUBuffer = data->m_CPUBuffer; m_IsCPUBufferDirty = data->m_IsCPUBufferDirty;