Skip to content

Commit

Permalink
REL v0.5.0 RMM Release
Browse files Browse the repository at this point in the history
Merge pull request #21 from rapidsai/branch-0.5
[RELEASE] v0.5 RMM Release
  • Loading branch information
mike-wendt authored Jan 28, 2019
2 parents d615fa7 + 8bba767 commit 801a278
Show file tree
Hide file tree
Showing 23 changed files with 489 additions and 202 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__
.cache
.coverage
.vscode
*.code-workspace
*.swp
*.pytest_cache
DartConfiguration.tcl
Expand All @@ -34,3 +35,9 @@ python/librmm_cffi/librmm_cffi.py
.project
.cproject
.settings

# cnmem copy
include/rmm/detail/cnmem.h

# generated librmm_build.py
python/librmm_cffi/librmm_build.py
29 changes: 21 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
# RMM 0.5.0 (Date TBD)
# RMM 0.5.0 (28 Jan 2019)

## New Features

- PR #1: Spun off RMM from cuDF into its own repository.

- PR #2 Added CUDA Managed Memory allocation mode
## Improvements

- PR #12 Enable building RMM as a submodule
- PR #13 CMake: Added CXX11ABI option and removed Travis references
- PR #16 CMake: Added PARALLEL_LEVEL environment variable handling for GTest build parallelism (matches cuDF)
- PR #17 Update README with v0.5 changes including Managed Memory support

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`
## Bug Fixes

RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.
- PR #10 Change cnmem submodule URL to use https
- PR #15 Temporarily disable hanging AllocateTB test for managed memory
- PR #28 Fix invalid reference to local stack variable in `rmm::exec_policy`

# cuDF 0.4.0 (05 Dec 2018)
# RMM 0.4.0 (20 Dec 2018)

## New Features

- PR #1 Spun off RMM from cuDF into its own repository.

## Improvements

- CUDF PR #472 RMM: Created centralized rmm::device_vector alias and rmm::exec_policy
- CUDF PR #465 Added templated C++ API for RMM to avoid explicit cast to `void**`

## Bug Fixes



RMM was initially implemented as part of cuDF, so we include the relevant changelog history below.

# cuDF 0.3.0 (23 Nov 2018)

Expand Down
87 changes: 60 additions & 27 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,22 @@
#=============================================================================
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

project(RMM VERSION 0.4.0 LANGUAGES C CXX CUDA)
project(RMM VERSION 0.5.0 LANGUAGES C CXX CUDA)

###################################################################################################
# - build type ------------------------------------------------------------------------------------

# Set a default build type if none was specified
set(DEFAULT_BUILD_TYPE "Release")

if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.")
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

###################################################################################################
# - compiler options ------------------------------------------------------------------------------
Expand All @@ -27,17 +42,27 @@ set(CMAKE_CXX_COMPILER $ENV{CXX})

if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif(CMAKE_COMPILER_IS_GNUCXX)

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")

# set default build type
set(CMAKE_BUILD_TYPE "Release")
option(CMAKE_CXX11_ABI "Enable the GLIBCXX11 ABI" OFF)
if(CMAKE_CXX11_ABI)
message(STATUS "CUDF: Enabling the GLIBCXX11 ABI")
else()
message(STATUS "CUDF: Disabling the GLIBCXX11 ABI")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
endif(CMAKE_CXX11_ABI)
endif(CMAKE_COMPILER_IS_GNUCXX)

option(BUILD_TESTS "Configure CMake to build tests"
ON)

###################################################################################################
# - cnmem ---------------------------------------------------------------------------------
# include cnmem.h in the include/detail directory
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnmem/include/cnmem.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/rmm/detail/cnmem.h" COPYONLY)


###################################################################################################
# - cmake modules ---------------------------------------------------------------------------------

Expand All @@ -57,7 +82,7 @@ if(BUILD_TESTS)
if(GTEST_FOUND)
message(STATUS "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}")
include_directories(${GTEST_INCLUDE_DIR})
add_subdirectory(${CMAKE_SOURCE_DIR}/tests)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests)
else()
message(AUTHOR_WARNING "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.")
endif(GTEST_FOUND)
Expand All @@ -67,25 +92,28 @@ endif(BUILD_TESTS)
# - include paths ---------------------------------------------------------------------------------

include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
"${CMAKE_SOURCE_DIR}/include"
"${CMAKE_SOURCE_DIR}/src"
"${CMAKE_SOURCE_DIR}/thirdparty/cnmem/include")
"${CMAKE_CURRENT_SOURCE_DIR}/include"
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnmem/include")

###################################################################################################
# - library paths ---------------------------------------------------------------------------------

link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
"${CMAKE_BINARY_DIR}/lib"
"${CMAKE_CURRENT_BINARY_DIR}/lib"
"${GTEST_LIBRARY_DIR}")

###################################################################################################
# - library targets -------------------------------------------------------------------------------

add_library(rmm SHARED
src/memory.cpp
src/rmm.cpp
src/memory_manager.cpp
thirdparty/cnmem/src/cnmem.cpp)

# override rpath for rmm
SET_TARGET_PROPERTIES(rmm PROPERTIES BUILD_RPATH "\$ORIGIN")

###################################################################################################
# - build options ---------------------------------------------------------------------------------

Expand All @@ -105,32 +133,37 @@ target_link_libraries(rmm cudart cuda)
###################################################################################################
# - python cffi bindings --------------------------------------------------------------------------

add_custom_command(OUTPUT PYTHON_CFFI
# To enable building RMM as a submodule, where the binary directory is not underneath the RMM
# source directory.
set(RMM_API_H_PATH "${CMAKE_CURRENT_SOURCE_DIR}/include/rmm/rmm_api.h")
# Input is a template which assigns MEMORY_H_PATH to a variable. This command replaces the variable with its value
# and saves the file as librmm_build.py
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/python/librmm_cffi/librmm_build.py.in"
"${CMAKE_CURRENT_SOURCE_DIR}/python/librmm_cffi/librmm_build.py")

add_custom_command(OUTPUT RMM_PYTHON_CFFI
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/../python ${CMAKE_BINARY_DIR}/python
COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/python python setup.py build_ext --inplace
COMMAND test ! -e ${CMAKE_BINARY_DIR}/rmm/librmm.so || ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/rmm/librmm.so ${CMAKE_BINARY_DIR}/librmm.so
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/librmm_cffi ${CMAKE_BINARY_DIR}/python/librmm_cffi
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/tests ${CMAKE_BINARY_DIR}/python/tests
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_BINARY_DIR}/python/rmm_setup.py
COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/python python rmm_setup.py build_ext --inplace
VERBATIM)

add_custom_target(python_cffi DEPENDS rmm PYTHON_CFFI)
add_custom_target(rmm_python_cffi DEPENDS rmm RMM_PYTHON_CFFI)

###################################################################################################
# - install targets -------------------------------------------------------------------------------

install(TARGETS rmm
DESTINATION lib)

install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/rmm
DESTINATION include)

add_custom_command(OUTPUT INSTALL_PYTHON_CFFI
add_custom_command(OUTPUT RMM_INSTALL_PYTHON_CFFI
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/python python setup.py install --single-version-externally-managed --record=record.txt
COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}/python python rmm_setup.py install --single-version-externally-managed --record=record.txt
VERBATIM)

add_custom_target(install_python DEPENDS rmm PYTHON_CFFI INSTALL_PYTHON_CFFI)






add_custom_target(rmm_install_python DEPENDS rmm RMM_PYTHON_CFFI RMM_INSTALL_PYTHON_CFFI)
53 changes: 53 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Contributing to RMM

If you are interested in contributing to RMM, your contributions will fall
into three categories:
1. You want to report a bug, feature request, or documentation issue
- File an [issue](https://github.com/rapidsai/rmm/issues/new/choose)
describing what you encountered or what you want to see changed.
- The RAPIDS team will evaluate the issues and triage them, scheduling
them for a release. If you believe the issue needs priority attention
comment on the issue to notify the team.
2. You want to propose a new Feature and implement it
- Post about your intended feature, and we shall discuss the design and
implementation.
- Once we agree that the plan looks good, go ahead and implement it, using
the [code contributions](#code-contributions) guide below.
3. You want to implement a feature or bug-fix for an outstanding issue
- Follow the [code contributions](#code-contributions) guide below.
- If you need more context on a particular issue, please ask and we shall
provide.

## Code contributions

### Your first issue

1. Read the project's [README.md](https://github.com/rapidsai/rmm/blob/master/README.md)
to learn how to setup the development environment
2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/rapidsai/rmm/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
or [help wanted](https://github.com/rapidsai/rmm/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels
3. Comment on the issue saying you are going to work on it
4. Code! Make sure to update unit tests!
5. When done, [create your pull request](https://github.com/rapidsai/rmm/compare)
6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed
7. Wait for other developers to review your code and update code as needed
8. Once reviewed and approved, a RAPIDS developer will merge your pull request

Remember, if you are unsure about anything, don't hesitate to comment on issues
and ask for clarifications!

### Seasoned developers

Once you have gotten your feet wet and are more comfortable with the code, you
can look at the prioritized issues of our next release in our [project boards](https://github.com/rapidsai/rmm/projects).

> **Pro Tip:** Always look at the release board with the highest number for
issues to work on. This is where RAPIDS developers also focus their efforts.

Look at the unassigned issues, and find an issue you are comfortable with
contributing to. Start with _Step 3_ from above, commenting on the issue to let
others know you are working on it. If you have any questions related to the
implementation of the issue, ask them in the issue instead of the PR.

## Attribution
Portions adapted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md
64 changes: 46 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@

RAPIDS Memory Manager (RMM) is:

- A replacement allocator for CUDA Device Memory.
- A replacement allocator for CUDA Device Memory (and CUDA Managed Memory).
- A pool allocator to make CUDA device memory allocation / deallocation faster
and asynchronous.
- A central place for all device memory allocations in cuDF (C++ and Python).
- A central place for all device memory allocations in cuDF (C++ and Python) and
other [RAPIDS](https://rapids.ai) libraries.

RMM is not:
- A replacement allocator for CUDA managed memory (Unified Memory,
e.g. `cudaMallocManaged`). This may change in the future.

- A replacement allocator for host memory (`malloc`, `new`, `cudaMallocHost`,
`cudaHostRegister`).

**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/rmm/blob/master/README.md) ensure you are on the `master` branch.

## Install RMM

RMM currently must be built from source.
RMM currently must be built from source. This happens automatically in a
submodule when you build or install [cuDF](https://github.com/rapidsai/cudf) or
[RAPIDS](https://rapids.ai) containers.

## Building from Source

Expand Down Expand Up @@ -72,9 +76,9 @@ $ make test

- Build, install, and test cffi bindings:
```bash
$ make python_cffi # build CFFI bindings for librmm.so
$ make install_python # build & install CFFI python bindings. Depends on cffi package from PyPi or Conda
$ cd python && py.test -v # optional, run python tests on low-level python bindings
$ make rmm_python_cffi # build CFFI bindings for librmm.so
$ make rmm_install_python # build & install CFFI python bindings. Depends on cffi package from PyPi or Conda
$ cd python && pytest -v # optional, run python tests on low-level python bindings
```

Done! You are ready to develop for the RMM OSS project.
Expand All @@ -97,7 +101,7 @@ cudaError_t result = cudaMalloc(&myvar, size_in_bytes) );
cudaError_t result = cudaFree(myvar) );
// new
rmmError_t result = RMMM_ALLOC(&myvar, size_in_bytes, stream_id);
rmmError_t result = RMM_ALLOC(&myvar, size_in_bytes, stream_id);
// ...
rmmError_t result = RMM_FREE(myvar, stream_id);
```
Expand All @@ -117,8 +121,6 @@ situations:

RMM includes a custom Thrust allocator in the file `thrust_rmm_allocator.h`. This defines the template class `rmm_allocator`, and
a custom Thrust CUDA device execution policy called `rmm::exec_policy(stream)`.
This instructs Thrust to use RMM for temporary memory allocation and execute on
the specified `stream`.

#### Thrust Device Vectors

Expand All @@ -140,14 +142,21 @@ For convenience, you can use the alias `rmm::device_vector<T>` defined in
#### Thrust Algorithms

To instruct Thrust to use RMM to allocate temporary storage, you can use the custom
Thrust CUDA device execution policy: `rmm::exec_policy(stream)`. This instructs
Thrust to use RMM for temporary memory allocation and execute on the specified `stream`.
Thrust CUDA device execution policy: `rmm::exec_policy(stream)`.
This instructs Thrust to use the `rmm_allocator` on the specified stream for temporary memory allocation.

`rmm::exec_policy(stream)` returns a `std::unique_ptr` to a Thrust execution policy that uses `rmm_allocator` for temporary allocations.
In order to specify that the Thrust algorithm be executed on a specific stream, the usage is:

Example usage:
```
thrust::sort(rmm::exec_policy(stream), ...);
thrust::sort(rmm::exec_policy(stream)->on(stream), ...);
```

The first `stream` argument is the `stream` to use for `rmm_allocator`.
The second `stream` argument is what should be used to execute the Thrust algorithm.
These two arguments must be identical.


## Using RMM in Python Code

cuDF and other Python libraries typically create arrays of CUDA device memory
Expand Down Expand Up @@ -209,9 +218,10 @@ experimental pool allocator using the `librmm_config` module.
```
from librmm_cffi import librmm_config as rmm_cfg
rmm_cfg.use_pool_allocator = True # default is False
rmm_cfg.initial_pool_size = 2<<30 # set to 2GiB. Default is 1/2 total GPU memory
rmm_cfg.enable_logging = True # default is False -- has perf overhead
rmm_cfg.use_pool_allocator = True # default is False
rmm_cfg.initial_pool_size = 2<<30 # set to 2GiB. Default is 1/2 total GPU memory
rmm_cfg.use_managed_memory = False # default is false
rmm_cfg.enable_logging = True # default is False -- has perf overhead
```

To configure RMM options to be used in cuDF before loading, simply do the above
Expand Down Expand Up @@ -251,3 +261,21 @@ cuDF operations with device-memory-intensive computations that don't use RMM
finalize RMM. The Mortgage E2E workflow notebook uses this technique. We are
working on better ways to reclaim memory, as well as making RAPIDS machine
learning libraries use the same RMM memory pool.

### CUDA Managed Memory

RMM can be set to allocate all memory as managed memory (`cudaMallocManaged`
underlying allocator). This is enabled in C++ by setting the `allocation_mode`
member of the struct `rmmOptions_t` to include the flag `CudaManagedMemory`
(the flags are ORed), and passing it to `rmmInitialize()`. If the flag
`PoolAllocation` is also set, then RMM will allocate from a pool of managed
memory.

In Python, use the `librmm_config.use_managed_memory` Boolean setting
as shown previously.

When the allocation mode is both `CudaManagedMemory` and `PoolAllocation`,
RMM allocates the initial pool (and any expansion allocations) using
`cudaMallocManaged` and then prefetches the pool to the GPU using
`cudaMemPrefetchAsync` so all pool memory that will fit is initially located
on the device.
Loading

0 comments on commit 801a278

Please sign in to comment.