-
Notifications
You must be signed in to change notification settings - Fork 1
/
CMakeLists.txt
216 lines (183 loc) · 9.2 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: © 2023 Aalto University
# Project-wide setup: minimum CMake version, project declaration, the
# user-facing build switches and the C++ standard for all CXX sources.
cmake_minimum_required(VERSION 3.21)
project(jax-chacha20-prng LANGUAGES CXX)

# Boolean build switches; all of them default to OFF.
option(BUILD_TESTING "Build tests for native kernels" OFF)
option(FORCE_GENERIC "Build without CPU architecture optimized instructions" OFF)
option(DISABLE_OPENMP "Do not use OpenMP parallelisation for CPU kernels" OFF)

# AMD GPU targets for the HIP kernels, given as one space-separated string.
set(HIP_ARCHITECTURES
    "gfx900 gfx906 gfx908 gfx90a gfx940 gfx1010 gfx1030 gfx1100"
    CACHE STRING "A list of AMD GPU architectures to build kernels for")

# Build CXX sources as C++17. Marking the standard as required makes the
# configure step fail on a compiler without C++17 support instead of silently
# falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Select the CPU kernel implementation.
#
# Outputs used further down in this file:
#   cpu_arch_path - include directory holding the architecture-specific code
#   cpu_arch_def  - preprocessor definition naming the chosen architecture
#
# The generic implementation is the default. Unless FORCE_GENERIC is set, the
# compiler is probed for Intel SSE first and, only if that fails, for ARM Neon.
# Each probe is tried without extra flags and then once more with the
# corresponding compiler flag; if the flag was needed, it is added to all CXX
# compilations in this directory scope.
set(cpu_arch_path "${CMAKE_CURRENT_LIST_DIR}/lib/generic/")
set(cpu_arch_def "ARCH_GENERIC")
set(SSE_ENABLED "No")
set(NEON_ENABLED "No")

if(NOT FORCE_GENERIC)
  # --- Intel SSE probe ------------------------------------------------------
  try_run(SSE_RUN_SUCCESS SSE_COMPILE_SUCCESS
    "${CMAKE_CURRENT_BINARY_DIR}"
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake_config/sse_test.cpp"
  )
  if(NOT SSE_COMPILE_SUCCESS)
    # could not compile sse intrinsics, maybe we need the compiler flag?
    try_run(SSE_RUN_SUCCESS SSE_COMPILE_SUCCESS
      "${CMAKE_CURRENT_BINARY_DIR}"
      "${CMAKE_CURRENT_SOURCE_DIR}/cmake_config/sse_test.cpp"
      COMPILE_DEFINITIONS "-msse"
    )
    if(SSE_COMPILE_SUCCESS)
      add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-msse>)
    endif()
  endif()

  # --- ARM Neon probe (only when SSE is unavailable) --------------------------
  if(NOT SSE_COMPILE_SUCCESS)
    try_run(NEON_RUN_SUCCESS NEON_COMPILE_SUCCESS
      "${CMAKE_CURRENT_BINARY_DIR}"
      "${CMAKE_CURRENT_SOURCE_DIR}/cmake_config/neon_test.cpp"
    )
    if(NOT NEON_COMPILE_SUCCESS)
      # could not compile neon intrinsics, maybe we need the compiler flag?
      try_run(NEON_RUN_SUCCESS NEON_COMPILE_SUCCESS
        "${CMAKE_CURRENT_BINARY_DIR}"
        "${CMAKE_CURRENT_SOURCE_DIR}/cmake_config/neon_test.cpp"
        COMPILE_DEFINITIONS "-mfpu=neon"
      )
      if(NEON_COMPILE_SUCCESS)
        add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-mfpu=neon>)
      endif()
    endif()
  endif()

  # --- Apply the probe results ------------------------------------------------
  # A successful compile is enough to enable an instruction set; a failed test
  # run only produces a warning.
  if(SSE_COMPILE_SUCCESS)
    set(cpu_arch_path "${CMAKE_CURRENT_LIST_DIR}/lib/intel/")
    set(cpu_arch_def "ARCH_INTEL")
    set(SSE_ENABLED "Yes")
    if(NOT SSE_RUN_SUCCESS EQUAL 0)
      message(WARNING "Can compile with Intel SSE instructions but failed a test run (cross-compiling?); will compile with SSE anyways - set FORCE_GENERIC to disable")
    endif()
  endif()
  if(NEON_COMPILE_SUCCESS)
    set(cpu_arch_path "${CMAKE_CURRENT_LIST_DIR}/lib/arm/")
    set(cpu_arch_def "ARCH_ARM")
    set(NEON_ENABLED "Yes")
    if(NOT NEON_RUN_SUCCESS EQUAL 0)
      message(WARNING "Can compile with ARM Neon instructions but failed a test run (cross-compiling?); will compile with Neon anyways - set FORCE_GENERIC to disable")
    endif()
  endif()
endif()

# Summary of the detection. STATUS mode adds the "-- " prefix itself and sends
# the lines to the regular configure log like the other status messages in
# this file.
message(STATUS "Detected architecture - ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "SSE instructions enabled - ${SSE_ENABLED}")
message(STATUS "ARM Neon instructions enabled - ${NEON_ENABLED}")
# The native kernels are exposed to Python through pybind11. A pip-installed
# pybind11 has trouble detecting the Python interpreter and libraries when
# CMake runs inside a conda environment (despite what the docs say), so
# pybind11 is checked out as a git submodule under extern/pybind11 instead.

# When building from a git checkout, initialise/update the submodule unless the
# user switched GIT_SUBMODULE_UPDATE off.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
  option(GIT_SUBMODULE_UPDATE "Check submodules during build" ON)
  if(NOT GIT_SUBMODULE_UPDATE)
    message(STATUS "Not updating submodules since GIT_SUBMODULE_UPDATE was OFF.")
  else()
    message(STATUS "Initializing/updating pybind11 dependency...")
    execute_process(
      COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      RESULT_VARIABLE GIT_SUBMOD_RESULT
    )
    # Any non-zero exit status of git aborts the configure step.
    if(NOT GIT_SUBMOD_RESULT EQUAL "0")
      message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}.")
    endif()
  endif()
endif()

# Whether or not git was used above, the pybind11 sources must be present now.
if(NOT EXISTS "${PROJECT_SOURCE_DIR}/extern/pybind11/CMakeLists.txt")
  message(FATAL_ERROR "The pybind11 repository was not downloaded! Please run manually: git submodule update --init --recursive .")
endif()

# Locate Python first, then add the vendored pybind11 to the build.
find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
add_subdirectory(extern/pybind11)
# Not used: find_package(pybind11 REQUIRED) is broken here; it does not use the
# correct Python interpreter/libraries in a conda env.
# Optional OpenMP support for the CPU kernels.
# When OpenMP is found, OPENMP_AVAILABLE is defined for everything compiled in
# this directory scope; the targets link OpenMP::OpenMP_CXX themselves based on
# OpenMP_CXX_FOUND. A missing OpenMP is not an error, only a warning.
if(NOT DISABLE_OPENMP)
  find_package(OpenMP)
  if(OpenMP_CXX_FOUND)
    add_compile_definitions(OPENMP_AVAILABLE)
  else()
    message(WARNING "OpenMP not found - Compiling without, but you may see lower performance.")
  endif()
else()
  # STATUS mode adds the "-- " prefix itself, matching the other status
  # messages in this file.
  message(STATUS "OpenMP disabled by user")
endif()
# Python extension module `native`, built from the CPU kernel and the Python
# bindings.
#
# NO_EXTRAS prevents pybind11 from stripping symbols; for some reason for HIP
# compilation this strips all pybind symbols as well, which means the module
# cannot be loaded from Python.
pybind11_add_module(
  native
  ${CMAKE_CURRENT_LIST_DIR}/lib/cpu_kernel.cpp
  ${CMAKE_CURRENT_LIST_DIR}/lib/python_bindings.cpp
  NO_EXTRAS
)

# Header search path: the common lib/ directory first, then the directory of
# the selected CPU architecture. Both are attached to the target rather than to
# the directory scope, so they do not leak into other targets.
target_include_directories(native PRIVATE
  ${CMAKE_CURRENT_LIST_DIR}/lib
  ${cpu_arch_path}
)
target_compile_definitions(native PRIVATE ${cpu_arch_def})

# Manually strip symbols from the module; this does not run into the same
# problem as letting pybind11 do it for some reason...
target_link_options(native PRIVATE "-s")

if(OpenMP_CXX_FOUND)
  target_link_libraries(native PUBLIC OpenMP::OpenMP_CXX)
endif()
# Stand-alone test executable for the CPU kernels; only configured when
# BUILD_TESTING is ON. It compiles the kernel source directly together with
# the test driver.
if(BUILD_TESTING)
  add_executable(
    cpu_kernel_tests
    ${CMAKE_CURRENT_LIST_DIR}/tests/lib/cpu_kernel_tests.cpp
    ${CMAKE_CURRENT_LIST_DIR}/lib/cpu_kernel.cpp
  )

  # Same header layout and architecture definition as the Python module.
  target_include_directories(
    cpu_kernel_tests
    PRIVATE
      ${CMAKE_CURRENT_LIST_DIR}/lib/
      ${cpu_arch_path}
  )
  target_compile_definitions(cpu_kernel_tests PRIVATE ${cpu_arch_def})

  # Link OpenMP only if it was detected.
  if(OpenMP_CXX_FOUND)
    target_link_libraries(cpu_kernel_tests PUBLIC OpenMP::OpenMP_CXX)
  endif()
endif()
# NVIDIA GPU kernels: enabled automatically when a CUDA compiler is found,
# otherwise skipped with a warning.
include(CheckLanguage)
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
  message(WARNING "CUDA not found - not building CUDA GPU kernels!")
else()
  enable_language(CUDA)

  # Add the GPU kernel to the Python module and compile both the kernel and
  # the Python bindings as CUDA sources.
  target_compile_definitions(native PRIVATE CUDA_ENABLED)
  target_sources(native PRIVATE ${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp)
  set_source_files_properties(
    ${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp
    ${CMAKE_CURRENT_LIST_DIR}/lib/python_bindings.cpp
    PROPERTIES LANGUAGE CUDA
  )

  # compile for lowest compute capability supported by XLA: 3.5
  # cf. https://www.tensorflow.org/install/gpu#hardware_requirements
  set_property(TARGET native PROPERTY CUDA_ARCHITECTURES 35)

  # Stand-alone GPU kernel test executable (BUILD_TESTING only).
  if(BUILD_TESTING)
    add_executable(
      gpu_kernel_tests
      ${CMAKE_CURRENT_LIST_DIR}/tests/lib/gpu_kernel_tests.gpu.cpp
      ${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp
    )
    set_source_files_properties(
      ${CMAKE_CURRENT_LIST_DIR}/tests/lib/gpu_kernel_tests.gpu.cpp
      PROPERTIES LANGUAGE CUDA
    )
    target_compile_definitions(gpu_kernel_tests PRIVATE CUDA_ENABLED ${cpu_arch_def})
    target_include_directories(gpu_kernel_tests PRIVATE ${CMAKE_CURRENT_LIST_DIR}/lib/)
    set_property(TARGET gpu_kernel_tests PROPERTY CUDA_ARCHITECTURES 35)
    if(OpenMP_CXX_FOUND)
      target_link_libraries(gpu_kernel_tests PUBLIC OpenMP::OpenMP_CXX)
    endif()
  endif()
endif()
# AMD GPU kernels: enabled when a HIP compiler is found and the CUDA kernels
# were not already configured.
#
# The CUDA and HIP configurations use the same GPU kernel source, the same
# `native` module and the same `gpu_kernel_tests` target name. Configuring both
# would define that test target twice (a configure error) and put both
# CUDA_ENABLED and HIP_ENABLED on `native`, so CUDA takes precedence when both
# compilers are present.
check_language(HIP)

# HIP_ARCHITECTURES is documented as a space-separated string; also accept a
# semicolon-separated CMake list by normalising it to spaces before splitting.
string(REPLACE ";" " " hip_architectures_spaced "${HIP_ARCHITECTURES}")
separate_arguments(HIP_ARCHITECTURE_LIST UNIX_COMMAND "${hip_architectures_spaced}")

if(CMAKE_CUDA_COMPILER AND CMAKE_HIP_COMPILER)
  message(WARNING "Both CUDA and HIP found - CUDA GPU kernels are already configured; not building AMD GPU kernels!")
elseif(CMAKE_HIP_COMPILER)
  enable_language(HIP)

  # the following is done for CXX and CUDA automatically by pybind11_add_module,
  # but not yet for HIP, so do it manually
  if(NOT DEFINED CMAKE_HIP_VISIBILITY_PRESET)
    set_target_properties(native PROPERTIES HIP_VISIBILITY_PRESET "hidden")
  endif()

  # Add the GPU kernel to the Python module and compile both the kernel and
  # the Python bindings as HIP sources.
  target_compile_definitions(native PRIVATE HIP_ENABLED)
  target_sources(native PRIVATE ${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp)
  set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp PROPERTIES LANGUAGE HIP)
  set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/lib/python_bindings.cpp PROPERTIES LANGUAGE HIP)
  set_property(TARGET native PROPERTY HIP_ARCHITECTURES ${HIP_ARCHITECTURE_LIST})

  # Stand-alone GPU kernel test executable (BUILD_TESTING only).
  if(BUILD_TESTING)
    add_executable(gpu_kernel_tests
      ${CMAKE_CURRENT_LIST_DIR}/tests/lib/gpu_kernel_tests.gpu.cpp
      ${CMAKE_CURRENT_LIST_DIR}/lib/gpu_kernel.gpu.cpp)
    set_source_files_properties(${CMAKE_CURRENT_LIST_DIR}/tests/lib/gpu_kernel_tests.gpu.cpp PROPERTIES LANGUAGE HIP)
    target_compile_definitions(gpu_kernel_tests PRIVATE HIP_ENABLED)
    target_include_directories(gpu_kernel_tests PRIVATE
      ${CMAKE_CURRENT_LIST_DIR}/lib/)
    target_compile_definitions(gpu_kernel_tests PRIVATE ${cpu_arch_def})
    set_property(TARGET gpu_kernel_tests PROPERTY HIP_ARCHITECTURES ${HIP_ARCHITECTURE_LIST})
    if(OpenMP_CXX_FOUND)
      target_link_libraries(gpu_kernel_tests PUBLIC OpenMP::OpenMP_CXX)
    endif()
  endif()
else()
  message(WARNING "HIP not found - not building AMD GPU kernels!")
endif()
# Install the compiled `native` module into the `chacha` directory below the
# install prefix (presumably the Python package directory - confirm against
# the packaging setup).
install(
  TARGETS native
  DESTINATION chacha
)