CUDA: Support nvcc 11.5 new -arch=all|all-major flags
This commit is contained in:
parent
e1acb03cd9
commit
14d8a2768d
@ -20,6 +20,18 @@ variable if it is set when a target is created.
|
||||
The ``CUDA_ARCHITECTURES`` target property must be set to a non-empty value on targets
|
||||
that compile CUDA sources, or it is an error. See policy :policy:`CMP0104`.
|
||||
|
||||
.. versionadded:: 3.23
|
||||
|
||||
The ``CUDA_ARCHITECTURES`` property may be set to one of the following special values:
|
||||
|
||||
``all``
|
||||
Requires the ``NVIDIA`` compiler (nvcc) version 11.5 or newer. Compiles for all supported major and minor real
|
||||
architectures, and the highest major virtual architecture.
|
||||
|
||||
``all-major``
|
||||
Requires the ``NVIDIA`` compiler (nvcc) version 11.5 or newer. Compiles for all supported major real
|
||||
architectures, and the highest major virtual architecture.
|
||||
|
||||
Examples
|
||||
^^^^^^^^
|
||||
|
||||
|
10
Help/release/dev/cuda-new-arch-modes.rst
Normal file
10
Help/release/dev/cuda-new-arch-modes.rst
Normal file
@ -0,0 +1,10 @@
|
||||
cuda-new-arch-modes
|
||||
-------------------
|
||||
|
||||
* The :prop_tgt:`CUDA_ARCHITECTURES` target property now supports the
|
||||
``all`` and ``all-major`` values when the CUDA compiler id is ``NVIDIA``
|
||||
and version is 11.5+.
|
||||
|
||||
* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable now supports the
|
||||
``all`` and ``all-major`` values when the CUDA compiler id is ``NVIDIA``
|
||||
and version is 11.5+.
|
@ -258,13 +258,22 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
|
||||
|
||||
# Append user-specified architectures.
|
||||
if(CMAKE_CUDA_ARCHITECTURES)
|
||||
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
|
||||
# Strip specifiers as PTX vs binary doesn't matter.
|
||||
string(REGEX MATCH "[0-9]+" arch_name "${arch}")
|
||||
string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
|
||||
string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
|
||||
list(APPEND tested_architectures "${arch_name}")
|
||||
endforeach()
|
||||
# Translate CMAKE_CUDA_ARCHITECTURES into compiler test flags and record in
# architectures_mode which form the user supplied: one of the keywords
# `all` / `all-major`, or an explicit list of architectures.
if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major")
  # Keyword form: only an nvcc flag is appended; no clang flag is added and
  # tested_architectures is left untouched.
  set(architectures_mode all-major)
  string(APPEND nvcc_test_flags " -arch=all-major")
elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall")
  set(architectures_mode all)
  string(APPEND nvcc_test_flags " -arch=all")
else()
  set(architectures_mode explicit)
  foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
    # Keep only the numeric part of each entry: PTX vs binary specifiers
    # make no difference for the detection compile.
    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
    list(APPEND tested_architectures "${arch_name}")
    string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
    string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
  endforeach()
endif()
|
||||
|
||||
# If the user has specified architectures we'll want to fail during compiler detection if they don't work.
|
||||
set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON)
|
||||
@ -597,7 +606,18 @@ if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
|
||||
if(NOT CMAKE_CUDA_ARCHITECTURES)
|
||||
message(FATAL_ERROR "Failed to find a working CUDA architecture.")
|
||||
endif()
|
||||
elseif(architectures)
|
||||
elseif(architectures AND (architectures_mode STREQUAL "all" OR
                           architectures_mode STREQUAL "all-major"))
  # architectures_mode stores the bare keyword (all, all-major or explicit),
  # so it must be compared against the bare literals; an "x"-prefixed literal
  # can never match it.
  # The keyword forms are passed to the compiler as -arch=<keyword>, which is
  # only done for the NVIDIA compiler, so reject them for any other compiler
  # id and suggest the architectures that were actually detected.
  if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
    message(FATAL_ERROR
      "The CMAKE_CUDA_ARCHITECTURES:\n"
      " ${CMAKE_CUDA_ARCHITECTURES}\n"
      "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n"
      " ${architectures}\n"
      "instead.")
  endif()

elseif(architectures AND architectures_mode STREQUAL "explicit")
|
||||
# Sort since order mustn't matter.
|
||||
list(SORT architectures)
|
||||
list(SORT tested_architectures)
|
||||
@ -630,5 +650,7 @@ unset(_CUDA_LIBRARY_DIR)
|
||||
unset(_CUDA_TARGET_DIR)
|
||||
unset(_CUDA_TARGET_NAME)
|
||||
|
||||
unset(architectures_mode)
|
||||
|
||||
set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
|
||||
set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
|
||||
|
@ -3317,6 +3317,22 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
return;
|
||||
}
|
||||
|
||||
std::string const& compiler =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
|
||||
|
||||
// Check for special modes: `all`, `all-major`.
|
||||
if (property == "all") {
|
||||
if (compiler == "NVIDIA") {
|
||||
flags += " -arch=all";
|
||||
return;
|
||||
}
|
||||
} else if (property == "all-major") {
|
||||
if (compiler == "NVIDIA") {
|
||||
flags += " -arch=all-major";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
struct CudaArchitecture
|
||||
{
|
||||
std::string name;
|
||||
@ -3358,9 +3374,6 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
}
|
||||
}
|
||||
|
||||
std::string const& compiler =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
|
||||
|
||||
if (compiler == "NVIDIA") {
|
||||
for (CudaArchitecture& architecture : architectures) {
|
||||
flags +=
|
||||
|
44
Tests/CudaOnly/All/CMakeLists.txt
Normal file
44
Tests/CudaOnly/All/CMakeLists.txt
Normal file
@ -0,0 +1,44 @@
|
||||
cmake_minimum_required(VERSION 3.20)
project(CudaOnlyAll CUDA)

# The `all` / `all-major` architecture keywords are only exercised with the
# NVIDIA compiler at version 11.5 or newer. For every other compiler the
# helper below is not defined and the project just builds main.cu.
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
   CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.5.0)

  set(compile_options -Wno-deprecated-gpu-targets)

  # verify_output(<flag> <output_var>)
  #   flag       - architecture keyword being checked (used in the message).
  #   output_var - NAME of the variable holding the verbose compile output.
  # Fails configuration unless the output contains at least two
  # `-arch compute_<N>` occurrences.
  function(verify_output flag output_var)
    string(REGEX MATCHALL "-arch compute_([0-9]+)" target_archs "${${output_var}}")
    list(LENGTH target_archs count)
    if(count LESS 2)
      message(FATAL_ERROR "${flag} failed to map to multiple architectures")
    endif()
  endfunction()
endif()

if(COMMAND verify_output)
  # -v makes the compiler print the commands it runs so verify_output can
  # inspect which architectures were targeted.
  set(try_compile_flags -v ${compile_options})

  set(CMAKE_CUDA_ARCHITECTURES all)
  try_compile(all_archs_compiles
    ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles
    ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
    COMPILE_DEFINITIONS ${try_compile_flags}
    OUTPUT_VARIABLE output
    )
  # Report a compile failure directly, with the compiler output, instead of
  # letting it surface as a misleading architecture-count error or as a
  # silently missing target.
  if(NOT all_archs_compiles)
    message(FATAL_ERROR
      "Compiling main.cu with CMAKE_CUDA_ARCHITECTURES=all failed:\n${output}")
  endif()
  verify_output(all output)

  set(CMAKE_CUDA_ARCHITECTURES all-major)
  try_compile(all_major_archs_compiles
    ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles
    ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
    COMPILE_DEFINITIONS ${try_compile_flags}
    OUTPUT_VARIABLE output
    )
  if(NOT all_major_archs_compiles)
    message(FATAL_ERROR
      "Compiling main.cu with CMAKE_CUDA_ARCHITECTURES=all-major failed:\n${output}")
  endif()
  verify_output(all-major output)

  # Both checks passed (any failure above is fatal), so build the real
  # target. CMAKE_CUDA_ARCHITECTURES is still `all-major` at this point.
  add_executable(CudaOnlyAll main.cu)
  target_compile_options(CudaOnlyAll PRIVATE ${compile_options})
else()
  add_executable(CudaOnlyAll main.cu)
endif()
|
3
Tests/CudaOnly/All/main.cu
Normal file
3
Tests/CudaOnly/All/main.cu
Normal file
@ -0,0 +1,3 @@
|
||||
// Deliberately empty program: the CudaOnlyAll project only needs a source
// file that compiles and links for the requested architectures.
int main()
{
  return 0;
}
|
@ -4,8 +4,10 @@ macro (add_cuda_test_macro name)
|
||||
PROPERTY LABELS "CUDA")
|
||||
endmacro ()
|
||||
|
||||
add_cuda_test_macro(CudaOnly.All CudaOnlyAll)
|
||||
add_cuda_test_macro(CudaOnly.Architecture Architecture)
|
||||
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
|
||||
|
||||
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
|
||||
|
Loading…
Reference in New Issue
Block a user