Merge topic 'cuda_add_lto_support'

96bc59b1ca CUDA: Add Device LTO support for nvcc
1527d48cd0 CheckIPO: Refactor logic selecting test source files
4a552ab4ad remove unused variables
6eda92d037 remove unused variable

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !7389
This commit is contained in:
Brad King 2022-08-03 13:42:05 +00:00 committed by Kitware Robot
commit f6917a2f1f
29 changed files with 412 additions and 63 deletions

View File

@ -0,0 +1,7 @@
cuda-device-lto
---------------
* ``CUDA`` language now supports device link time optimization when using
``nvcc``. The :variable:`CMAKE_INTERPROCEDURAL_OPTIMIZATION` variable and
the associated :prop_tgt:`INTERPROCEDURAL_OPTIMIZATION` target property will
activate device LTO.

View File

@ -76,6 +76,23 @@ endmacro()
# Run IPO/LTO test
macro(_ipo_run_language_check language)
set(_C_ext "c")
set(_CXX_ext "cpp")
set(_Fortran_ext "f")
string(COMPARE EQUAL "${language}" "CUDA" is_cuda)
set(ext ${_${language}_ext})
if(NOT "${ext}" STREQUAL "")
set(copy_sources foo.${ext} main.${ext})
elseif(is_cuda)
if(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE)
set("${X_RESULT}" YES PARENT_SCOPE)
endif()
return()
else()
message(FATAL_ERROR "Language not supported")
endif()
set(testdir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/_CMakeLTOTest-${language}")
file(REMOVE_RECURSE "${testdir}")
@ -100,20 +117,6 @@ macro(_ipo_run_language_check language)
@ONLY
)
string(COMPARE EQUAL "${language}" "C" is_c)
string(COMPARE EQUAL "${language}" "CXX" is_cxx)
string(COMPARE EQUAL "${language}" "Fortran" is_fortran)
if(is_c)
set(copy_sources foo.c main.c)
elseif(is_cxx)
set(copy_sources foo.cpp main.cpp)
elseif(is_fortran)
set(copy_sources foo.f main.f)
else()
message(FATAL_ERROR "Language not supported")
endif()
foreach(x ${copy_sources})
configure_file(
"${try_compile_src}/${x}"
@ -214,6 +217,11 @@ function(check_ipo_supported)
list(APPEND languages "C")
endif()
list(FIND enabled_languages "CUDA" result)
if(NOT result EQUAL -1)
list(APPEND languages "CUDA")
endif()
list(FIND enabled_languages "Fortran" result)
if(NOT result EQUAL -1)
list(APPEND languages "Fortran")
@ -222,7 +230,7 @@ function(check_ipo_supported)
string(COMPARE EQUAL "${languages}" "" no_languages)
if(no_languages)
_ipo_not_supported(
"no C/CXX/Fortran languages found in ENABLED_LANGUAGES global property"
"no C/CXX/CUDA/Fortran languages found in ENABLED_LANGUAGES global property"
)
return()
endif()
@ -230,7 +238,7 @@ function(check_ipo_supported)
set(languages "${X_LANGUAGES}")
set(unsupported_languages "${languages}")
list(REMOVE_ITEM unsupported_languages "C" "CXX" "Fortran")
list(REMOVE_ITEM unsupported_languages "C" "CXX" "CUDA" "Fortran")
string(COMPARE NOTEQUAL "${unsupported_languages}" "" has_unsupported)
if(has_unsupported)
_ipo_not_supported(

View File

@ -35,6 +35,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
# Clang doesn't support CUDA device LTO
set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE NO)
set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER NO)
if(UNIX)
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
endif()

View File

@ -48,6 +48,13 @@ if((NOT DEFINED CMAKE_DEPENDS_USE_COMPILER OR CMAKE_DEPENDS_USE_COMPILER)
set(CMAKE_CUDA_DEPENDS_USE_COMPILER TRUE)
endif()
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE YES)
set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER YES)
set(CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO " -dlto")
endif()
if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
@ -61,6 +68,7 @@ if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG")
string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
endif()
set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)

View File

@ -917,11 +917,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang,
return false;
}
if (lang != "C" && lang != "CXX" && lang != "Fortran") {
if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") {
// We do not define IPO behavior for other languages.
return false;
}
if (lang == "CUDA") {
// CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION
if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) ||
cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) {
return false;
}
}
cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069();
if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) {
@ -3429,7 +3437,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
"EXPLICIT_LANGUAGE");
}
void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
const std::string& config,
std::string& flags) const
{
std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
@ -3461,6 +3471,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
std::string const& compiler =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
const bool ipoEnabled = this->IsIPOEnabled("CUDA", config);
// Check for special modes: `all`, `all-major`.
if (property == "all" || property == "all-major") {
@ -3540,6 +3551,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
if (compiler == "NVIDIA") {
if (ipoEnabled && compileOrLink == cmBuildStep::Link) {
if (cmValue cudaIPOFlags =
this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) {
flags += cudaIPOFlags;
}
}
for (CudaArchitecture& architecture : architectures) {
flags +=
" --generate-code=arch=compute_" + architecture.name + ",code=[";
@ -3552,7 +3570,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
}
if (architecture.real) {
if (ipoEnabled) {
if (compileOrLink == cmBuildStep::Compile) {
flags += "lto_" + architecture.name;
} else if (compileOrLink == cmBuildStep::Link) {
flags += "sm_" + architecture.name;
}
} else if (architecture.real) {
flags += "sm_" + architecture.name;
}

View File

@ -23,6 +23,7 @@
#include "cmStateTypes.h"
#include "cmValue.h"
enum class cmBuildStep;
class cmComputeLinkInformation;
class cmCustomCommand;
class cmGlobalGenerator;
@ -471,7 +472,9 @@ public:
void AddExplicitLanguageFlags(std::string& flags,
cmSourceFile const& sf) const;
void AddCUDAArchitectureFlags(std::string& flags) const;
void AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
const std::string& config,
std::string& flags) const;
void AddCUDAToolkitFlags(std::string& flags) const;
void AddHIPArchitectureFlags(std::string& flags) const;

View File

@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config,
auto i = this->FlagsByLanguage.find(language);
if (i == this->FlagsByLanguage.end()) {
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
language, config);
this->LocalGenerator->AddLanguageFlags(
flags, this->GeneratorTarget, cmBuildStep::Compile, language, config);
this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget,
language, config);
this->LocalGenerator->AddVisibilityPresetFlags(

View File

@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt,
std::string& flags = cflags[lang];
// Add language-specific flags.
this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang,
configName);
this->CurrentLocalGenerator->AddLanguageFlags(
flags, gtgt, cmBuildStep::Compile, lang, configName);
if (gtgt->IsIPOEnabled(lang, configName)) {
this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO");

View File

@ -57,7 +57,6 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
// For this we only consider targets
using ItemVector = cmComputeLinkInformation::ItemVector;
ItemVector const& items = cli.GetItems();
std::string config = cli.GetConfig();
return std::any_of(
items.begin(), items.end(),
[](cmComputeLinkInformation::Item const& item) -> bool {
@ -69,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
});
}
bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag(
cmComputeLinkInformation& cli)
{
// Determine if this item might requires device linking.
// For this we only consider targets
using ItemVector = cmComputeLinkInformation::ItemVector;
ItemVector const& items = cli.GetItems();
std::string config = cli.GetConfig();
return std::any_of(
items.begin(), items.end(),
[config](cmComputeLinkInformation::Item const& item) -> bool {
return item.Target &&
item.Target->GetType() == cmStateEnums::STATIC_LIBRARY &&
// this dependency requires us to device link it
!item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") &&
item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") &&
item.Target->IsIPOEnabled("CUDA", config);
});
}
void cmLinkLineDeviceComputer::ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,
std::vector<BT<std::string>>& linkLibraries)

View File

@ -30,6 +30,7 @@ public:
delete;
bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli);
bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli);
void ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,

View File

@ -36,6 +36,7 @@
#include "cmInstallScriptGenerator.h"
#include "cmInstallTargetGenerator.h"
#include "cmLinkLineComputer.h"
#include "cmLinkLineDeviceComputer.h"
#include "cmMakefile.h"
#include "cmRange.h"
#include "cmRulePlaceholderExpander.h"
@ -1381,7 +1382,7 @@ std::vector<BT<std::string>> cmLocalGenerator::GetStaticLibraryFlags(
}
void cmLocalGenerator::GetDeviceLinkFlags(
cmLinkLineComputer& linkLineComputer, const std::string& config,
cmLinkLineDeviceComputer& linkLineComputer, const std::string& config,
std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target)
{
@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags(
cmComputeLinkInformation* pcli = target->GetLinkInformation(config);
auto linklang = linkLineComputer.GetLinkerLanguage(target, config);
auto ipoEnabled = target->IsIPOEnabled(linklang, config);
if (!ipoEnabled) {
ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli);
}
if (ipoEnabled) {
if (cmValue cudaIPOFlags = this->Makefile->GetDefinition(
"CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) {
linkFlags += cudaIPOFlags;
}
}
if (pcli) {
// Compute the required device link libraries when
// resolving gpu lang device symbols
@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags(
linkPath);
}
// iterate link deps and see if any of them need IPO
std::vector<std::string> linkOpts;
target->GetLinkOptions(linkOpts, config, "CUDA");
// LINK_OPTIONS are escaped.
@ -1590,7 +1605,8 @@ std::vector<BT<std::string>> cmLocalGenerator::GetTargetCompileFlags(
cmMakefile* mf = this->GetMakefile();
// Add language-specific flags.
this->AddLanguageFlags(compileFlags, target, lang, config);
this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang,
config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(compileFlags, lang, "IPO");
@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags,
void cmLocalGenerator::AddLanguageFlags(std::string& flags,
cmGeneratorTarget const* target,
cmBuildStep compileOrLink,
const std::string& lang,
const std::string& config)
{
@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
}
}
} else if (lang == "CUDA") {
target->AddCUDAArchitectureFlags(flags);
target->AddCUDAArchitectureFlags(compileOrLink, config, flags);
target->AddCUDAToolkitFlags(flags);
} else if (lang == "ISPC") {
target->AddISPCTargetFlags(flags);
@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking(
this->AddCompilerRequirementFlag(flags, target, lang, config);
}
this->AddLanguageFlags(flags, target, lang, config);
this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(flags, lang, "IPO");

View File

@ -35,6 +35,7 @@ class cmGeneratorTarget;
class cmGlobalGenerator;
class cmImplicitDependsList;
class cmLinkLineComputer;
class cmLinkLineDeviceComputer;
class cmMakefile;
class cmRulePlaceholderExpander;
class cmSourceFile;
@ -59,6 +60,13 @@ enum class cmDependencyScannerKind
Compiler
};
/** What to compute language flags for */
enum class cmBuildStep
{
Compile,
Link
};
/** Target and source file which have a specific output. */
struct cmSourcesWithOutput
{
@ -143,7 +151,8 @@ public:
const std::string& filterArch = std::string());
void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target,
const std::string& lang, const std::string& config);
cmBuildStep compileOrLink, const std::string& lang,
const std::string& config);
void AddLanguageFlagsForLinking(std::string& flags,
cmGeneratorTarget const* target,
const std::string& lang,
@ -476,7 +485,7 @@ public:
/** Fill out these strings for the given target. Libraries to link,
* flags, and linkflags. */
void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer,
void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer,
const std::string& config, std::string& linkLibs,
std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target);

View File

@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration(
langForClCompile = linkLanguage;
if (langForClCompile == "C" || langForClCompile == "CXX" ||
langForClCompile == "Fortran") {
this->AddLanguageFlags(flags, target, langForClCompile, configName);
this->AddLanguageFlags(flags, target, cmBuildStep::Compile,
langForClCompile, configName);
}
// set the correct language
if (linkLanguage == "C") {

View File

@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage);
// Build a list of compiler flags and linker flags.
std::string langFlags;
std::string linkFlags;
// Add language feature flags.
std::string langFlags;
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
// Add device-specific linker flags.
this->GetDeviceLinkFlags(linkFlags, linkLanguage);
// Construct a list of files associated with this executable that
// may need to be cleaned.
std::vector<std::string> exeCleanFiles;
@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
// Set path conversion for link script shells.
this->LocalGenerator->SetLinkScriptShell(useLinkScript);
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
// Create set of linking flags.
std::string linkFlags;
std::string ignored_;
this->LocalGenerator->GetDeviceLinkFlags(
*linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
ignored_, this->GeneratorTarget);
// Collect up flags to link in needed libraries.
std::string linkLibs;
this->CreateLinkLibs(

View File

@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
// Create set of linking flags.
std::string linkFlags;
this->GetDeviceLinkFlags(linkFlags, linkLanguage);
// Clean files associated with this library.
std::set<std::string> libCleanFiles;
libCleanFiles.insert(
@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
// Collect up flags to link in needed libraries.
std::string linkLibs;
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
// Create set of linking flags.
std::string linkFlags;
std::string ignored_;
this->LocalGenerator->GetDeviceLinkFlags(
*linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
ignored_, this->GeneratorTarget);
this->CreateLinkLibs(
linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends,
cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink);

View File

@ -537,7 +537,6 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
// this target requires separable cuda compilation
// now build the correct command depending on if the target is
// an executable or a dynamic library.
std::string linkCmd;
switch (this->GetGeneratorTarget()->GetType()) {
case cmStateEnums::STATIC_LIBRARY:
case cmStateEnums::SHARED_LIBRARY:

View File

@ -3137,6 +3137,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions(
this->LangForClCompile = langForClCompile;
if (!langForClCompile.empty()) {
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
cmBuildStep::Compile,
langForClCompile, configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget,
langForClCompile, configName);
@ -3521,8 +3522,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
// Get compile flags for CUDA in this directory.
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
configName);
this->LocalGenerator->AddLanguageFlags(
flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA",
configName);
@ -3793,7 +3794,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
"ASM_MASM", configName);
cmBuildStep::Compile, "ASM_MASM",
configName);
masmOptions.Parse(flags);
@ -3845,7 +3847,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
"ASM_NASM", configName);
cmBuildStep::Compile, "ASM_NASM",
configName);
flags += " -f";
flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT");
nasmOptions.Parse(flags);

View File

@ -618,6 +618,11 @@ if(BUILD_TESTING)
set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX})
ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX)
if(CMake_TEST_CUDA)
ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA)
set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA")
endif()
if(CMAKE_Fortran_COMPILER)
set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran})
ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran)

View File

@ -7,7 +7,6 @@ endmacro ()
add_cuda_test_macro(CudaOnly.Architecture Architecture)
add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial)
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
endif()
# The CUDA only ships the shared version of the toolkit libraries
# on windows
if(NOT WIN32)
add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
endif()
add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
if(MSVC)
# Tests for features that only work with MSVC
add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
endif()
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
PROPERTY LABELS "CUDA")
# The CUDA only ships the shared version of the toolkit libraries
# on windows
if(NOT WIN32)
add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
endif()
if(MSVC)
add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
endif()
add_test(NAME CudaOnly.RuntimeControls COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test

View File

@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.18)
project(DeviceLTO CUDA)
# Goal:
# Verify that we correctly compile with device LTO
# Verify that device LTO requirements are propagated to
# the final device link line
add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
add_executable(CudaOnlyDeviceLTO main.cu)
set_target_properties(CUDA_dlto
PROPERTIES
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
set_target_properties(CudaOnlyDeviceLTO
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
)
target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)
include(CheckIPOSupported)
check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported)
if(ipo_supported)
set_target_properties(CUDA_dlto
PROPERTIES
INTERPROCEDURAL_OPTIMIZATION ON)
# When non-LTO variants (i.e. virtual) are built together with LTO ones the
# linker warns about missing device LTO for the virtual architectures.
# Ignore these warnings.
target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>")
endif()

View File

@ -0,0 +1,17 @@
#ifdef _WIN32
# define EXPORT __declspec(dllexport)
#else
# define EXPORT
#endif
extern __device__ int file2_func(int);
void __global__ kernel(int x)
{
file2_func(x);
}
EXPORT int launch_kernel(int x)
{
kernel<<<1, 1>>>(x);
return x;
}

View File

@ -0,0 +1,5 @@
extern __device__ int file3_func(int);
int __device__ file2_func(int x)
{
return x + file3_func(x);
}

View File

@ -0,0 +1,4 @@
int __device__ file3_func(int x)
{
return x * x * x;
}

View File

@ -0,0 +1,62 @@
#include <iostream>
#include "cuda.h"
#ifdef _WIN32
# define IMPORT __declspec(dllimport)
#else
# define IMPORT
#endif
IMPORT int launch_kernel(int x);
int choose_cuda_device()
{
int nDevices = 0;
cudaError_t err = cudaGetDeviceCount(&nDevices);
if (err != cudaSuccess) {
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
<< std::endl;
return 1;
}
for (int i = 0; i < nDevices; ++i) {
cudaDeviceProp prop;
cudaError_t err = cudaGetDeviceProperties(&prop, i);
if (err != cudaSuccess) {
std::cerr << "Could not retrieve properties from CUDA device " << i
<< std::endl;
return 1;
}
std::cout << "prop.major: " << prop.major << std::endl;
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
} else {
return 0;
}
}
std::cout << "Could not find a CUDA enabled card" << std::endl;
return 1;
}
int main()
{
int ret = choose_cuda_device();
if (ret) {
return 0;
}
cudaError_t err;
launch_kernel(1);
err = cudaGetLastError();
if (err != cudaSuccess) {
std::cerr << "launch_kernel: kernel launch should have passed.\n "
"Error message: "
<< cudaGetErrorString(err) << std::endl;
return 1;
}
return 0;
}

View File

@ -0,0 +1,32 @@
cmake_minimum_required(VERSION 3.8)
project(CheckIPOSupported-CUDA LANGUAGES CUDA)
cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
if(ipo_supported)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()
if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
endif()
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
add_library(foo STATIC foo.cu)
set_target_properties(foo PROPERTIES
WINDOWS_EXPORT_ALL_SYMBOLS ON
POSITION_INDEPENDENT_CODE ON)
add_library(bar SHARED bar.cu)
set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
target_link_libraries(bar PRIVATE foo)
add_executable(CheckIPOSupported-CUDA main.cu)
target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
enable_testing()
add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)

View File

@ -0,0 +1,12 @@
__device__ int foo_func(int);
void __global__ bar_kernel(int x)
{
foo_func(x);
}
int launch_kernel(int x)
{
bar_kernel<<<1, 1>>>(x);
return x;
}

View File

@ -0,0 +1,4 @@
extern __device__ int foo_func(int a)
{
return a * 42 + 9;
}

View File

@ -0,0 +1,62 @@
#include <iostream>
#include "cuda.h"
#ifdef _WIN32
# define IMPORT __declspec(dllimport)
#else
# define IMPORT
#endif
IMPORT int launch_kernel(int x);
int choose_cuda_device()
{
int nDevices = 0;
cudaError_t err = cudaGetDeviceCount(&nDevices);
if (err != cudaSuccess) {
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
<< std::endl;
return 1;
}
for (int i = 0; i < nDevices; ++i) {
cudaDeviceProp prop;
cudaError_t err = cudaGetDeviceProperties(&prop, i);
if (err != cudaSuccess) {
std::cerr << "Could not retrieve properties from CUDA device " << i
<< std::endl;
return 1;
}
std::cout << "prop.major: " << prop.major << std::endl;
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
} else {
return 0;
}
}
std::cout << "Could not find a CUDA enabled card" << std::endl;
return 1;
}
int main()
{
int ret = choose_cuda_device();
if (ret) {
return 0;
}
cudaError_t err;
launch_kernel(1);
err = cudaGetLastError();
if (err != cudaSuccess) {
std::cerr << "launch_kernel: kernel launch should have passed.\n "
"Error message: "
<< cudaGetErrorString(err) << std::endl;
return 1;
}
return 0;
}

View File

@ -1,6 +1,6 @@
^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\):
IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES
global property\)\.
IPO is not supported \(no C/CXX/CUDA/Fortran languages found in
ENABLED_LANGUAGES global property\)\.
Call Stack \(most recent call first\):
.*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\)
default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\)