Merge topic 'cuda_add_lto_support'
96bc59b1ca
CUDA: Add Device LTO support for nvcc1527d48cd0
CheckIPO: Refactor logic selecting test source files4a552ab4ad
remove unused variables6eda92d037
remove unused variable Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !7389
This commit is contained in:
commit
f6917a2f1f
7
Help/release/dev/cuda-device-lto.rst
Normal file
7
Help/release/dev/cuda-device-lto.rst
Normal file
@ -0,0 +1,7 @@
|
||||
cuda-device-lto
|
||||
---------------
|
||||
|
||||
* ``CUDA`` language now supports device link time optimization when using
|
||||
``nvcc``. The :variable:`CMAKE_INTERPROCEDURAL_OPTIMIZATION` variable and
|
||||
the associated :prop_tgt:`INTERPROCEDURAL_OPTIMIZATION` target property will
|
||||
activate device LTO.
|
@ -76,6 +76,23 @@ endmacro()
|
||||
|
||||
# Run IPO/LTO test
|
||||
macro(_ipo_run_language_check language)
|
||||
set(_C_ext "c")
|
||||
set(_CXX_ext "cpp")
|
||||
set(_Fortran_ext "f")
|
||||
string(COMPARE EQUAL "${language}" "CUDA" is_cuda)
|
||||
|
||||
set(ext ${_${language}_ext})
|
||||
if(NOT "${ext}" STREQUAL "")
|
||||
set(copy_sources foo.${ext} main.${ext})
|
||||
elseif(is_cuda)
|
||||
if(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE)
|
||||
set("${X_RESULT}" YES PARENT_SCOPE)
|
||||
endif()
|
||||
return()
|
||||
else()
|
||||
message(FATAL_ERROR "Language not supported")
|
||||
endif()
|
||||
|
||||
set(testdir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/_CMakeLTOTest-${language}")
|
||||
|
||||
file(REMOVE_RECURSE "${testdir}")
|
||||
@ -100,20 +117,6 @@ macro(_ipo_run_language_check language)
|
||||
@ONLY
|
||||
)
|
||||
|
||||
string(COMPARE EQUAL "${language}" "C" is_c)
|
||||
string(COMPARE EQUAL "${language}" "CXX" is_cxx)
|
||||
string(COMPARE EQUAL "${language}" "Fortran" is_fortran)
|
||||
|
||||
if(is_c)
|
||||
set(copy_sources foo.c main.c)
|
||||
elseif(is_cxx)
|
||||
set(copy_sources foo.cpp main.cpp)
|
||||
elseif(is_fortran)
|
||||
set(copy_sources foo.f main.f)
|
||||
else()
|
||||
message(FATAL_ERROR "Language not supported")
|
||||
endif()
|
||||
|
||||
foreach(x ${copy_sources})
|
||||
configure_file(
|
||||
"${try_compile_src}/${x}"
|
||||
@ -214,6 +217,11 @@ function(check_ipo_supported)
|
||||
list(APPEND languages "C")
|
||||
endif()
|
||||
|
||||
list(FIND enabled_languages "CUDA" result)
|
||||
if(NOT result EQUAL -1)
|
||||
list(APPEND languages "CUDA")
|
||||
endif()
|
||||
|
||||
list(FIND enabled_languages "Fortran" result)
|
||||
if(NOT result EQUAL -1)
|
||||
list(APPEND languages "Fortran")
|
||||
@ -222,7 +230,7 @@ function(check_ipo_supported)
|
||||
string(COMPARE EQUAL "${languages}" "" no_languages)
|
||||
if(no_languages)
|
||||
_ipo_not_supported(
|
||||
"no C/CXX/Fortran languages found in ENABLED_LANGUAGES global property"
|
||||
"no C/CXX/CUDA/Fortran languages found in ENABLED_LANGUAGES global property"
|
||||
)
|
||||
return()
|
||||
endif()
|
||||
@ -230,7 +238,7 @@ function(check_ipo_supported)
|
||||
set(languages "${X_LANGUAGES}")
|
||||
|
||||
set(unsupported_languages "${languages}")
|
||||
list(REMOVE_ITEM unsupported_languages "C" "CXX" "Fortran")
|
||||
list(REMOVE_ITEM unsupported_languages "C" "CXX" "CUDA" "Fortran")
|
||||
string(COMPARE NOTEQUAL "${unsupported_languages}" "" has_unsupported)
|
||||
if(has_unsupported)
|
||||
_ipo_not_supported(
|
||||
|
@ -35,6 +35,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||
|
||||
# Clang doesn't support CUDA device LTO
|
||||
set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE NO)
|
||||
set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER NO)
|
||||
|
||||
if(UNIX)
|
||||
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
|
||||
endif()
|
||||
|
@ -48,6 +48,13 @@ if((NOT DEFINED CMAKE_DEPENDS_USE_COMPILER OR CMAKE_DEPENDS_USE_COMPILER)
|
||||
set(CMAKE_CUDA_DEPENDS_USE_COMPILER TRUE)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
|
||||
set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE YES)
|
||||
set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER YES)
|
||||
|
||||
set(CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO " -dlto")
|
||||
endif()
|
||||
|
||||
if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
|
||||
set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
|
||||
@ -61,6 +68,7 @@ if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||
string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG")
|
||||
string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
|
||||
endif()
|
||||
|
||||
set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
|
||||
set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
|
||||
|
||||
|
@ -917,11 +917,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lang != "C" && lang != "CXX" && lang != "Fortran") {
|
||||
if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") {
|
||||
// We do not define IPO behavior for other languages.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lang == "CUDA") {
|
||||
// CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION
|
||||
if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) ||
|
||||
cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069();
|
||||
|
||||
if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) {
|
||||
@ -3429,7 +3437,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
|
||||
"EXPLICIT_LANGUAGE");
|
||||
}
|
||||
|
||||
void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
|
||||
const std::string& config,
|
||||
std::string& flags) const
|
||||
{
|
||||
std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
|
||||
|
||||
@ -3461,6 +3471,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
|
||||
std::string const& compiler =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
|
||||
const bool ipoEnabled = this->IsIPOEnabled("CUDA", config);
|
||||
|
||||
// Check for special modes: `all`, `all-major`.
|
||||
if (property == "all" || property == "all-major") {
|
||||
@ -3540,6 +3551,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
}
|
||||
|
||||
if (compiler == "NVIDIA") {
|
||||
if (ipoEnabled && compileOrLink == cmBuildStep::Link) {
|
||||
if (cmValue cudaIPOFlags =
|
||||
this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) {
|
||||
flags += cudaIPOFlags;
|
||||
}
|
||||
}
|
||||
|
||||
for (CudaArchitecture& architecture : architectures) {
|
||||
flags +=
|
||||
" --generate-code=arch=compute_" + architecture.name + ",code=[";
|
||||
@ -3552,7 +3570,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
|
||||
}
|
||||
}
|
||||
|
||||
if (architecture.real) {
|
||||
if (ipoEnabled) {
|
||||
if (compileOrLink == cmBuildStep::Compile) {
|
||||
flags += "lto_" + architecture.name;
|
||||
} else if (compileOrLink == cmBuildStep::Link) {
|
||||
flags += "sm_" + architecture.name;
|
||||
}
|
||||
} else if (architecture.real) {
|
||||
flags += "sm_" + architecture.name;
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "cmStateTypes.h"
|
||||
#include "cmValue.h"
|
||||
|
||||
enum class cmBuildStep;
|
||||
class cmComputeLinkInformation;
|
||||
class cmCustomCommand;
|
||||
class cmGlobalGenerator;
|
||||
@ -471,7 +472,9 @@ public:
|
||||
void AddExplicitLanguageFlags(std::string& flags,
|
||||
cmSourceFile const& sf) const;
|
||||
|
||||
void AddCUDAArchitectureFlags(std::string& flags) const;
|
||||
void AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
|
||||
const std::string& config,
|
||||
std::string& flags) const;
|
||||
void AddCUDAToolkitFlags(std::string& flags) const;
|
||||
|
||||
void AddHIPArchitectureFlags(std::string& flags) const;
|
||||
|
@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config,
|
||||
auto i = this->FlagsByLanguage.find(language);
|
||||
if (i == this->FlagsByLanguage.end()) {
|
||||
std::string flags;
|
||||
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
|
||||
language, config);
|
||||
this->LocalGenerator->AddLanguageFlags(
|
||||
flags, this->GeneratorTarget, cmBuildStep::Compile, language, config);
|
||||
this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget,
|
||||
language, config);
|
||||
this->LocalGenerator->AddVisibilityPresetFlags(
|
||||
|
@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt,
|
||||
std::string& flags = cflags[lang];
|
||||
|
||||
// Add language-specific flags.
|
||||
this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang,
|
||||
configName);
|
||||
this->CurrentLocalGenerator->AddLanguageFlags(
|
||||
flags, gtgt, cmBuildStep::Compile, lang, configName);
|
||||
|
||||
if (gtgt->IsIPOEnabled(lang, configName)) {
|
||||
this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO");
|
||||
|
@ -57,7 +57,6 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
|
||||
// For this we only consider targets
|
||||
using ItemVector = cmComputeLinkInformation::ItemVector;
|
||||
ItemVector const& items = cli.GetItems();
|
||||
std::string config = cli.GetConfig();
|
||||
return std::any_of(
|
||||
items.begin(), items.end(),
|
||||
[](cmComputeLinkInformation::Item const& item) -> bool {
|
||||
@ -69,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
|
||||
});
|
||||
}
|
||||
|
||||
bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag(
|
||||
cmComputeLinkInformation& cli)
|
||||
{
|
||||
// Determine if this item might requires device linking.
|
||||
// For this we only consider targets
|
||||
using ItemVector = cmComputeLinkInformation::ItemVector;
|
||||
ItemVector const& items = cli.GetItems();
|
||||
std::string config = cli.GetConfig();
|
||||
return std::any_of(
|
||||
items.begin(), items.end(),
|
||||
[config](cmComputeLinkInformation::Item const& item) -> bool {
|
||||
return item.Target &&
|
||||
item.Target->GetType() == cmStateEnums::STATIC_LIBRARY &&
|
||||
// this dependency requires us to device link it
|
||||
!item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") &&
|
||||
item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") &&
|
||||
item.Target->IsIPOEnabled("CUDA", config);
|
||||
});
|
||||
}
|
||||
|
||||
void cmLinkLineDeviceComputer::ComputeLinkLibraries(
|
||||
cmComputeLinkInformation& cli, std::string const& stdLibString,
|
||||
std::vector<BT<std::string>>& linkLibraries)
|
||||
|
@ -30,6 +30,7 @@ public:
|
||||
delete;
|
||||
|
||||
bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli);
|
||||
bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli);
|
||||
|
||||
void ComputeLinkLibraries(
|
||||
cmComputeLinkInformation& cli, std::string const& stdLibString,
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "cmInstallScriptGenerator.h"
|
||||
#include "cmInstallTargetGenerator.h"
|
||||
#include "cmLinkLineComputer.h"
|
||||
#include "cmLinkLineDeviceComputer.h"
|
||||
#include "cmMakefile.h"
|
||||
#include "cmRange.h"
|
||||
#include "cmRulePlaceholderExpander.h"
|
||||
@ -1381,7 +1382,7 @@ std::vector<BT<std::string>> cmLocalGenerator::GetStaticLibraryFlags(
|
||||
}
|
||||
|
||||
void cmLocalGenerator::GetDeviceLinkFlags(
|
||||
cmLinkLineComputer& linkLineComputer, const std::string& config,
|
||||
cmLinkLineDeviceComputer& linkLineComputer, const std::string& config,
|
||||
std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath,
|
||||
std::string& linkPath, cmGeneratorTarget* target)
|
||||
{
|
||||
@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags(
|
||||
|
||||
cmComputeLinkInformation* pcli = target->GetLinkInformation(config);
|
||||
|
||||
auto linklang = linkLineComputer.GetLinkerLanguage(target, config);
|
||||
auto ipoEnabled = target->IsIPOEnabled(linklang, config);
|
||||
if (!ipoEnabled) {
|
||||
ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli);
|
||||
}
|
||||
if (ipoEnabled) {
|
||||
if (cmValue cudaIPOFlags = this->Makefile->GetDefinition(
|
||||
"CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) {
|
||||
linkFlags += cudaIPOFlags;
|
||||
}
|
||||
}
|
||||
|
||||
if (pcli) {
|
||||
// Compute the required device link libraries when
|
||||
// resolving gpu lang device symbols
|
||||
@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags(
|
||||
linkPath);
|
||||
}
|
||||
|
||||
// iterate link deps and see if any of them need IPO
|
||||
|
||||
std::vector<std::string> linkOpts;
|
||||
target->GetLinkOptions(linkOpts, config, "CUDA");
|
||||
// LINK_OPTIONS are escaped.
|
||||
@ -1590,7 +1605,8 @@ std::vector<BT<std::string>> cmLocalGenerator::GetTargetCompileFlags(
|
||||
cmMakefile* mf = this->GetMakefile();
|
||||
|
||||
// Add language-specific flags.
|
||||
this->AddLanguageFlags(compileFlags, target, lang, config);
|
||||
this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang,
|
||||
config);
|
||||
|
||||
if (target->IsIPOEnabled(lang, config)) {
|
||||
this->AppendFeatureOptions(compileFlags, lang, "IPO");
|
||||
@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags,
|
||||
|
||||
void cmLocalGenerator::AddLanguageFlags(std::string& flags,
|
||||
cmGeneratorTarget const* target,
|
||||
cmBuildStep compileOrLink,
|
||||
const std::string& lang,
|
||||
const std::string& config)
|
||||
{
|
||||
@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
|
||||
}
|
||||
}
|
||||
} else if (lang == "CUDA") {
|
||||
target->AddCUDAArchitectureFlags(flags);
|
||||
target->AddCUDAArchitectureFlags(compileOrLink, config, flags);
|
||||
target->AddCUDAToolkitFlags(flags);
|
||||
} else if (lang == "ISPC") {
|
||||
target->AddISPCTargetFlags(flags);
|
||||
@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking(
|
||||
this->AddCompilerRequirementFlag(flags, target, lang, config);
|
||||
}
|
||||
|
||||
this->AddLanguageFlags(flags, target, lang, config);
|
||||
this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config);
|
||||
|
||||
if (target->IsIPOEnabled(lang, config)) {
|
||||
this->AppendFeatureOptions(flags, lang, "IPO");
|
||||
|
@ -35,6 +35,7 @@ class cmGeneratorTarget;
|
||||
class cmGlobalGenerator;
|
||||
class cmImplicitDependsList;
|
||||
class cmLinkLineComputer;
|
||||
class cmLinkLineDeviceComputer;
|
||||
class cmMakefile;
|
||||
class cmRulePlaceholderExpander;
|
||||
class cmSourceFile;
|
||||
@ -59,6 +60,13 @@ enum class cmDependencyScannerKind
|
||||
Compiler
|
||||
};
|
||||
|
||||
/** What to compute language flags for */
|
||||
enum class cmBuildStep
|
||||
{
|
||||
Compile,
|
||||
Link
|
||||
};
|
||||
|
||||
/** Target and source file which have a specific output. */
|
||||
struct cmSourcesWithOutput
|
||||
{
|
||||
@ -143,7 +151,8 @@ public:
|
||||
const std::string& filterArch = std::string());
|
||||
|
||||
void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target,
|
||||
const std::string& lang, const std::string& config);
|
||||
cmBuildStep compileOrLink, const std::string& lang,
|
||||
const std::string& config);
|
||||
void AddLanguageFlagsForLinking(std::string& flags,
|
||||
cmGeneratorTarget const* target,
|
||||
const std::string& lang,
|
||||
@ -476,7 +485,7 @@ public:
|
||||
|
||||
/** Fill out these strings for the given target. Libraries to link,
|
||||
* flags, and linkflags. */
|
||||
void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer,
|
||||
void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer,
|
||||
const std::string& config, std::string& linkLibs,
|
||||
std::string& linkFlags, std::string& frameworkPath,
|
||||
std::string& linkPath, cmGeneratorTarget* target);
|
||||
|
@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration(
|
||||
langForClCompile = linkLanguage;
|
||||
if (langForClCompile == "C" || langForClCompile == "CXX" ||
|
||||
langForClCompile == "Fortran") {
|
||||
this->AddLanguageFlags(flags, target, langForClCompile, configName);
|
||||
this->AddLanguageFlags(flags, target, cmBuildStep::Compile,
|
||||
langForClCompile, configName);
|
||||
}
|
||||
// set the correct language
|
||||
if (linkLanguage == "C") {
|
||||
|
@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
|
||||
std::vector<std::string> depends;
|
||||
this->AppendLinkDepends(depends, linkLanguage);
|
||||
|
||||
// Build a list of compiler flags and linker flags.
|
||||
std::string langFlags;
|
||||
std::string linkFlags;
|
||||
|
||||
// Add language feature flags.
|
||||
std::string langFlags;
|
||||
this->LocalGenerator->AddLanguageFlagsForLinking(
|
||||
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
|
||||
|
||||
// Add device-specific linker flags.
|
||||
this->GetDeviceLinkFlags(linkFlags, linkLanguage);
|
||||
|
||||
// Construct a list of files associated with this executable that
|
||||
// may need to be cleaned.
|
||||
std::vector<std::string> exeCleanFiles;
|
||||
@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
|
||||
// Set path conversion for link script shells.
|
||||
this->LocalGenerator->SetLinkScriptShell(useLinkScript);
|
||||
|
||||
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
|
||||
std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
|
||||
new cmLinkLineDeviceComputer(
|
||||
this->LocalGenerator,
|
||||
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
|
||||
linkLineComputer->SetForResponse(useResponseFileForLibs);
|
||||
linkLineComputer->SetRelink(relink);
|
||||
|
||||
// Create set of linking flags.
|
||||
std::string linkFlags;
|
||||
std::string ignored_;
|
||||
this->LocalGenerator->GetDeviceLinkFlags(
|
||||
*linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
|
||||
ignored_, this->GeneratorTarget);
|
||||
|
||||
// Collect up flags to link in needed libraries.
|
||||
std::string linkLibs;
|
||||
this->CreateLinkLibs(
|
||||
|
@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
|
||||
this->LocalGenerator->AddLanguageFlagsForLinking(
|
||||
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
|
||||
|
||||
// Create set of linking flags.
|
||||
std::string linkFlags;
|
||||
this->GetDeviceLinkFlags(linkFlags, linkLanguage);
|
||||
|
||||
// Clean files associated with this library.
|
||||
std::set<std::string> libCleanFiles;
|
||||
libCleanFiles.insert(
|
||||
@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
|
||||
|
||||
// Collect up flags to link in needed libraries.
|
||||
std::string linkLibs;
|
||||
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
|
||||
std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
|
||||
new cmLinkLineDeviceComputer(
|
||||
this->LocalGenerator,
|
||||
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
|
||||
linkLineComputer->SetForResponse(useResponseFileForLibs);
|
||||
linkLineComputer->SetRelink(relink);
|
||||
|
||||
// Create set of linking flags.
|
||||
std::string linkFlags;
|
||||
std::string ignored_;
|
||||
this->LocalGenerator->GetDeviceLinkFlags(
|
||||
*linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
|
||||
ignored_, this->GeneratorTarget);
|
||||
|
||||
this->CreateLinkLibs(
|
||||
linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends,
|
||||
cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink);
|
||||
|
@ -537,7 +537,6 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
|
||||
// this target requires separable cuda compilation
|
||||
// now build the correct command depending on if the target is
|
||||
// an executable or a dynamic library.
|
||||
std::string linkCmd;
|
||||
switch (this->GetGeneratorTarget()->GetType()) {
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
|
@ -3137,6 +3137,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions(
|
||||
this->LangForClCompile = langForClCompile;
|
||||
if (!langForClCompile.empty()) {
|
||||
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
|
||||
cmBuildStep::Compile,
|
||||
langForClCompile, configName);
|
||||
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget,
|
||||
langForClCompile, configName);
|
||||
@ -3521,8 +3522,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
|
||||
|
||||
// Get compile flags for CUDA in this directory.
|
||||
std::string flags;
|
||||
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
|
||||
configName);
|
||||
this->LocalGenerator->AddLanguageFlags(
|
||||
flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName);
|
||||
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA",
|
||||
configName);
|
||||
|
||||
@ -3793,7 +3794,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions(
|
||||
|
||||
std::string flags;
|
||||
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
|
||||
"ASM_MASM", configName);
|
||||
cmBuildStep::Compile, "ASM_MASM",
|
||||
configName);
|
||||
|
||||
masmOptions.Parse(flags);
|
||||
|
||||
@ -3845,7 +3847,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions(
|
||||
|
||||
std::string flags;
|
||||
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
|
||||
"ASM_NASM", configName);
|
||||
cmBuildStep::Compile, "ASM_NASM",
|
||||
configName);
|
||||
flags += " -f";
|
||||
flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT");
|
||||
nasmOptions.Parse(flags);
|
||||
|
@ -618,6 +618,11 @@ if(BUILD_TESTING)
|
||||
set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX})
|
||||
ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX)
|
||||
|
||||
if(CMake_TEST_CUDA)
|
||||
ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA)
|
||||
set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA")
|
||||
endif()
|
||||
|
||||
if(CMAKE_Fortran_COMPILER)
|
||||
set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran})
|
||||
ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran)
|
||||
|
@ -7,7 +7,6 @@ endmacro ()
|
||||
add_cuda_test_macro(CudaOnly.Architecture Architecture)
|
||||
add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial)
|
||||
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
|
||||
|
||||
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
|
||||
@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
|
||||
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
endif()
|
||||
|
||||
# The CUDA only ships the shared version of the toolkit libraries
|
||||
# on windows
|
||||
if(NOT WIN32)
|
||||
add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
|
||||
endif()
|
||||
|
||||
add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
|
||||
|
||||
if(MSVC)
|
||||
# Tests for features that only work with MSVC
|
||||
add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
|
||||
endif()
|
||||
|
||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
|
||||
PROPERTY LABELS "CUDA")
|
||||
|
||||
# The CUDA only ships the shared version of the toolkit libraries
|
||||
# on windows
|
||||
if(NOT WIN32)
|
||||
add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
|
||||
endif()
|
||||
|
||||
add_test(NAME CudaOnly.RuntimeControls COMMAND
|
||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||
--build-and-test
|
||||
|
37
Tests/CudaOnly/DeviceLTO/CMakeLists.txt
Normal file
37
Tests/CudaOnly/DeviceLTO/CMakeLists.txt
Normal file
@ -0,0 +1,37 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(DeviceLTO CUDA)
|
||||
|
||||
# Goal:
|
||||
# Verify that we correctly compile with device LTO
|
||||
# Verify that device LTO requirements are propagated to
|
||||
# the final device link line
|
||||
|
||||
add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
|
||||
add_executable(CudaOnlyDeviceLTO main.cu)
|
||||
|
||||
set_target_properties(CUDA_dlto
|
||||
PROPERTIES
|
||||
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set_target_properties(CudaOnlyDeviceLTO
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
|
||||
)
|
||||
|
||||
target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)
|
||||
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported)
|
||||
if(ipo_supported)
|
||||
set_target_properties(CUDA_dlto
|
||||
PROPERTIES
|
||||
INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
|
||||
# When non-LTO variants (i.e. virtual) are built together with LTO ones the
|
||||
# linker warns about missing device LTO for the virtual architectures.
|
||||
# Ignore these warnings.
|
||||
target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>")
|
||||
endif()
|
17
Tests/CudaOnly/DeviceLTO/file1.cu
Normal file
17
Tests/CudaOnly/DeviceLTO/file1.cu
Normal file
@ -0,0 +1,17 @@
|
||||
#ifdef _WIN32
|
||||
# define EXPORT __declspec(dllexport)
|
||||
#else
|
||||
# define EXPORT
|
||||
#endif
|
||||
|
||||
extern __device__ int file2_func(int);
|
||||
void __global__ kernel(int x)
|
||||
{
|
||||
file2_func(x);
|
||||
}
|
||||
|
||||
EXPORT int launch_kernel(int x)
|
||||
{
|
||||
kernel<<<1, 1>>>(x);
|
||||
return x;
|
||||
}
|
5
Tests/CudaOnly/DeviceLTO/file2.cu
Normal file
5
Tests/CudaOnly/DeviceLTO/file2.cu
Normal file
@ -0,0 +1,5 @@
|
||||
extern __device__ int file3_func(int);
|
||||
int __device__ file2_func(int x)
|
||||
{
|
||||
return x + file3_func(x);
|
||||
}
|
4
Tests/CudaOnly/DeviceLTO/file3.cu
Normal file
4
Tests/CudaOnly/DeviceLTO/file3.cu
Normal file
@ -0,0 +1,4 @@
|
||||
int __device__ file3_func(int x)
|
||||
{
|
||||
return x * x * x;
|
||||
}
|
62
Tests/CudaOnly/DeviceLTO/main.cu
Normal file
62
Tests/CudaOnly/DeviceLTO/main.cu
Normal file
@ -0,0 +1,62 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "cuda.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# define IMPORT __declspec(dllimport)
|
||||
#else
|
||||
# define IMPORT
|
||||
#endif
|
||||
|
||||
IMPORT int launch_kernel(int x);
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "prop.major: " << prop.major << std::endl;
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card" << std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cudaError_t err;
|
||||
launch_kernel(1);
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "launch_kernel: kernel launch should have passed.\n "
|
||||
"Error message: "
|
||||
<< cudaGetErrorString(err) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
32
Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
Normal file
32
Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
||||
cmake_minimum_required(VERSION 3.8)
|
||||
project(CheckIPOSupported-CUDA LANGUAGES CUDA)
|
||||
|
||||
cmake_policy(SET CMP0069 NEW)
|
||||
|
||||
include(CheckIPOSupported)
|
||||
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
|
||||
if(ipo_supported)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
|
||||
endif()
|
||||
|
||||
if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
|
||||
AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
|
||||
message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
add_library(foo STATIC foo.cu)
|
||||
set_target_properties(foo PROPERTIES
|
||||
WINDOWS_EXPORT_ALL_SYMBOLS ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
add_library(bar SHARED bar.cu)
|
||||
set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
target_link_libraries(bar PRIVATE foo)
|
||||
|
||||
add_executable(CheckIPOSupported-CUDA main.cu)
|
||||
target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
|
||||
|
||||
enable_testing()
|
||||
add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
|
12
Tests/Module/CheckIPOSupported-CUDA/bar.cu
Normal file
12
Tests/Module/CheckIPOSupported-CUDA/bar.cu
Normal file
@ -0,0 +1,12 @@
|
||||
__device__ int foo_func(int);
|
||||
|
||||
void __global__ bar_kernel(int x)
|
||||
{
|
||||
foo_func(x);
|
||||
}
|
||||
|
||||
int launch_kernel(int x)
|
||||
{
|
||||
bar_kernel<<<1, 1>>>(x);
|
||||
return x;
|
||||
}
|
4
Tests/Module/CheckIPOSupported-CUDA/foo.cu
Normal file
4
Tests/Module/CheckIPOSupported-CUDA/foo.cu
Normal file
@ -0,0 +1,4 @@
|
||||
extern __device__ int foo_func(int a)
|
||||
{
|
||||
return a * 42 + 9;
|
||||
}
|
62
Tests/Module/CheckIPOSupported-CUDA/main.cu
Normal file
62
Tests/Module/CheckIPOSupported-CUDA/main.cu
Normal file
@ -0,0 +1,62 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "cuda.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# define IMPORT __declspec(dllimport)
|
||||
#else
|
||||
# define IMPORT
|
||||
#endif
|
||||
|
||||
IMPORT int launch_kernel(int x);
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "prop.major: " << prop.major << std::endl;
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card" << std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cudaError_t err;
|
||||
launch_kernel(1);
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "launch_kernel: kernel launch should have passed.\n "
|
||||
"Error message: "
|
||||
<< cudaGetErrorString(err) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\):
|
||||
IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES
|
||||
global property\)\.
|
||||
IPO is not supported \(no C/CXX/CUDA/Fortran languages found in
|
||||
ENABLED_LANGUAGES global property\)\.
|
||||
Call Stack \(most recent call first\):
|
||||
.*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\)
|
||||
default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\)
|
||||
|
Loading…
Reference in New Issue
Block a user