CUDA: static lib device linking computes required static libs
Previously the CMake didn't compute the required set of libraries needed to properly device link a static library when CUDA_RESOLVE_DEVICE_SYMBOLS was enabled.
This commit is contained in:
parent
09032f09f8
commit
2d7bb13da7
@ -82,6 +82,9 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
|
||||
ItemVector const& items = cli.GetItems();
|
||||
std::string config = cli.GetConfig();
|
||||
bool skipItemAfterFramework = false;
|
||||
// Note:
|
||||
// Any modification of this algorithm should be reflected also in
|
||||
// cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions
|
||||
for (auto const& item : items) {
|
||||
if (skipItemAfterFramework) {
|
||||
skipItemAfterFramework = false;
|
||||
@ -91,6 +94,7 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
|
||||
if (item.Target) {
|
||||
bool skip = false;
|
||||
switch (item.Target->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::INTERFACE_LIBRARY:
|
||||
skip = true;
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "cmInstallScriptGenerator.h"
|
||||
#include "cmInstallTargetGenerator.h"
|
||||
#include "cmLinkLineComputer.h"
|
||||
#include "cmLinkLineDeviceComputer.h"
|
||||
#include "cmMakefile.h"
|
||||
#include "cmRulePlaceholderExpander.h"
|
||||
#include "cmSourceFile.h"
|
||||
@ -1152,6 +1153,12 @@ void cmLocalGenerator::GetTargetFlags(
|
||||
switch (target->GetType()) {
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
this->GetStaticLibraryFlags(linkFlags, buildType, linkLanguage, target);
|
||||
if (pcli && dynamic_cast<cmLinkLineDeviceComputer*>(linkLineComputer)) {
|
||||
// Compute the required cuda device link libraries when
|
||||
// resolving cuda device symbols
|
||||
this->OutputLinkLibraries(pcli, linkLineComputer, linkLibs,
|
||||
frameworkPath, linkPath);
|
||||
}
|
||||
break;
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
libraryLinkVariable = "CMAKE_MODULE_LINKER_FLAGS";
|
||||
|
@ -300,19 +300,16 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
|
||||
// Collect up flags to link in needed libraries.
|
||||
std::string linkLibs;
|
||||
if (this->GeneratorTarget->GetType() != cmStateEnums::STATIC_LIBRARY) {
|
||||
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
|
||||
new cmLinkLineDeviceComputer(
|
||||
this->LocalGenerator,
|
||||
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
|
||||
linkLineComputer->SetForResponse(useResponseFileForLibs);
|
||||
linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
|
||||
linkLineComputer->SetRelink(relink);
|
||||
|
||||
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
|
||||
new cmLinkLineDeviceComputer(
|
||||
this->LocalGenerator,
|
||||
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
|
||||
linkLineComputer->SetForResponse(useResponseFileForLibs);
|
||||
linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
|
||||
linkLineComputer->SetRelink(relink);
|
||||
|
||||
this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
|
||||
useResponseFileForLibs, depends);
|
||||
}
|
||||
this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
|
||||
useResponseFileForLibs, depends);
|
||||
|
||||
// Construct object file lists that may be needed to expand the
|
||||
// rule.
|
||||
|
@ -3101,6 +3101,82 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
|
||||
"-Wno-deprecated-gpu-targets");
|
||||
}
|
||||
|
||||
// For static libraries that have device linking enabled compute
|
||||
// the libraries
|
||||
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
|
||||
doDeviceLinking) {
|
||||
cmComputeLinkInformation* pcli =
|
||||
this->GeneratorTarget->GetLinkInformation(configName);
|
||||
if (!pcli) {
|
||||
cmSystemTools::Error(
|
||||
"CMake can not compute cmComputeLinkInformation for target: " +
|
||||
this->Name);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Would like to use:
|
||||
// cmLinkLineDeviceComputer computer(this->LocalGenerator,
|
||||
// this->LocalGenerator->GetStateSnapshot().GetDirectory());
|
||||
// std::string computed_libs = computer.ComputeLinkLibraries(cli,
|
||||
// std::string{}); but it outputs in "<libA> <libB>" format instead of
|
||||
// "<libA>;<libB>"
|
||||
// Note:
|
||||
// Any modification of this algorithm should be reflected also in
|
||||
// cmLinkLineDeviceComputer
|
||||
cmComputeLinkInformation& cli = *pcli;
|
||||
std::vector<std::string> libVec;
|
||||
const std::string currentBinDir =
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory();
|
||||
const auto& libs = cli.GetItems();
|
||||
for (cmComputeLinkInformation::Item const& l : libs) {
|
||||
|
||||
if (l.Target) {
|
||||
auto managedType = l.Target->GetManagedType(configName);
|
||||
// Do not allow C# targets to be added to the LIB listing. LIB files
|
||||
// are used for linking C++ dependencies. C# libraries do not have lib
|
||||
// files. Instead, they compile down to C# reference libraries (DLL
|
||||
// files). The
|
||||
// `<ProjectReference>` elements added to the vcxproj are enough for
|
||||
// the IDE to deduce the DLL file required by other C# projects that
|
||||
// need its reference library.
|
||||
if (managedType == cmGeneratorTarget::ManagedType::Managed) {
|
||||
continue;
|
||||
}
|
||||
const auto type = l.Target->GetType();
|
||||
|
||||
bool skip = false;
|
||||
switch (type) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::INTERFACE_LIBRARY:
|
||||
skip = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
skip = l.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (skip) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (l.IsPath) {
|
||||
std::string path = this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
currentBinDir, l.Value);
|
||||
ConvertToWindowsSlash(path);
|
||||
if (!cmVS10IsTargetsFile(l.Value)) {
|
||||
libVec.push_back(path);
|
||||
}
|
||||
} else {
|
||||
libVec.push_back(l.Value);
|
||||
}
|
||||
}
|
||||
|
||||
cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
|
||||
}
|
||||
|
||||
this->CudaLinkOptions[configName] = std::move(pOptions);
|
||||
return true;
|
||||
}
|
||||
|
@ -16,21 +16,29 @@ else()
|
||||
endif()
|
||||
|
||||
#Goal for this example:
|
||||
# Build a static library that defines multiple methods and kernels that
|
||||
# use each other.
|
||||
# Resolve the device symbols into that static library
|
||||
# Verify that we can't use those device symbols from anything that links
|
||||
# 1. Build two static libraries that defines multiple methods and kernels
|
||||
# 2. Resolve the device symbols into the second static library, therefore
|
||||
# confirming that the first static library is on the device link line
|
||||
# 3. Verify that we can't use those device symbols from anything that links
|
||||
# to the static library
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[sm_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
|
||||
add_library(CUDAResolveDeviceDepsA STATIC file1.cu)
|
||||
add_library(CUDAResolveDeviceDepsB STATIC file2.cu)
|
||||
set_target_properties(CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
add_library(CUDAResolveDeviceLib STATIC file2_launch.cu)
|
||||
set_target_properties(CUDAResolveDeviceLib
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
target_link_libraries(CUDAResolveDeviceLib PRIVATE CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB)
|
||||
|
||||
if(dump_command)
|
||||
add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
|
||||
@ -45,7 +53,8 @@ endif()
|
||||
add_executable(CudaOnlyResolveDeviceSymbols main.cu)
|
||||
set_target_properties(CudaOnlyResolveDeviceSymbols
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON)
|
||||
CUDA_SEPARABLE_COMPILATION OFF
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
|
||||
|
||||
target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
|
||||
|
||||
|
@ -1,7 +1,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
struct result_type
|
||||
{
|
||||
int input;
|
||||
int sum;
|
||||
};
|
||||
|
||||
result_type __device__ file1_func(int x);
|
||||
|
@ -1,25 +1,9 @@
|
||||
|
||||
#include "file2.h"
|
||||
|
||||
result_type __device__ file1_func(int x);
|
||||
|
||||
result_type_dynamic __device__ file2_func(int x)
|
||||
{
|
||||
const result_type r = file1_func(x);
|
||||
const result_type_dynamic rd{ r.input, r.sum, true };
|
||||
return rd;
|
||||
}
|
||||
|
||||
static __global__ void file2_kernel(result_type_dynamic& r, int x)
|
||||
{
|
||||
// call static_func which is a method that is defined in the
|
||||
// static library that is always out of date
|
||||
r = file2_func(x);
|
||||
}
|
||||
|
||||
int file2_launch_kernel(int x)
|
||||
{
|
||||
result_type_dynamic r;
|
||||
file2_kernel<<<1, 1>>>(r, x);
|
||||
return r.sum;
|
||||
}
|
||||
|
@ -8,3 +8,5 @@ struct result_type_dynamic
|
||||
int sum;
|
||||
bool from_static;
|
||||
};
|
||||
|
||||
result_type_dynamic __device__ file2_func(int x);
|
||||
|
18
Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu
Normal file
18
Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
#include "file2.h"
|
||||
|
||||
static __global__ void file2_kernel(result_type_dynamic& r, int x)
|
||||
{
|
||||
// call static_func which is a method that is defined in the
|
||||
// static library that is always out of date
|
||||
r = file2_func(x);
|
||||
}
|
||||
|
||||
static __global__ void file2_kernel(result_type_dynamic& r, int x);
|
||||
|
||||
int file2_launch_kernel(int x)
|
||||
{
|
||||
result_type_dynamic r;
|
||||
file2_kernel<<<1, 1>>>(r, x);
|
||||
return r.sum;
|
||||
}
|
@ -1,26 +1,10 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "file1.h"
|
||||
#include "file2.h"
|
||||
|
||||
int file2_launch_kernel(int x);
|
||||
|
||||
result_type_dynamic __device__ file2_func(int x);
|
||||
static __global__ void main_kernel(result_type_dynamic& r, int x)
|
||||
{
|
||||
// call function that was not device linked to us, this will cause
|
||||
// a runtime failure of "invalid device function"
|
||||
r = file2_func(x);
|
||||
}
|
||||
|
||||
int main_launch_kernel(int x)
|
||||
{
|
||||
result_type_dynamic r;
|
||||
main_kernel<<<1, 1>>>(r, x);
|
||||
return r.sum;
|
||||
}
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
@ -62,12 +46,10 @@ int main(int argc, char** argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
main_launch_kernel(1);
|
||||
file2_launch_kernel(1);
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (err == cudaSuccess) {
|
||||
// This kernel launch should fail as the file2_func was device linked
|
||||
// into the static library and is not usable by the executable
|
||||
std::cerr << "main_launch_kernel: kernel launch should have failed"
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "file2_launch_kernel: kernel launch should have passed"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user