CUDA/Clang: Fix separable compilation in non-root directories with Makefiles

The relative paths were wrong almost everywhere, so separable compilation
only worked for files compiled in the top-level directory. The
CudaOnly.SeparateCompilation test has been modified to cover this case.

Fixes #22482.
This commit is contained in:
root 2021-07-27 22:58:03 +03:00
parent f7cf69e34a
commit 0b1cea66cd
6 changed files with 42 additions and 31 deletions

View File

@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
* The :generator:`Visual Studio 17 2022` generator is now based on
"Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1".
3.21.2
------
* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
correctly generated in non-root directories.

View File

@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
}
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
std::string const& relPath =
this->LocalGenerator->GetHomeRelativeOutputPath();
// Ensure there are no duplicates.
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
std::vector<std::string> deps;
this->AppendTargetDepends(deps, true);
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
std::copy(this->Objects.begin(), this->Objects.end(),
std::back_inserter(deps));
for (std::string const& obj : this->Objects) {
deps.emplace_back(cmStrCat(relPath, obj));
}
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
deps.clear();
@ -1510,7 +1514,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
std::string profiles;
std::vector<std::string> fatbinaryDepends;
std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
std::string const registerFile =
cmStrCat(objectDir, "cmake_cuda_register.h");
// Link device code for each architecture.
for (const std::string& architectureKind : architectures) {
@ -1518,7 +1523,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
const std::string architecture =
architectureKind.substr(0, architectureKind.find('-'));
const std::string cubin =
cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
cmStrCat(objectDir, "sm_", architecture, ".cubin");
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
fatbinaryDepends.emplace_back(cubin);
@ -1530,8 +1535,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
// all architectures the register file will be the same too. Thus
// generate it only on the first invocation to reduce overhead.
if (fatbinaryDepends.size() == 1) {
std::string registerFileRel =
this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
std::string const registerFileRel =
cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
registerFileCmd =
cmStrCat(" --register-link-binaries=", registerFileRel);
cleanFiles.push_back(registerFileRel);
@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
const std::string fatbinaryOutput =
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
const std::string fatbinaryOutputRel =
this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
fatbinaryOutputRel, fatbinaryDepends,
@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
compileCmd, vars);
commands.emplace_back(compileCmd);
this->LocalGenerator->WriteMakeRule(
*this->BuildFileStream, nullptr, output,
{ cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
{ fatbinaryOutputRel }, commands, false);
// Clean all the possible executable names and symlinks.
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());

View File

@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.

View File

@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
add_executable(CudaOnlySeparateCompilation main.cu)
target_link_libraries(CudaOnlySeparateCompilation
PRIVATE CUDASeparateLibB)
set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
set_target_properties(CUDASeparateLibA
CUDASeparateLibB
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
if (CMAKE_GENERATOR MATCHES "^Visual Studio")
#Visual Studio CUDA integration will not perform device linking
#on a target that itself does not have GenerateRelocatableDeviceCode
#enabled.
set_target_properties(CudaOnlySeparateCompilation
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()
if(APPLE)
# Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()
add_subdirectory(main)

View File

@ -0,0 +1,18 @@
# Executable built from main.cu; it links privately against CUDASeparateLibB.
add_executable(CudaOnlySeparateCompilation main.cu)
target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
# Request CUDA standard 11 and mark it as required for this target.
set_target_properties(CudaOnlySeparateCompilation PROPERTIES
CUDA_STANDARD 11
CUDA_STANDARD_REQUIRED TRUE
)
if(CMAKE_GENERATOR MATCHES "^Visual Studio")
# Visual Studio CUDA integration will not perform device linking
# on a target that itself does not have GenerateRelocatableDeviceCode
# enabled.
set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
endif()
if(APPLE)
# Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
# The implicit CUDA link directories are used as the build-tree RPATH.
set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()

View File

@ -1,8 +1,8 @@
#include <iostream>
#include "file1.h"
#include "file2.h"
#include "../file1.h"
#include "../file2.h"
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);