CUDA/Clang: Fix separable compilation in non-root directories with Makefiles
It seems the relative paths were wrong nearly everywhere, so only files in the top-level directory would compile. I've modified the CudaOnly.SeparateCompilation test to cover this case. Fixes #22482.
This commit is contained in:
parent
f7cf69e34a
commit
0b1cea66cd
@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
|
||||
|
||||
* The :generator:`Visual Studio 17 2022` generator is now based on
|
||||
"Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1".
|
||||
|
||||
3.21.2
|
||||
------
|
||||
|
||||
* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
|
||||
correctly generated in non-root directories.
|
||||
|
@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
}
|
||||
|
||||
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
|
||||
std::string const& relPath =
|
||||
this->LocalGenerator->GetHomeRelativeOutputPath();
|
||||
|
||||
// Ensure there are no duplicates.
|
||||
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
|
||||
std::vector<std::string> deps;
|
||||
this->AppendTargetDepends(deps, true);
|
||||
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
|
||||
std::copy(this->Objects.begin(), this->Objects.end(),
|
||||
std::back_inserter(deps));
|
||||
|
||||
for (std::string const& obj : this->Objects) {
|
||||
deps.emplace_back(cmStrCat(relPath, obj));
|
||||
}
|
||||
|
||||
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
|
||||
deps.clear();
|
||||
@ -1510,7 +1514,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
|
||||
std::string profiles;
|
||||
std::vector<std::string> fatbinaryDepends;
|
||||
std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
|
||||
std::string const registerFile =
|
||||
cmStrCat(objectDir, "cmake_cuda_register.h");
|
||||
|
||||
// Link device code for each architecture.
|
||||
for (const std::string& architectureKind : architectures) {
|
||||
@ -1518,7 +1523,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
const std::string architecture =
|
||||
architectureKind.substr(0, architectureKind.find('-'));
|
||||
const std::string cubin =
|
||||
cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
|
||||
cmStrCat(objectDir, "sm_", architecture, ".cubin");
|
||||
|
||||
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
|
||||
fatbinaryDepends.emplace_back(cubin);
|
||||
@ -1530,8 +1535,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
// all architectures the register file will be the same too. Thus
|
||||
// generate it only on the first invocation to reduce overhead.
|
||||
if (fatbinaryDepends.size() == 1) {
|
||||
std::string registerFileRel =
|
||||
this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
|
||||
std::string const registerFileRel =
|
||||
cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
|
||||
registerFileCmd =
|
||||
cmStrCat(" --register-link-binaries=", registerFileRel);
|
||||
cleanFiles.push_back(registerFileRel);
|
||||
@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
const std::string fatbinaryOutput =
|
||||
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
|
||||
const std::string fatbinaryOutputRel =
|
||||
this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
|
||||
cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
|
||||
|
||||
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
|
||||
fatbinaryOutputRel, fatbinaryDepends,
|
||||
@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
|
||||
compileCmd, vars);
|
||||
|
||||
commands.emplace_back(compileCmd);
|
||||
this->LocalGenerator->WriteMakeRule(
|
||||
*this->BuildFileStream, nullptr, output,
|
||||
{ cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
|
||||
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
|
||||
{ fatbinaryOutputRel }, commands, false);
|
||||
|
||||
// Clean all the possible executable names and symlinks.
|
||||
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
|
||||
|
@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
|
||||
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
|
||||
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
|
||||
|
||||
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
|
||||
# Clang doesn't have flags for selecting the runtime.
|
||||
|
@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
|
||||
target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
|
||||
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
|
||||
|
||||
add_executable(CudaOnlySeparateCompilation main.cu)
|
||||
target_link_libraries(CudaOnlySeparateCompilation
|
||||
PRIVATE CUDASeparateLibB)
|
||||
set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
|
||||
set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
|
||||
|
||||
set_target_properties(CUDASeparateLibA
|
||||
CUDASeparateLibB
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if (CMAKE_GENERATOR MATCHES "^Visual Studio")
|
||||
#Visual Studio CUDA integration will not perform device linking
|
||||
#on a target that itself does not have GenerateRelocatableDeviceCode
|
||||
#enabled.
|
||||
set_target_properties(CudaOnlySeparateCompilation
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
|
||||
set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
add_subdirectory(main)
|
||||
|
18
Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
Normal file
18
Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
Normal file
@ -0,0 +1,18 @@
|
||||
add_executable(CudaOnlySeparateCompilation main.cu)
|
||||
target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
|
||||
set_target_properties(CudaOnlySeparateCompilation PROPERTIES
|
||||
CUDA_STANDARD 11
|
||||
CUDA_STANDARD_REQUIRED TRUE
|
||||
)
|
||||
|
||||
if(CMAKE_GENERATOR MATCHES "^Visual Studio")
|
||||
# Visual Studio CUDA integration will not perform device linking
|
||||
# on a target that itself does not have GenerateRelocatableDeviceCode
|
||||
# enabled.
|
||||
set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
|
||||
set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
@ -1,8 +1,8 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "file1.h"
|
||||
#include "file2.h"
|
||||
#include "../file1.h"
|
||||
#include "../file2.h"
|
||||
|
||||
int file4_launch_kernel(int x);
|
||||
int file5_launch_kernel(int x);
|
Loading…
Reference in New Issue
Block a user