Skip to content

Commit 77df5a8

Browse files
committed
[HIP] Move HIP Linking Logic into HIP ToolChain
This patch is a follow-up to https://reviews.llvm.org/D78759. It extracts the HIP linker script from the generic GNU linker and moves it into the HIP ToolChain. It updates the link actions feature of OffloadingActionBuilder to apply device linking and host linking actions separately. Using MC directives, it embeds the device images and defines the required symbols. Reviewers: JonChesterfield, yaxunl Subscribers: tra, echristo, jdoerfert, msearles, scchan Differential Revision: https://reviews.llvm.org/D81963
1 parent 315bd96 commit 77df5a8

13 files changed

+304
-233
lines changed

clang/lib/Driver/Driver.cpp

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2325,8 +2325,11 @@ class OffloadingActionBuilder final {
23252325
/// Append top level actions generated by the builder.
23262326
virtual void appendTopLevelActions(ActionList &AL) {}
23272327

2328-
/// Append linker actions generated by the builder.
2329-
virtual void appendLinkActions(ActionList &AL) {}
2328+
/// Append linker device actions generated by the builder.
2329+
virtual void appendLinkDeviceActions(ActionList &AL) {}
2330+
2331+
/// Append linker host action generated by the builder.
2332+
virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; }
23302333

23312334
/// Append linker dependences generated by the builder.
23322335
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
@@ -2796,17 +2799,45 @@ class OffloadingActionBuilder final {
27962799
: ABRT_Success;
27972800
}
27982801

2799-
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
2802+
void appendLinkDeviceActions(ActionList &AL) override {
2803+
if (DeviceLinkerInputs.size() == 0)
2804+
return;
2805+
2806+
assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
2807+
"Linker inputs and GPU arch list sizes do not match.");
2808+
28002809
// Append a new link action for each device.
28012810
unsigned I = 0;
28022811
for (auto &LI : DeviceLinkerInputs) {
2812+
// Each entry in DeviceLinkerInputs corresponds to a GPU arch.
28032813
auto *DeviceLinkAction =
28042814
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
2805-
DA.add(*DeviceLinkAction, *ToolChains[0],
2806-
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
2815+
// Linking all inputs for the current GPU arch.
2816+
// LI contains all the inputs for the linker.
2817+
OffloadAction::DeviceDependences DeviceLinkDeps;
2818+
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
2819+
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
2820+
AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
2821+
DeviceLinkAction->getType()));
28072822
++I;
28082823
}
2824+
DeviceLinkerInputs.clear();
2825+
2826+
// Create a host object from all the device images by embedding them
2827+
// in a fat binary.
2828+
OffloadAction::DeviceDependences DDeps;
2829+
auto *TopDeviceLinkAction =
2830+
C.MakeAction<LinkJobAction>(AL, types::TY_Object);
2831+
DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
2832+
nullptr, AssociatedOffloadKind);
2833+
2834+
// Offload the host object to the host linker.
2835+
AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
28092836
}
2837+
2838+
Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
2839+
2840+
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
28102841
};
28112842

28122843
/// OpenMP action builder. The host bitcode is passed to the device frontend
@@ -2934,7 +2965,7 @@ class OffloadingActionBuilder final {
29342965
OpenMPDeviceActions.clear();
29352966
}
29362967

2937-
void appendLinkActions(ActionList &AL) override {
2968+
void appendLinkDeviceActions(ActionList &AL) override {
29382969
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
29392970
"Toolchains and linker inputs sizes do not match.");
29402971

@@ -2953,6 +2984,14 @@ class OffloadingActionBuilder final {
29532984
DeviceLinkerInputs.clear();
29542985
}
29552986

2987+
Action* appendLinkHostActions(ActionList &AL) override {
2988+
// Create wrapper bitcode from the result of device link actions and compile
2989+
// it to an object which will be added to the host link command.
2990+
auto *BC = C.MakeAction<OffloadWrapperJobAction>(AL, types::TY_LLVM_BC);
2991+
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
2992+
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
2993+
}
2994+
29562995
void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
29572996

29582997
bool initialize() override {
@@ -3185,17 +3224,20 @@ class OffloadingActionBuilder final {
31853224
for (DeviceActionBuilder *SB : SpecializedBuilders) {
31863225
if (!SB->isValid())
31873226
continue;
3188-
SB->appendLinkActions(DeviceAL);
3227+
SB->appendLinkDeviceActions(DeviceAL);
31893228
}
31903229

31913230
if (DeviceAL.empty())
31923231
return nullptr;
31933232

3194-
// Create wrapper bitcode from the result of device link actions and compile
3195-
// it to an object which will be added to the host link command.
3196-
auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
3197-
auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
3198-
return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
3233+
// Let builders add host linking actions.
3234+
Action* HA;
3235+
for (DeviceActionBuilder *SB : SpecializedBuilders) {
3236+
if (!SB->isValid())
3237+
continue;
3238+
HA = SB->appendLinkHostActions(DeviceAL);
3239+
}
3240+
return HA;
31993241
}
32003242

32013243
/// Processes the host linker action. This currently consists of replacing it

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 3 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -152,14 +152,12 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
152152
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
153153

154154
for (const auto &II : Inputs) {
155-
// If the current tool chain refers to an OpenMP or HIP offloading host, we
156-
// should ignore inputs that refer to OpenMP or HIP offloading devices -
155+
// If the current tool chain refers to an OpenMP offloading host, we
156+
// should ignore inputs that refer to OpenMP offloading devices -
157157
// they will be embedded according to a proper linker script.
158158
if (auto *IA = II.getAction())
159159
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
160-
IA->isDeviceOffloading(Action::OFK_OpenMP)) ||
161-
(JA.isHostOffloading(Action::OFK_HIP) &&
162-
IA->isDeviceOffloading(Action::OFK_HIP)))
160+
IA->isDeviceOffloading(Action::OFK_OpenMP)))
163161
continue;
164162

165163
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
@@ -1298,115 +1296,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
12981296
}
12991297
}
13001298

1301-
/// Add HIP linker script arguments at the end of the argument list so that
1302-
/// the fat binary is built by embedding the device images into the host. The
1303-
/// linker script also defines a symbol required by the code generation so that
1304-
/// the image can be retrieved at runtime. This should be used only in tool
1305-
/// chains that support linker scripts.
1306-
void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
1307-
const InputInfo &Output,
1308-
const InputInfoList &Inputs, const ArgList &Args,
1309-
ArgStringList &CmdArgs, const JobAction &JA,
1310-
const Tool &T) {
1311-
1312-
// If this is not a HIP host toolchain, we don't need to do anything.
1313-
if (!JA.isHostOffloading(Action::OFK_HIP))
1314-
return;
1315-
1316-
InputInfoList DeviceInputs;
1317-
for (const auto &II : Inputs) {
1318-
const Action *A = II.getAction();
1319-
// Is this a device linking action?
1320-
if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
1321-
DeviceInputs.push_back(II);
1322-
}
1323-
}
1324-
1325-
if (DeviceInputs.empty())
1326-
return;
1327-
1328-
// Create temporary linker script. Keep it if save-temps is enabled.
1329-
const char *LKS;
1330-
std::string Name =
1331-
std::string(llvm::sys::path::filename(Output.getFilename()));
1332-
if (C.getDriver().isSaveTempsEnabled()) {
1333-
LKS = C.getArgs().MakeArgString(Name + ".lk");
1334-
} else {
1335-
auto TmpName = C.getDriver().GetTemporaryPath(Name, "lk");
1336-
LKS = C.addTempFile(C.getArgs().MakeArgString(TmpName));
1337-
}
1338-
1339-
// Add linker script option to the command.
1340-
CmdArgs.push_back("-T");
1341-
CmdArgs.push_back(LKS);
1342-
1343-
// Create a buffer to write the contents of the linker script.
1344-
std::string LksBuffer;
1345-
llvm::raw_string_ostream LksStream(LksBuffer);
1346-
1347-
// Get the HIP offload tool chain.
1348-
auto *HIPTC = static_cast<const toolchains::HIPToolChain *>(
1349-
C.getSingleOffloadToolChain<Action::OFK_HIP>());
1350-
assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&
1351-
"Wrong platform");
1352-
(void)HIPTC;
1353-
1354-
const char *BundleFile;
1355-
if (C.getDriver().isSaveTempsEnabled()) {
1356-
BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
1357-
} else {
1358-
auto TmpName = C.getDriver().GetTemporaryPath(Name, "hipfb");
1359-
BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpName));
1360-
}
1361-
AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T);
1362-
1363-
// Add commands to embed target binaries. We ensure that each section and
1364-
// image is 16-byte aligned. This is not mandatory, but increases the
1365-
// likelihood of data to be aligned with a cache block in several main host
1366-
// machines.
1367-
LksStream << "/*\n";
1368-
LksStream << " HIP Offload Linker Script\n";
1369-
LksStream << " *** Automatically generated by Clang ***\n";
1370-
LksStream << "*/\n";
1371-
LksStream << "TARGET(binary)\n";
1372-
LksStream << "INPUT(" << BundleFile << ")\n";
1373-
LksStream << "SECTIONS\n";
1374-
LksStream << "{\n";
1375-
LksStream << " .hip_fatbin :\n";
1376-
LksStream << " ALIGN(0x10)\n";
1377-
LksStream << " {\n";
1378-
LksStream << " PROVIDE_HIDDEN(__hip_fatbin = .);\n";
1379-
LksStream << " " << BundleFile << "\n";
1380-
LksStream << " }\n";
1381-
LksStream << " /DISCARD/ :\n";
1382-
LksStream << " {\n";
1383-
LksStream << " * ( __CLANG_OFFLOAD_BUNDLE__* )\n";
1384-
LksStream << " }\n";
1385-
LksStream << "}\n";
1386-
LksStream << "INSERT BEFORE .data\n";
1387-
LksStream.flush();
1388-
1389-
// Dump the contents of the linker script if the user requested that. We
1390-
// support this option to enable testing of behavior with -###.
1391-
if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
1392-
llvm::errs() << LksBuffer;
1393-
1394-
// If this is a dry run, do not create the linker script file.
1395-
if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
1396-
return;
1397-
1398-
// Open script file and write the contents.
1399-
std::error_code EC;
1400-
llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);
1401-
1402-
if (EC) {
1403-
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
1404-
return;
1405-
}
1406-
1407-
Lksf << LksBuffer;
1408-
}
1409-
14101299
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
14111300
const InputInfo &Output,
14121301
const InputInfo &Input,

clang/lib/Driver/ToolChains/CommonArgs.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
4545
llvm::opt::ArgStringList &CmdArgs,
4646
const llvm::opt::ArgList &Args);
4747

48-
void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
49-
const InputInfo &Output, const InputInfoList &Inputs,
50-
const llvm::opt::ArgList &Args,
51-
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA,
52-
const Tool &T);
53-
5448
const char *SplitDebugName(const llvm::opt::ArgList &Args,
5549
const InputInfo &Input, const InputInfo &Output);
5650

clang/lib/Driver/ToolChains/Gnu.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -625,10 +625,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
625625
}
626626
}
627627

628-
// Add HIP offloading linker script args if required.
629-
AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
630-
*this);
631-
632628
Args.AddAllArgs(CmdArgs, options::OPT_T);
633629

634630
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());

clang/lib/Driver/ToolChains/HIP.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,87 @@ void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
104104
C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
105105
}
106106

107+
/// Add the generated HIP object file, which has device images embedded into the
108+
/// host to the argument list for linking. Using MC directives, embed the
109+
/// device code and also define symbols required by the code generation so that
110+
/// the image can be retrieved at runtime.
111+
void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
112+
Compilation &C, const InputInfo &Output,
113+
const InputInfoList &Inputs, const ArgList &Args,
114+
const JobAction &JA) const {
115+
const ToolChain &TC = getToolChain();
116+
std::string Name =
117+
std::string(llvm::sys::path::stem(Output.getFilename()));
118+
119+
// Create Temp Object File Generator,
120+
// Offload Bundled file and Bundled Object file.
121+
// Keep them if save-temps is enabled.
122+
const char *McinFile;
123+
const char *BundleFile;
124+
if (C.getDriver().isSaveTempsEnabled()) {
125+
McinFile = C.getArgs().MakeArgString(Name + ".mcin");
126+
BundleFile = C.getArgs().MakeArgString(Name + ".hipfb");
127+
} else {
128+
auto TmpNameMcin = C.getDriver().GetTemporaryPath(Name, "mcin");
129+
McinFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameMcin));
130+
auto TmpNameFb = C.getDriver().GetTemporaryPath(Name, "hipfb");
131+
BundleFile = C.addTempFile(C.getArgs().MakeArgString(TmpNameFb));
132+
}
133+
constructHIPFatbinCommand(C, JA, BundleFile, Inputs, Args, *this);
134+
135+
// Create a buffer to write the contents of the temp obj generator.
136+
std::string ObjBuffer;
137+
llvm::raw_string_ostream ObjStream(ObjBuffer);
138+
139+
// Add MC directives to embed target binaries. We ensure that each
140+
// section and image is 8-byte aligned (.p2align 3). This is not mandatory, but
141+
// increases the likelihood of data to be aligned with a cache block
142+
// in several main host machines.
143+
ObjStream << "# HIP Object Generator\n";
144+
ObjStream << "# *** Automatically generated by Clang ***\n";
145+
ObjStream << " .type __hip_fatbin,@object\n";
146+
ObjStream << " .section .hip_fatbin,\"aMS\",@progbits,1\n";
147+
ObjStream << " .data\n";
148+
ObjStream << " .globl __hip_fatbin\n";
149+
ObjStream << " .p2align 3\n";
150+
ObjStream << "__hip_fatbin:\n";
151+
ObjStream << " .incbin \"" << BundleFile << "\"\n";
152+
ObjStream.flush();
153+
154+
// Dump the contents of the temp object file gen if the user requested that.
155+
// We support this option to enable testing of behavior with -###.
156+
if (C.getArgs().hasArg(options::OPT_fhip_dump_offload_linker_script))
157+
llvm::errs() << ObjBuffer;
158+
159+
// Open the MC input file and write the contents.
160+
std::error_code EC;
161+
llvm::raw_fd_ostream Objf(McinFile, EC, llvm::sys::fs::OF_None);
162+
163+
if (EC) {
164+
C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
165+
return;
166+
}
167+
168+
Objf << ObjBuffer;
169+
170+
ArgStringList McArgs{"-triple", Args.MakeArgString(TC.getTripleString()),
171+
"-o", Output.getFilename(),
172+
McinFile, "--filetype=obj"};
173+
const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
174+
C.addCommand(std::make_unique<Command>(JA, *this, Mc, McArgs, Inputs));
175+
}
176+
107177
// For amdgcn the inputs of the linker job are device bitcode and output is
108178
// object file. It calls llvm-link, opt, llc, then lld steps.
109179
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
110180
const InputInfo &Output,
111181
const InputInfoList &Inputs,
112182
const ArgList &Args,
113183
const char *LinkingOutput) const {
184+
if (Inputs.size() > 0 &&
185+
Inputs[0].getType() == types::TY_Image &&
186+
JA.getType() == types::TY_Object)
187+
return constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, Args, JA);
114188

115189
if (JA.getType() == types::TY_HIP_FATBIN)
116190
return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);

clang/lib/Driver/ToolChains/HIP.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
4242
void constructLldCommand(Compilation &C, const JobAction &JA,
4343
const InputInfoList &Inputs, const InputInfo &Output,
4444
const llvm::opt::ArgList &Args) const;
45+
46+
// Construct command for creating Object from HIP fatbin.
47+
void constructGenerateObjFileFromHIPFatBinary(Compilation &C,
48+
const InputInfo &Output,
49+
const InputInfoList &Inputs,
50+
const llvm::opt::ArgList &Args,
51+
const JobAction &JA) const;
4552
};
4653

4754
} // end namespace AMDGCN

clang/test/Driver/hip-binding.hip

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,15 @@
2525
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
2626
// RUN: 2>&1 | FileCheck %s
2727

28-
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
29-
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
28+
// CHECK: # "x86_64-unknown-linux-gnu" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[HOSTOBJ:.*o]]", "{{.*o}}", "{{.*o}}"]
29+
// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN]]"], outputs: ["{{.*o}}", "[[DOBJ1:.*o]]", "[[DOBJ2:.*o]]"]
30+
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ1]]"], output: "[[IMG1:.*out]]"
3031
// CHECK-NOT: offload bundler
31-
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
32+
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[DOBJ2]]"], output: "[[IMG2:.*out]]"
3233
// CHECK-NOT: offload bundler
33-
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
34+
// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBINOBJ:.*o]]"
35+
// CHECK-NOT: offload bundler
36+
// CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[HOSTOBJ]]", "[[FATBINOBJ]]"], output: "a.out"
3437

3538
// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
3639
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\

0 commit comments

Comments
 (0)