@@ -242,6 +242,13 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
242
242
return *Path;
243
243
}
244
244
245
+ // / We will defer LTO to the target's linker if we are not doing JIT and it is
246
+ // / supported by the toolchain.
247
+ bool linkerSupportsLTO (const ArgList &Args) {
248
+ llvm::Triple Triple (Args.getLastArgValue (OPT_triple_EQ));
249
+ return Triple.isNVPTX () || Triple.isAMDGPU ();
250
+ }
251
+
245
252
// / Returns the hashed value for a constant string.
246
253
std::string getHash (StringRef Str) {
247
254
llvm::MD5 Hasher;
@@ -504,18 +511,23 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
504
511
llvm::copy (LinkerArgs, std::back_inserter (CmdArgs));
505
512
}
506
513
507
- // Pass on -mllvm options to the clang invocation.
508
- for (const opt::Arg *Arg : Args.filtered (OPT_mllvm)) {
509
- CmdArgs.push_back (" -mllvm" );
510
- CmdArgs.push_back (Arg->getValue ());
511
- }
514
+ // Pass on -mllvm options to the linker invocation.
515
+ for (const opt::Arg *Arg : Args.filtered (OPT_mllvm))
516
+ CmdArgs.push_back (
517
+ Args.MakeArgString (" -Wl,-mllvm=" + StringRef (Arg->getValue ())));
512
518
513
519
if (Args.hasArg (OPT_debug))
514
520
CmdArgs.push_back (" -g" );
515
521
516
522
if (SaveTemps)
517
523
CmdArgs.push_back (" -save-temps" );
518
524
525
+ if (SaveTemps && linkerSupportsLTO (Args))
526
+ CmdArgs.push_back (" -Wl,--save-temps" );
527
+
528
+ if (Args.hasArg (OPT_embed_bitcode))
529
+ CmdArgs.push_back (" -Wl,--lto-emit-llvm" );
530
+
519
531
if (Verbose)
520
532
CmdArgs.push_back (" -v" );
521
533
@@ -536,8 +548,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
536
548
Args.MakeArgString (Arg.split (' =' ).second )});
537
549
}
538
550
539
- // The OpenMPOpt pass can introduce new calls and is expensive, we do not want
540
- // this when running CodeGen through clang.
551
+ // The OpenMPOpt pass can introduce new calls and is expensive, we do
552
+ // not want this when running CodeGen through clang.
541
553
if (Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ))
542
554
CmdArgs.append ({" -mllvm" , " -openmp-opt-disable" });
543
555
@@ -703,8 +715,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
703
715
BumpPtrAllocator Alloc;
704
716
StringSaver Saver (Alloc);
705
717
706
- // Search for bitcode files in the input and create an LTO input file. If it
707
- // is not a bitcode file, scan its symbol table for symbols we need to save.
718
+ // Search for bitcode files in the input and create an LTO input file. If
719
+ // it is not a bitcode file, scan its symbol table for symbols we need to
720
+ // save.
708
721
for (OffloadFile &File : InputFiles) {
709
722
MemoryBufferRef Buffer = MemoryBufferRef (File.getBinary ()->getImage (), " " );
710
723
@@ -738,7 +751,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
738
751
if (!Name)
739
752
return Name.takeError ();
740
753
741
- // Record if we've seen these symbols in any object or shared libraries.
754
+ // Record if we've seen these symbols in any object or shared
755
+ // libraries.
742
756
if ((*ObjFile)->isRelocatableObject ())
743
757
UsedInRegularObj.insert (Saver.save (*Name));
744
758
else
@@ -775,17 +789,18 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
775
789
return false ;
776
790
};
777
791
778
- // We assume visibility of the whole program if every input file was bitcode.
792
+ // We assume visibility of the whole program if every input file was
793
+ // bitcode.
779
794
auto Features = getTargetFeatures (BitcodeInputFiles);
780
795
auto LTOBackend = Args.hasArg (OPT_embed_bitcode) ||
781
796
Args.hasArg (OPT_builtin_bitcode_EQ) ||
782
797
Args.hasArg (OPT_clang_backend)
783
798
? createLTO (Args, Features, OutputBitcode)
784
799
: createLTO (Args, Features);
785
800
786
- // We need to resolve the symbols so the LTO backend knows which symbols need
787
- // to be kept or can be internalized. This is a simplified symbol resolution
788
- // scheme to approximate the full resolution a linker would do.
801
+ // We need to resolve the symbols so the LTO backend knows which symbols
802
+ // need to be kept or can be internalized. This is a simplified symbol
803
+ // resolution scheme to approximate the full resolution a linker would do.
789
804
uint64_t Idx = 0 ;
790
805
DenseSet<StringRef> PrevailingSymbols;
791
806
for (auto &BitcodeInput : BitcodeInputFiles) {
@@ -817,7 +832,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
817
832
// We need LTO to preserve the following global symbols:
818
833
// 1) Symbols used in regular objects.
819
834
// 2) Sections that will be given a __start/__stop symbol.
820
- // 3) Prevailing symbols that are needed visible to external libraries.
835
+ // 3) Prevailing symbols that are needed visible to external
836
+ // libraries.
821
837
Res.VisibleToRegularObj =
822
838
UsedInRegularObj.contains (Sym.getName ()) ||
823
839
isValidCIdentifier (Sym.getSectionName ()) ||
@@ -832,9 +848,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
832
848
(UsedInSharedLib.contains (Sym.getName ()) ||
833
849
!Sym.canBeOmittedFromSymbolTable ());
834
850
835
- // The final definition will reside in this linkage unit if the symbol is
836
- // defined and local to the module. This only checks for bitcode files,
837
- // full assertion will require complete symbol resolution.
851
+ // The final definition will reside in this linkage unit if the symbol
852
+ // is defined and local to the module. This only checks for bitcode
853
+ // files, full assertion will require complete symbol resolution.
838
854
Res.FinalDefinitionInLinkageUnit =
839
855
Sym.getVisibility () != GlobalValue::DefaultVisibility &&
840
856
(!Sym.isUndefined () && !Sym.isCommon ());
@@ -887,8 +903,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
887
903
return Error::success ();
888
904
}
889
905
890
- // Append the new inputs to the device linker input. If the user requested an
891
- // internalizing link we need to pass the bitcode to clang.
906
+ // Append the new inputs to the device linker input. If the user requested
907
+ // an internalizing link we need to pass the bitcode to clang.
892
908
for (StringRef File :
893
909
Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ)
894
910
? BitcodeOutput
@@ -903,10 +919,9 @@ Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
903
919
904
920
StringRef Prefix =
905
921
sys::path::stem (Binary.getMemoryBufferRef ().getBufferIdentifier ());
906
- StringRef Suffix = getImageKindName (Binary.getImageKind ());
907
922
908
923
auto TempFileOrErr = createOutputFile (
909
- Prefix + " -" + Binary.getTriple () + " -" + Binary.getArch (), Suffix );
924
+ Prefix + " -" + Binary.getTriple () + " -" + Binary.getArch (), " o " );
910
925
if (!TempFileOrErr)
911
926
return TempFileOrErr.takeError ();
912
927
@@ -1119,8 +1134,8 @@ DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
1119
1134
DAL.AddJoinedArg (nullptr , Tbl.getOption (OPT_triple_EQ),
1120
1135
Args.MakeArgString (Input.front ().getBinary ()->getTriple ()));
1121
1136
1122
- // If every input file is bitcode we have whole program visibility as we do
1123
- // only support static linking with bitcode.
1137
+ // If every input file is bitcode we have whole program visibility as we
1138
+ // do only support static linking with bitcode.
1124
1139
auto ContainsBitcode = [](const OffloadFile &F) {
1125
1140
return identify_magic (F.getBinary ()->getImage ()) == file_magic::bitcode;
1126
1141
};
@@ -1208,12 +1223,15 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
1208
1223
if (File.getBinary ()->getOffloadKind () != OFK_None)
1209
1224
ActiveOffloadKinds.insert (File.getBinary ()->getOffloadKind ());
1210
1225
1211
- // First link and remove all the input files containing bitcode.
1226
+ // First link and remove all the input files containing bitcode if
1227
+ // the target linker does not support it natively.
1212
1228
SmallVector<StringRef> InputFiles;
1213
- if (Error Err = linkBitcodeFiles (Input, InputFiles, LinkerArgs))
1214
- return Err;
1229
+ if (!linkerSupportsLTO (LinkerArgs))
1230
+ if (Error Err = linkBitcodeFiles (Input, InputFiles, LinkerArgs))
1231
+ return Err;
1215
1232
1216
- // Write any remaining device inputs to an output file for the linker.
1233
+ // Write any remaining device inputs to an output file for the
1234
+ // linker.
1217
1235
for (const OffloadFile &File : Input) {
1218
1236
auto FileNameOrErr = writeOffloadFile (File);
1219
1237
if (!FileNameOrErr)
@@ -1222,9 +1240,10 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
1222
1240
}
1223
1241
1224
1242
// Link the remaining device files using the device linker.
1225
- auto OutputOrErr = !Args.hasArg (OPT_embed_bitcode)
1226
- ? linkDevice (InputFiles, LinkerArgs)
1227
- : InputFiles.front ();
1243
+ auto OutputOrErr =
1244
+ !Args.hasArg (OPT_embed_bitcode) || linkerSupportsLTO (LinkerArgs)
1245
+ ? linkDevice (InputFiles, LinkerArgs)
1246
+ : InputFiles.front ();
1228
1247
if (!OutputOrErr)
1229
1248
return OutputOrErr.takeError ();
1230
1249
@@ -1351,12 +1370,14 @@ Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
1351
1370
bool NewSymbol = Syms.count (Sym.getName ()) == 0 ;
1352
1371
auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName ()];
1353
1372
1354
- // We will extract if it defines a currenlty undefined non-weak symbol.
1373
+ // We will extract if it defines a currently undefined non-weak
1374
+ // symbol.
1355
1375
bool ResolvesStrongReference =
1356
1376
((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
1357
1377
!Sym.isUndefined ());
1358
- // We will extract if it defines a new global symbol visible to the host.
1359
- // This is only necessary for code targeting an offloading language.
1378
+ // We will extract if it defines a new global symbol visible to the
1379
+ // host. This is only necessary for code targeting an offloading
1380
+ // language.
1360
1381
bool NewGlobalSymbol =
1361
1382
((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined () &&
1362
1383
!Sym.canBeOmittedFromSymbolTable () && Kind != object::OFK_None &&
@@ -1411,8 +1432,9 @@ Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
1411
1432
!(OldSym & Sym_Weak) &&
1412
1433
!(*FlagsOrErr & SymbolRef::SF_Undefined);
1413
1434
1414
- // We will extract if it defines a new global symbol visible to the host.
1415
- // This is only necessary for code targeting an offloading language.
1435
+ // We will extract if it defines a new global symbol visible to the
1436
+ // host. This is only necessary for code targeting an offloading
1437
+ // language.
1416
1438
bool NewGlobalSymbol =
1417
1439
((NewSymbol || (OldSym & Sym_Undefined)) &&
1418
1440
!(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
@@ -1579,8 +1601,8 @@ getDeviceInput(const ArgList &Args) {
1579
1601
1580
1602
Expected<bool > ExtractOrErr =
1581
1603
getSymbols (Binary.getBinary ()->getImage (),
1582
- Binary.getBinary ()->getOffloadKind (), /* IsArchive= */ true ,
1583
- Saver, Syms[ID]);
1604
+ Binary.getBinary ()->getOffloadKind (),
1605
+ /* IsArchive= */ true , Saver, Syms[ID]);
1584
1606
if (!ExtractOrErr)
1585
1607
return ExtractOrErr.takeError ();
1586
1608
0 commit comments