Skip to content

Commit b58bf9d

Browse files
committed
Squashed spirv patch, which adds post-llvm-19 spirv work
Adds the following patches:
- AMDGPU: Remove wavefrontsize64 feature from dummy target (llvm#117410)
- [LLVM][NFC] Use used's element type if available (llvm#116804)
- [llvm][AMDGPU] Fold llvm.amdgcn.wavefrontsize early (llvm#114481)
- [clang][Driver][HIP] Add support for mixing AMDGCNSPIRV & concrete offload-archs (llvm#113509)
- [clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V (llvm#110695)
- [llvm][opt][Transforms] Replacement calloc should match replaced malloc (llvm#110524)
- [clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV (llvm#110447)
- [cuda][HIP] constant should imply constant (llvm#110182)
- [llvm][SPIRV] Expose fast popcnt support for SPIR-V targets (llvm#109845)
- [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface (llvm#109415)
- [SPIRV][RFC] Rework / extend support for memory scopes (llvm#106429)
- [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions (llvm#102776)

Change-Id: I2b9ab54aba1c9345b9b0eb84409e6ed6c3cdb6cd
1 parent: c40b49a · commit: b58bf9d

File tree

78 files changed

+2209
-484
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

78 files changed

+2209
-484
lines changed

clang/lib/Basic/Targets/SPIR.h

Lines changed: 29 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -258,8 +258,11 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
258258
PointerWidth = PointerAlign = 32;
259259
SizeType = TargetInfo::UnsignedInt;
260260
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
261-
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
262-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
261+
// SPIR-V has core support for atomic ops, and Int32 is always available;
262+
// we take the maximum because it's possible the Host supports wider types.
263+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
264+
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
265+
"v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
263266
}
264267

265268
void getTargetDefines(const LangOptions &Opts,
@@ -275,8 +278,11 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
275278
PointerWidth = PointerAlign = 64;
276279
SizeType = TargetInfo::UnsignedLong;
277280
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
278-
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
279-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
281+
// SPIR-V has core support for atomic ops, and Int64 is always available;
282+
// we take the maximum because it's possible the Host supports wider types.
283+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
284+
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
285+
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
280286
}
281287

282288
void getTargetDefines(const LangOptions &Opts,
@@ -314,8 +320,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
314320

315321
// SPIR-V IDs are represented with a single 32-bit word.
316322
SizeType = TargetInfo::UnsignedInt;
317-
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
318-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
323+
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
324+
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
319325
}
320326

321327
void getTargetDefines(const LangOptions &Opts,
@@ -335,8 +341,11 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
335341
PointerWidth = PointerAlign = 32;
336342
SizeType = TargetInfo::UnsignedInt;
337343
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
338-
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
339-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
344+
// SPIR-V has core support for atomic ops, and Int32 is always available;
345+
// we take the maximum because it's possible the Host supports wider types.
346+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
347+
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
348+
"v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
340349
}
341350

342351
void getTargetDefines(const LangOptions &Opts,
@@ -356,8 +365,11 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo {
356365
PointerWidth = PointerAlign = 64;
357366
SizeType = TargetInfo::UnsignedLong;
358367
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
359-
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
360-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
368+
// SPIR-V has core support for atomic ops, and Int64 is always available;
369+
// we take the maximum because it's possible the Host supports wider types.
370+
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
371+
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
372+
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
361373
}
362374

363375
void getTargetDefines(const LangOptions &Opts,
@@ -380,9 +392,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
380392
PointerWidth = PointerAlign = 64;
381393
SizeType = TargetInfo::UnsignedLong;
382394
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
395+
AddrSpaceMap = &SPIRDefIsGenMap;
383396

384-
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
385-
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
397+
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
398+
"v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0");
386399

387400
BFloat16Width = BFloat16Align = 16;
388401
BFloat16Format = &llvm::APFloat::BFloat();
@@ -412,6 +425,10 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
412425

413426
void setAuxTarget(const TargetInfo *Aux) override;
414427

428+
void adjust(DiagnosticsEngine &Diags, LangOptions &Opts) override {
429+
TargetInfo::adjust(Diags, Opts);
430+
}
431+
415432
bool hasInt128Type() const override { return TargetInfo::hasInt128Type(); }
416433
};
417434

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -766,8 +766,19 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
766766
// LLVM atomic instructions always have synch scope. If clang atomic
767767
// expression has no scope operand, use default LLVM synch scope.
768768
if (!ScopeModel) {
769+
llvm::SyncScope::ID SS;
770+
if (CGF.getLangOpts().OpenCL)
771+
// OpenCL approach is: "The functions that do not have memory_scope
772+
// argument have the same semantics as the corresponding functions with
773+
// the memory_scope argument set to memory_scope_device." See ref.:
774+
// https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
775+
SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
776+
SyncScope::OpenCLDevice,
777+
Order, CGF.getLLVMContext());
778+
else
779+
SS = llvm::SyncScope::System;
769780
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
770-
Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
781+
Order, SS);
771782
return;
772783
}
773784

clang/lib/CodeGen/CGDeclCXX.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -796,7 +796,10 @@ void CodeGenModule::EmitCXXModuleInitFunc(Module *Primary) {
796796
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
797797
getLangOpts().GPUAllowDeviceInit);
798798
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
799-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
799+
if (getTriple().isSPIRV())
800+
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
801+
else
802+
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
800803
Fn->addFnAttr("device-init");
801804
}
802805

@@ -954,7 +957,10 @@ CodeGenModule::EmitCXXGlobalInitFunc() {
954957
assert(!getLangOpts().CUDA || !getLangOpts().CUDAIsDevice ||
955958
getLangOpts().GPUAllowDeviceInit);
956959
if (getLangOpts().HIP && getLangOpts().CUDAIsDevice) {
957-
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
960+
if (getTriple().isSPIRV())
961+
Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
962+
else
963+
Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
958964
Fn->addFnAttr("device-init");
959965
}
960966

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2978,13 +2978,15 @@ static void emitUsed(CodeGenModule &CGM, StringRef Name,
29782978
for (unsigned i = 0, e = List.size(); i != e; ++i) {
29792979
UsedArray[i] = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
29802980
cast<llvm::Constant>(&*List[i]),
2981-
llvm::PointerType::getUnqual(CGM.getLLVMContext()));
2981+
CGM.getTarget().getTriple().isAMDGCN() ?
2982+
llvm::PointerType::getUnqual(CGM.getLLVMContext()) :
2983+
CGM.Int8PtrTy);
29822984
}
29832985

29842986
if (UsedArray.empty())
29852987
return;
2986-
llvm::ArrayType *ATy = llvm::ArrayType::get(
2987-
llvm::PointerType::getUnqual(CGM.getLLVMContext()), UsedArray.size());
2988+
llvm::ArrayType *ATy =
2989+
llvm::ArrayType::get(UsedArray[0]->getType(), UsedArray.size());
29882990

29892991
auto *GV = new llvm::GlobalVariable(
29902992
CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage,
@@ -5575,8 +5577,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
55755577
emitter->finalize(GV);
55765578

55775579
// If it is safe to mark the global 'constant', do so now.
5578-
GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
5579-
D->getType().isConstantStorage(getContext(), true, true));
5580+
GV->setConstant((D->hasAttr<CUDAConstantAttr>() && LangOpts.CUDAIsDevice) ||
5581+
(!NeedsGlobalCtor && !NeedsGlobalDtor &&
5582+
D->getType().isConstantStorage(getContext(), true, true)));
55805583

55815584
// If it is in a read-only section, mark it 'constant'.
55825585
if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {

clang/lib/CodeGen/Targets/SPIR.cpp

Lines changed: 123 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
3232
void computeInfo(CGFunctionInfo &FI) const override;
3333

3434
private:
35+
ABIArgInfo classifyReturnType(QualType RetTy) const;
3536
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
37+
ABIArgInfo classifyArgumentType(QualType Ty) const;
3638
};
3739
} // end anonymous namespace
3840
namespace {
@@ -56,14 +58,66 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
5658
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
5759
: CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
5860
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
61+
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
62+
const VarDecl *D) const override;
63+
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
64+
SyncScope Scope,
65+
llvm::AtomicOrdering Ordering,
66+
llvm::LLVMContext &Ctx) const override;
5967
};
68+
69+
inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
70+
switch (Scope) {
71+
case SyncScope::HIPSingleThread:
72+
case SyncScope::SingleScope:
73+
return "singlethread";
74+
case SyncScope::HIPWavefront:
75+
case SyncScope::OpenCLSubGroup:
76+
case SyncScope::WavefrontScope:
77+
return "subgroup";
78+
case SyncScope::HIPWorkgroup:
79+
case SyncScope::OpenCLWorkGroup:
80+
case SyncScope::WorkgroupScope:
81+
return "workgroup";
82+
case SyncScope::HIPAgent:
83+
case SyncScope::OpenCLDevice:
84+
case SyncScope::DeviceScope:
85+
return "device";
86+
case SyncScope::SystemScope:
87+
case SyncScope::HIPSystem:
88+
case SyncScope::OpenCLAllSVMDevices:
89+
return "";
90+
}
91+
return "";
92+
}
6093
} // End anonymous namespace.
6194

6295
void CommonSPIRABIInfo::setCCs() {
6396
assert(getRuntimeCC() == llvm::CallingConv::C);
6497
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
6598
}
6699

100+
ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
101+
if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
102+
return DefaultABIInfo::classifyReturnType(RetTy);
103+
if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
104+
return DefaultABIInfo::classifyReturnType(RetTy);
105+
106+
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
107+
const RecordDecl *RD = RT->getDecl();
108+
if (RD->hasFlexibleArrayMember())
109+
return DefaultABIInfo::classifyReturnType(RetTy);
110+
}
111+
112+
// TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to
113+
// avoid encoding various architecture specific bits here we return everything
114+
// as direct to retain type info for things like aggregates, for later perusal
115+
// when translating back to LLVM/lowering in the BE. This is also why we
116+
// disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU.
117+
// This will be revisited / optimised in the future.
118+
return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
119+
}
120+
67121
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
68122
if (getContext().getLangOpts().CUDAIsDevice) {
69123
// Coerce pointer arguments with default address space to CrossWorkGroup
@@ -78,18 +132,51 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
78132
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
79133
}
80134

81-
// Force copying aggregate type in kernel arguments by value when
82-
// compiling CUDA targeting SPIR-V. This is required for the object
83-
// copied to be valid on the device.
84-
// This behavior follows the CUDA spec
85-
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
86-
// and matches the NVPTX implementation.
87-
if (isAggregateTypeForABI(Ty))
135+
if (isAggregateTypeForABI(Ty)) {
136+
if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
137+
// TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
138+
// currently expressible in SPIR-V; SPIR-V passes aggregates byval,
139+
// which the AMDGPU kernel ABI does not allow. Passing aggregates as
140+
// direct works around this impedance mismatch, as it retains type info
141+
// and can be correctly handled, post reverse-translation, by the AMDGPU
142+
// BE, which has to support this CC for legacy OpenCL purposes. It can
143+
// be brittle and does lead to performance degradation in certain
144+
// pathological cases. This will be revisited / optimised in the future,
145+
// once a way to deal with the byref/byval impedance mismatch is
146+
// identified.
147+
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
148+
// Force copying aggregate type in kernel arguments by value when
149+
// compiling CUDA targeting SPIR-V. This is required for the object
150+
// copied to be valid on the device.
151+
// This behavior follows the CUDA spec
152+
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
153+
// and matches the NVPTX implementation.
88154
return getNaturalAlignIndirect(Ty, /* byval */ true);
155+
}
89156
}
90157
return classifyArgumentType(Ty);
91158
}
92159

160+
ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
161+
if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
162+
return DefaultABIInfo::classifyArgumentType(Ty);
163+
if (!isAggregateTypeForABI(Ty))
164+
return DefaultABIInfo::classifyArgumentType(Ty);
165+
166+
// Records with non-trivial destructors/copy-constructors should not be
167+
// passed by value.
168+
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
169+
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
170+
171+
if (const RecordType *RT = Ty->getAs<RecordType>()) {
172+
const RecordDecl *RD = RT->getDecl();
173+
if (RD->hasFlexibleArrayMember())
174+
return DefaultABIInfo::classifyArgumentType(Ty);
175+
}
176+
177+
return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
178+
}
179+
93180
void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
94181
// The logic is same as in DefaultABIInfo with an exception on the kernel
95182
// arguments handling.
@@ -132,6 +219,35 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
132219
}
133220
}
134221

222+
LangAS
223+
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
224+
const VarDecl *D) const {
225+
assert(!CGM.getLangOpts().OpenCL &&
226+
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
227+
"Address space agnostic languages only");
228+
// If we're here it means that we're using the SPIRDefIsGen ASMap, hence for
229+
// the global AS we can rely on either cuda_device or sycl_global to be
230+
// correct; however, since this is not a CUDA Device context, we use
231+
// sycl_global to prevent confusion with the assertion.
232+
LangAS DefaultGlobalAS = getLangASFromTargetAS(
233+
CGM.getContext().getTargetAddressSpace(LangAS::sycl_global));
234+
if (!D)
235+
return DefaultGlobalAS;
236+
237+
LangAS AddrSpace = D->getType().getAddressSpace();
238+
if (AddrSpace != LangAS::Default)
239+
return AddrSpace;
240+
241+
return DefaultGlobalAS;
242+
}
243+
244+
llvm::SyncScope::ID
245+
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
246+
llvm::AtomicOrdering,
247+
llvm::LLVMContext &Ctx) const {
248+
return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope));
249+
}
250+
135251
/// Construct a SPIR-V target extension type for the given OpenCL image type.
136252
static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
137253
StringRef OpenCLName,

clang/lib/Driver/Driver.cpp

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -150,13 +150,9 @@ static std::optional<llvm::Triple>
150150
getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
151151
if (!Args.hasArg(options::OPT_offload_EQ)) {
152152
auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
153-
if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) {
154-
if (OffloadArchs.size() == 1)
155-
return llvm::Triple("spirv64-amd-amdhsa");
156-
// Mixing specific & SPIR-V compilation is not supported for now.
157-
D.Diag(diag::err_drv_only_one_offload_target_supported);
158-
return std::nullopt;
159-
}
153+
if (llvm::is_contained(OffloadArchs, "amdgcnspirv") &&
154+
OffloadArchs.size() == 1)
155+
return llvm::Triple("spirv64-amd-amdhsa");
160156
return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple.
161157
}
162158
auto TT = getOffloadTargetTriple(D, Args);
@@ -3468,9 +3464,11 @@ class OffloadingActionBuilder final {
34683464
llvm::StringMap<bool> Features;
34693465
// getHIPOffloadTargetTriple() is known to return valid value as it has
34703466
// been called successfully in the CreateOffloadingDeviceToolChains().
3471-
auto ArchStr = parseTargetID(
3472-
*getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), IdStr,
3473-
&Features);
3467+
auto T =
3468+
(IdStr == "amdgcnspirv")
3469+
? llvm::Triple("spirv64-amd-amdhsa")
3470+
: *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
3471+
auto ArchStr = parseTargetID(T, IdStr, &Features);
34743472
if (!ArchStr) {
34753473
C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
34763474
C.setContainsError();
@@ -5992,7 +5990,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
59925990
// We only have to generate a prefix for the host if this is not a top-level
59935991
// action.
59945992
std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
5995-
A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
5993+
A->getOffloadingDeviceKind(), EffectiveTriple.normalize(),
59965994
/*CreatePrefixForHost=*/isa<OffloadPackagerJobAction>(A) ||
59975995
!(A->getOffloadingHostActiveKinds() == Action::OFK_None ||
59985996
AtTopLevel));

clang/lib/Driver/ToolChain.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1054,6 +1054,12 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args,
10541054
}
10551055
case llvm::Triple::aarch64_32:
10561056
return getTripleString();
1057+
case llvm::Triple::amdgcn: {
1058+
llvm::Triple Triple = getTriple();
1059+
if (Args.getLastArgValue(options::OPT_mcpu_EQ) == "amdgcnspirv")
1060+
Triple.setArch(llvm::Triple::ArchType::spirv64);
1061+
return Triple.getTriple();
1062+
}
10571063
case llvm::Triple::arm:
10581064
case llvm::Triple::armeb:
10591065
case llvm::Triple::thumb:

0 commit comments

Comments (0)