-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[clang][SPIR-V] Add support for AMDGCN flavoured SPIRV #89796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
662f160
393ce66
2a10ad0
98db8f7
c359e0a
e98f3f5
c41726d
4698b58
aa1cd7c
f9729ef
8257cb1
900cd69
3307f17
eee6063
d2f4244
4cb4026
84a621d
1841385
0ce2da3
120b73c
f3942bd
31ac77d
0e9b1a1
83cd5e0
e9158b0
05074e7
e1fb93f
36c4bf6
5ffa186
cf1880c
4d85a1b
516e14c
bdc3eb5
361d47b
b088c72
e85b557
1d41787
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
|
||
#include "SPIR.h" | ||
#include "Targets.h" | ||
#include "clang/Basic/Builtins.h" | ||
#include "clang/Basic/TargetBuiltins.h" | ||
|
||
using namespace clang; | ||
using namespace clang::targets; | ||
|
@@ -54,3 +56,289 @@ void SPIRV64TargetInfo::getTargetDefines(const LangOptions &Opts, | |
BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder); | ||
DefineStd(Builder, "SPIRV64", Opts); | ||
} | ||
|
||
static constexpr Builtin::Info BuiltinInfo[] = { | ||
#define BUILTIN(ID, TYPE, ATTRS) \ | ||
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, | ||
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ | ||
{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, | ||
#include "clang/Basic/BuiltinsAMDGPU.def" | ||
}; | ||
|
||
namespace { | ||
const char *AMDGPUGCCRegNames[] = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we could create a common base class for amdgpu target info and spirv64 amdgcn target info and not duplicate stuff. we can define a static member for reg names. |
||
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", | ||
"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", | ||
"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", | ||
"v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", | ||
"v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", | ||
"v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", | ||
"v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", | ||
"v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", | ||
"v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", | ||
"v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", | ||
"v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", | ||
"v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", | ||
"v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", | ||
"v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", | ||
"v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", | ||
"v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", | ||
"v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", | ||
"v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", | ||
"v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", | ||
"v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", | ||
"v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", | ||
"v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", | ||
"v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", | ||
"v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", | ||
"v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", | ||
"v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", | ||
"v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", | ||
"v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", | ||
"v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4", | ||
"s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", | ||
"s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", | ||
"s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", | ||
"s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40", | ||
"s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", | ||
"s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", | ||
"s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", | ||
"s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76", | ||
"s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85", | ||
"s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94", | ||
"s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103", | ||
"s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112", | ||
"s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121", | ||
"s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc", | ||
"m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi", | ||
"flat_scratch_lo", "flat_scratch_hi", | ||
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", | ||
"a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", | ||
"a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", | ||
"a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", | ||
"a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", | ||
"a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", | ||
"a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", | ||
"a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", | ||
"a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", | ||
"a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", | ||
"a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", | ||
"a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", | ||
"a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", | ||
"a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", | ||
"a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134", | ||
"a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", | ||
"a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", | ||
"a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", | ||
"a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", | ||
"a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", | ||
"a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", | ||
"a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", | ||
"a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", | ||
"a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", | ||
"a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", | ||
"a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", | ||
"a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", | ||
"a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", | ||
"a252", "a253", "a254", "a255" | ||
}; | ||
|
||
} // anonymous namespace | ||
|
||
ArrayRef<const char *> SPIRV64AMDGCNTargetInfo::getGCCRegNames() const { | ||
return llvm::ArrayRef(AMDGPUGCCRegNames); | ||
} | ||
|
||
bool SPIRV64AMDGCNTargetInfo::initFeatureMap( | ||
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef, | ||
const std::vector<std::string> &FeatureVec) const { | ||
// This represents the union of all AMDGCN features. | ||
Features["atomic-ds-pk-add-16-insts"] = true; | ||
Features["atomic-flat-pk-add-16-insts"] = true; | ||
Features["atomic-buffer-global-pk-add-f16-insts"] = true; | ||
Features["atomic-global-pk-add-bf16-inst"] = true; | ||
Features["atomic-fadd-rtn-insts"] = true; | ||
Features["ci-insts"] = true; | ||
Features["dot1-insts"] = true; | ||
Features["dot2-insts"] = true; | ||
Features["dot3-insts"] = true; | ||
Features["dot4-insts"] = true; | ||
Features["dot5-insts"] = true; | ||
Features["dot7-insts"] = true; | ||
Features["dot8-insts"] = true; | ||
Features["dot9-insts"] = true; | ||
Features["dot10-insts"] = true; | ||
Features["dot11-insts"] = true; | ||
Features["dl-insts"] = true; | ||
Features["16-bit-insts"] = true; | ||
Features["dpp"] = true; | ||
Features["gfx8-insts"] = true; | ||
Features["gfx9-insts"] = true; | ||
Features["gfx90a-insts"] = true; | ||
Features["gfx940-insts"] = true; | ||
Features["gfx10-insts"] = true; | ||
Features["gfx10-3-insts"] = true; | ||
Features["gfx11-insts"] = true; | ||
Features["gfx12-insts"] = true; | ||
Features["image-insts"] = true; | ||
Features["fp8-conversion-insts"] = true; | ||
Features["s-memrealtime"] = true; | ||
Features["s-memtime-inst"] = true; | ||
Features["gws"] = true; | ||
Features["fp8-insts"] = true; | ||
Features["fp8-conversion-insts"] = true; | ||
Features["atomic-ds-pk-add-16-insts"] = true; | ||
Features["mai-insts"] = true; | ||
|
||
return TargetInfo::initFeatureMap(Features, Diags, {}, FeatureVec); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not want to maintain 2 copies of either of these things. Can we just move this to a common place? I thought we already moved the Features gunk into lib/TargetSupport There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see a common place for the register names, the current design assumes they're a private static detail of a target - suggestions welcome though, I agree the duplication is pretty terrible. As for the features, if you're thinking about extending and then re-using |
||
|
||
bool SPIRV64AMDGCNTargetInfo::validateAsmConstraint( | ||
const char *&Name, TargetInfo::ConstraintInfo &Info) const { | ||
// This is a 1:1 copy of AMDGPUTargetInfo::validateAsmConstraint() | ||
static const ::llvm::StringSet<> SpecialRegs({ | ||
"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma", | ||
"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo", | ||
"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi", | ||
}); | ||
|
||
switch (*Name) { | ||
case 'I': | ||
Info.setRequiresImmediate(-16, 64); | ||
return true; | ||
case 'J': | ||
Info.setRequiresImmediate(-32768, 32767); | ||
return true; | ||
case 'A': | ||
case 'B': | ||
case 'C': | ||
Info.setRequiresImmediate(); | ||
return true; | ||
default: | ||
break; | ||
} | ||
|
||
StringRef S(Name); | ||
|
||
if (S == "DA" || S == "DB") { | ||
Name++; | ||
Info.setRequiresImmediate(); | ||
return true; | ||
} | ||
|
||
bool HasLeftParen = S.consume_front("{"); | ||
if (S.empty()) | ||
return false; | ||
if (S.front() != 'v' && S.front() != 's' && S.front() != 'a') { | ||
if (!HasLeftParen) | ||
return false; | ||
auto E = S.find('}'); | ||
if (!SpecialRegs.count(S.substr(0, E))) | ||
return false; | ||
S = S.drop_front(E + 1); | ||
if (!S.empty()) | ||
return false; | ||
// Found {S} where S is a special register. | ||
Info.setAllowsRegister(); | ||
Name = S.data() - 1; | ||
return true; | ||
} | ||
S = S.drop_front(); | ||
if (!HasLeftParen) { | ||
if (!S.empty()) | ||
return false; | ||
// Found s, v or a. | ||
Info.setAllowsRegister(); | ||
Name = S.data() - 1; | ||
return true; | ||
} | ||
bool HasLeftBracket = S.consume_front("["); | ||
unsigned long long N; | ||
if (S.empty() || consumeUnsignedInteger(S, 10, N)) | ||
return false; | ||
if (S.consume_front(":")) { | ||
if (!HasLeftBracket) | ||
return false; | ||
unsigned long long M; | ||
if (consumeUnsignedInteger(S, 10, M) || N >= M) | ||
return false; | ||
} | ||
if (HasLeftBracket) { | ||
if (!S.consume_front("]")) | ||
return false; | ||
} | ||
if (!S.consume_front("}")) | ||
return false; | ||
if (!S.empty()) | ||
return false; | ||
// Found {vn}, {sn}, {an}, {v[n]}, {s[n]}, {a[n]}, {v[n:m]}, {s[n:m]} | ||
// or {a[n:m]}. | ||
Info.setAllowsRegister(); | ||
Name = S.data() - 1; | ||
return true; | ||
} | ||
|
||
std::string | ||
SPIRV64AMDGCNTargetInfo::convertConstraint(const char *&Constraint) const { | ||
// This is a 1:1 copy of AMDGPUTargetInfo::convertConstraint() | ||
StringRef S(Constraint); | ||
if (S == "DA" || S == "DB") { | ||
return std::string("^") + std::string(Constraint++, 2); | ||
} | ||
|
||
const char *Begin = Constraint; | ||
TargetInfo::ConstraintInfo Info("", ""); | ||
if (validateAsmConstraint(Constraint, Info)) | ||
return std::string(Begin).substr(0, Constraint - Begin + 1); | ||
|
||
Constraint = Begin; | ||
return std::string(1, *Constraint); | ||
} | ||
|
||
ArrayRef<Builtin::Info> SPIRV64AMDGCNTargetInfo::getTargetBuiltins() const { | ||
return llvm::ArrayRef(BuiltinInfo, | ||
clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); | ||
} | ||
|
||
void SPIRV64AMDGCNTargetInfo::getTargetDefines(const LangOptions &Opts, | ||
MacroBuilder &Builder) const { | ||
BaseSPIRVTargetInfo::getTargetDefines(Opts, Builder); | ||
DefineStd(Builder, "SPIRV64", Opts); | ||
|
||
Builder.defineMacro("__AMD__"); | ||
Builder.defineMacro("__AMDGPU__"); | ||
Builder.defineMacro("__AMDGCN__"); | ||
Comment on lines
+93
to
+95
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these defined on both the host and device? I remember having a quite annoying time with these macros because HIP was defining stuff like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've always thought defining those for both targets was obtuse, since those should act like architecture macros, i.e. (X86). But considering this is how it's done already, I suppose it's a necessary evil. |
||
} | ||
|
||
void SPIRV64AMDGCNTargetInfo::setAuxTarget(const TargetInfo *Aux) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I can tell, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a reasonable assumption. Since HIP/CUDA needs to make sure type consistency between host and device, normal compilation by clang driver always specify aux-target for device compilation. |
||
// This is a 1:1 copy of AMDGPUTargetInfo::setAuxTarget() | ||
assert(HalfFormat == Aux->HalfFormat); | ||
assert(FloatFormat == Aux->FloatFormat); | ||
assert(DoubleFormat == Aux->DoubleFormat); | ||
|
||
// On x86_64 long double is 80-bit extended precision format, which is | ||
// not supported by AMDGPU. 128-bit floating point format is also not | ||
// supported by AMDGPU. Therefore keep its own format for these two types. | ||
auto SaveLongDoubleFormat = LongDoubleFormat; | ||
auto SaveFloat128Format = Float128Format; | ||
auto SaveLongDoubleWidth = LongDoubleWidth; | ||
auto SaveLongDoubleAlign = LongDoubleAlign; | ||
copyAuxTarget(Aux); | ||
LongDoubleFormat = SaveLongDoubleFormat; | ||
Float128Format = SaveFloat128Format; | ||
LongDoubleWidth = SaveLongDoubleWidth; | ||
LongDoubleAlign = SaveLongDoubleAlign; | ||
// For certain builtin types support on the host target, claim they are | ||
// supported to pass the compilation of the host code during the device-side | ||
// compilation. | ||
// FIXME: As the side effect, we also accept `__float128` uses in the device | ||
// code. To reject these builtin types supported in the host target but not in | ||
// the device target, one approach would support `device_builtin` attribute | ||
// so that we could tell the device builtin types from the host ones. This | ||
// also solves the different representations of the same builtin type, such | ||
// as `size_t` in the MSVC environment. | ||
if (Aux->hasFloat128Type()) { | ||
HasFloat128 = true; | ||
Float128Format = DoubleFormat; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6083,6 +6083,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, | |
StringRef Prefix = | ||
llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch()); | ||
if (!Prefix.empty()) { | ||
if (Prefix == "spv" && | ||
getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA) | ||
Prefix = "amdgcn"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this required for? I'm wondering why we'd need to reset the prefix here instead of updating the logic somewhere else. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite see any other point to tweak the logic. We have some There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I was just wondering if it would make more sense to put this in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we can, with the current signature of that interface / the current approach of keeping There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we want to allow this choice to be target-specific, we can just make the "IntrinsicPrefix" a property of the TargetInfo, instead of trying to derive it directly from the target triple. The default constructor would derive it from the triple, but specific targets could override the field. I'm more concerned this is going to prevent us from using builtins which shoud be usable; do we need to getIntrinsicForClangBuiltin() for both "spv" and "amdgcn"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The second bit is actually a good catch, I had assumed there's no SPIRV builtins, however looking at it it seems a few exist, so it appears we'll need both. I'll rework this part, cheers! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've fixed the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can look at it as a followup, sure; the code in its current form seems okay for now. |
||
IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name); | ||
// NOTE we don't need to perform a compatibility flag check here since the | ||
// intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the | ||
|
@@ -6254,6 +6257,10 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, | |
case llvm::Triple::riscv32: | ||
case llvm::Triple::riscv64: | ||
return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); | ||
case llvm::Triple::spirv64: | ||
if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) | ||
return nullptr; | ||
return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); | ||
default: | ||
return nullptr; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.