Skip to content

Commit 34c4d8e

Browse files
committed
[AMDGPU][Attributor] Rework calculation of waves per eu
1 parent b7efaf3 commit 34c4d8e

32 files changed

+383
-351
lines changed

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,4 +899,5 @@ void test_set_fpenv(unsigned long env) {
899899

900900
// CHECK-DAG: [[$GRID_RANGE]] = !{i32 1, i32 0}
901901
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
902-
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }
902+
// CHECK-SPIRV-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) }
903+
// CHECK-AMDGCN-DAG: attributes #[[$NOUNWIND_READONLY]] = { convergent mustprogress nocallback nofree nounwind willreturn memory(none) "amdgpu-waves-per-eu"="4,10" }

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 82 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,47 +1108,25 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11081108
Function *F = getAssociatedFunction();
11091109
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11101110

1111-
auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
1112-
auto [Min, Max] = R;
1113-
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1114-
IntegerRangeState RangeState(Range);
1115-
clampStateAndIndicateChange(this->getState(), RangeState);
1116-
indicateOptimisticFixpoint();
1117-
};
1118-
1119-
std::pair<unsigned, unsigned> MaxWavesPerEURange{
1120-
1U, InfoCache.getMaxWavesPerEU(*F)};
1121-
11221111
// If the attribute exists, we will honor it if it is not the default.
11231112
if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1113+
std::pair<unsigned, unsigned> MaxWavesPerEURange{
1114+
1U, InfoCache.getMaxWavesPerEU(*F)};
11241115
if (*Attr != MaxWavesPerEURange) {
1125-
TakeRange(*Attr);
1116+
auto [Min, Max] = *Attr;
1117+
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1118+
IntegerRangeState RangeState(Range);
1119+
this->getState() = RangeState;
1120+
indicateOptimisticFixpoint();
11261121
return;
11271122
}
11281123
}
11291124

1130-
// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
1131-
// calculation of waves per EU involves flat work group size, we can't
1132-
// simply use an assumed flat work group size as a start point, because the
1133-
// update of flat work group size is in an inverse direction of waves per
1134-
// EU. However, we can still do something if it is an entry function. Since
1135-
// an entry function is a terminal node, and flat work group size either
1136-
// from attribute or default will be used anyway, we can take that value and
1137-
// calculate the waves per EU based on it. This result can't be updated by
1138-
// no means, but that could still allow us to propagate it.
1139-
if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1140-
std::pair<unsigned, unsigned> FlatWorkGroupSize;
1141-
if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
1142-
FlatWorkGroupSize = *Attr;
1143-
else
1144-
FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
1145-
TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
1146-
FlatWorkGroupSize));
1147-
}
1125+
if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1126+
indicatePessimisticFixpoint();
11481127
}
11491128

11501129
ChangeStatus updateImpl(Attributor &A) override {
1151-
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11521130
ChangeStatus Change = ChangeStatus::UNCHANGED;
11531131

11541132
auto CheckCallSite = [&](AbstractCallSite CS) {
@@ -1157,24 +1135,21 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11571135
LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
11581136
<< "->" << Func->getName() << '\n');
11591137

1160-
const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
1138+
const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
11611139
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1162-
const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1163-
*this, IRPosition::function(*Func), DepClassTy::REQUIRED);
1164-
if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
1165-
!AssumedGroupSize->isValidState())
1140+
if (!CallerAA || !CallerAA->isValidState())
11661141
return false;
11671142

1168-
unsigned Min, Max;
1169-
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
1170-
*Caller,
1171-
{CallerInfo->getAssumed().getLower().getZExtValue(),
1172-
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1173-
{AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1174-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1175-
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
1176-
IntegerRangeState CallerRangeState(CallerRange);
1177-
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
1143+
auto Assumed = this->getAssumed();
1144+
unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1145+
CallerAA->getAssumed().getLower().getZExtValue());
1146+
unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1147+
CallerAA->getAssumed().getUpper().getZExtValue());
1148+
ConstantRange Range(APInt(32, Min), APInt(32, Max));
1149+
IntegerRangeState RangeState(Range);
1150+
this->getState() = RangeState;
1151+
Change |= this->getState() == Assumed ? ChangeStatus::UNCHANGED
1152+
: ChangeStatus::CHANGED;
11781153

11791154
return true;
11801155
};
@@ -1333,6 +1308,59 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
13331308
}
13341309
}
13351310

1311+
static void checkWavesPerEU(Module &M, TargetMachine &TM) {
1312+
for (Function &F : M) {
1313+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1314+
1315+
auto FlatWgrpSizeAttr =
1316+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1317+
auto WavesPerEUAttr = AMDGPU::getIntegerPairAttribute(
1318+
F, "amdgpu-waves-per-eu", /*OnlyFirstRequired=*/true);
1319+
1320+
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
1321+
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
1322+
1323+
unsigned MinFlatWgrpSize = 1U;
1324+
unsigned MaxFlatWgrpSize = 1024U;
1325+
if (FlatWgrpSizeAttr.has_value()) {
1326+
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
1327+
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
1328+
}
1329+
1330+
// Start with the max range.
1331+
unsigned Min = MinWavesPerEU;
1332+
unsigned Max = MaxWavesPerEU;
1333+
1334+
// If the attribute exists, set them to the value from the attribute.
1335+
if (WavesPerEUAttr.has_value()) {
1336+
Min = WavesPerEUAttr->first;
1337+
if (WavesPerEUAttr->second.has_value())
1338+
Max = *(WavesPerEUAttr->second);
1339+
}
1340+
1341+
// Compute the range from flat workgroup size.
1342+
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1343+
ST.getWavesPerEU(F, std::make_pair(MinFlatWgrpSize, MaxFlatWgrpSize));
1344+
1345+
// For the lower bound, we have to "tighten" it.
1346+
Min = std::max(Min, MinFromFlatWgrpSize);
1347+
// For the upper bound, we have to "extend" it.
1348+
Max = std::max(Max, MaxFromFlatWgrpSize);
1349+
1350+
// Clamp the range to the max range.
1351+
Min = std::max(Min, MinWavesPerEU);
1352+
Max = std::min(Max, MaxWavesPerEU);
1353+
1354+
// Update the attribute if it is not the max.
1355+
if (Min != MinWavesPerEU || Max != MaxWavesPerEU) {
1356+
SmallString<10> Buffer;
1357+
raw_svector_ostream OS(Buffer);
1358+
OS << Min << ',' << Max;
1359+
F.addFnAttr("amdgpu-waves-per-eu", OS.str());
1360+
}
1361+
}
1362+
}
1363+
13361364
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13371365
AMDGPUAttributorOptions Options,
13381366
ThinOrFullLTOPhase LTOPhase) {
@@ -1408,8 +1436,14 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14081436
}
14091437
}
14101438

1411-
ChangeStatus Change = A.run();
1412-
return Change == ChangeStatus::CHANGED;
1439+
bool Changed = A.run() == ChangeStatus::CHANGED;
1440+
1441+
if (Changed && (LTOPhase == ThinOrFullLTOPhase::None ||
1442+
LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1443+
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink))
1444+
checkWavesPerEU(M, TM);
1445+
1446+
return Changed;
14131447
}
14141448

14151449
class AMDGPUAttributorLegacy : public ModulePass {

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44

55
declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0
66

7-
@lds.i32 = unnamed_addr addrspace(3) global i32 poison, align 4
8-
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] poison, align 4
7+
@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
8+
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
99

10-
@global.i32 = unnamed_addr addrspace(1) global i32 poison, align 4
11-
@global.arr = unnamed_addr addrspace(1) global [256 x i32] poison, align 4
10+
@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
11+
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
1212

1313
;.
14-
; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 poison, align 4
15-
; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] poison, align 4
16-
; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 poison, align 4
17-
; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] poison, align 4
14+
; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
15+
; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
16+
; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
17+
; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
1818
;.
1919
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
2020
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
@@ -232,9 +232,9 @@ attributes #1 = { nounwind }
232232
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235-
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
235+
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "amdgpu-waves-per-eu"="4,10" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
238238
;.
239239
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
240240
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,13 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
252252
}
253253

254254

255-
attributes #0 = { "amdgpu-agpr-alloc"="0" }
255+
attributes #0 = { "amdgpu-no-agpr" }
256256
;.
257-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259-
; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
261-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
262-
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
263-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
257+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
261+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
262+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" }
263+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" }
264264
;.

llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ define void @call_no_dispatch_id() {
117117
ret void
118118
}
119119
;.
120-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
121-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
122-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
123-
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" }
124-
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" }
125-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" }
126-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" }
127-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "uniform-work-group-size"="false" }
128-
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
129-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
120+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
121+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
122+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
123+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
124+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
125+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
126+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
127+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
128+
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
129+
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
130130
;.

0 commit comments

Comments
 (0)