Skip to content

Commit 41ed16c

Browse files
authored
Reapply "[AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (#94647)" (#118907)
This reverts commit 1ef9410. This fixes the test file attributor-flatscratchinit-globalisel.ll.
1 parent e9c68c6 commit 41ed16c

32 files changed

+1402
-170
lines changed

clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ __global__ void kernel4(struct S s) {
432432
// CHECK-SPIRV-NEXT: ret void
433433
//
434434
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
435-
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
435+
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
436436
// OPT-NEXT: [[ENTRY:.*:]]
437437
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
438438
// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4

llvm/lib/Target/AMDGPU/AMDGPUAttributes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
3030
AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
3131
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
3232
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
33+
AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
3334

3435
#undef AMDGPU_ATTRIBUTE

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,18 @@ class AMDGPUInformationCache : public InformationCache {
265265
return !HasAperture && (Access & ADDR_SPACE_CAST);
266266
}
267267

268+
bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
269+
SmallPtrSet<const Constant *, 8> Visited;
270+
uint8_t Access = getConstantAccess(C, Visited);
271+
272+
if (Access & ADDR_SPACE_CAST)
273+
if (const auto *CE = dyn_cast<ConstantExpr>(C))
274+
if (CE->getOperand(0)->getType()->getPointerAddressSpace() ==
275+
AMDGPUAS::PRIVATE_ADDRESS)
276+
return true;
277+
return false;
278+
}
279+
268280
private:
269281
/// Used to determine if the Constant needs the queue pointer.
270282
DenseMap<const Constant *, uint8_t> ConstantStatus;
@@ -529,6 +541,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
529541
if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
530542
removeAssumedBits(COMPLETION_ACTION);
531543

544+
if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
545+
removeAssumedBits(FLAT_SCRATCH_INIT);
546+
532547
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
533548
: ChangeStatus::UNCHANGED;
534549
}
@@ -687,6 +702,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
687702
return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
688703
UsedAssumedInformation);
689704
}
705+
706+
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
707+
// not to be set.
708+
bool needFlatScratchInit(Attributor &A) {
709+
assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
710+
711+
// Check all AddrSpaceCast instructions. FlatScratchInit is needed if
712+
// there is a cast from PRIVATE_ADDRESS.
713+
auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
714+
return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
715+
AMDGPUAS::PRIVATE_ADDRESS;
716+
};
717+
718+
bool UsedAssumedInformation = false;
719+
if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
720+
{Instruction::AddrSpaceCast},
721+
UsedAssumedInformation))
722+
return true;
723+
724+
// Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
725+
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
726+
727+
Function *F = getAssociatedFunction();
728+
for (Instruction &I : instructions(F)) {
729+
for (const Use &U : I.operands()) {
730+
if (const auto *C = dyn_cast<Constant>(U)) {
731+
if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
732+
return true;
733+
}
734+
}
735+
}
736+
737+
// Finally check callees.
738+
739+
// This is called on each callee; false means callee shouldn't have
740+
// no-flat-scratch-init.
741+
auto CheckForNoFlatScratchInit = [&](Instruction &I) {
742+
const auto &CB = cast<CallBase>(I);
743+
const Function *Callee = CB.getCalledFunction();
744+
745+
// Callee == 0 for inline asm or indirect call with known callees.
746+
// In the latter case, updateImpl() already checked the callees and we
747+
// know their FLAT_SCRATCH_INIT bit is set.
748+
// If function has indirect call with unknown callees, the bit is
749+
// already removed in updateImpl() and execution won't reach here.
750+
if (!Callee)
751+
return true;
752+
753+
return Callee->getIntrinsicID() !=
754+
Intrinsic::amdgcn_addrspacecast_nonnull;
755+
};
756+
757+
UsedAssumedInformation = false;
758+
// If any callee is false (i.e. need FlatScratchInit),
759+
// checkForAllCallLikeInstructions returns false, in which case this
760+
// function returns true.
761+
return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
762+
UsedAssumedInformation);
763+
}
690764
};
691765

692766
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238-
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238+
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
239239
;.
240240
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
241241
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118118
; CHECK-SAME: ) #[[ATTR4]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
119+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138138
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -254,14 +254,14 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
254254

255255
attributes #0 = { "amdgpu-no-agpr" }
256256
;.
257-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
257+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260260
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
261261
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
262-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
262+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
263263
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
264-
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
265-
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
266-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
264+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
265+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
266+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
267267
;.

0 commit comments

Comments
 (0)