Skip to content

Commit 4e743f8

Browse files
committed
Reapply "[AMDGPU] Infer amdgpu-no-flat-scratch-init attribute in AMDGPUAttributor (#94647)"
This reverts commit 1ef9410. This fixes the test file attributor-flatscratchinit-globalisel.ll.
1 parent 794afe0 commit 4e743f8

32 files changed

+1402
-170
lines changed

clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ __global__ void kernel4(struct S s) {
432432
// CHECK-SPIRV-NEXT: ret void
433433
//
434434
// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
435-
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
435+
// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
436436
// OPT-NEXT: [[ENTRY:.*:]]
437437
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
438438
// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4

llvm/lib/Target/AMDGPU/AMDGPUAttributes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
3030
AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
3131
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
3232
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
33+
AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
3334

3435
#undef AMDGPU_ATTRIBUTE

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,18 @@ class AMDGPUInformationCache : public InformationCache {
260260
return !HasAperture && (Access & ADDR_SPACE_CAST);
261261
}
262262

263+
bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
264+
SmallPtrSet<const Constant *, 8> Visited;
265+
uint8_t Access = getConstantAccess(C, Visited);
266+
267+
if (Access & ADDR_SPACE_CAST)
268+
if (const auto *CE = dyn_cast<ConstantExpr>(C))
269+
if (CE->getOperand(0)->getType()->getPointerAddressSpace() ==
270+
AMDGPUAS::PRIVATE_ADDRESS)
271+
return true;
272+
return false;
273+
}
274+
263275
private:
264276
/// Used to determine if the Constant needs the queue pointer.
265277
DenseMap<const Constant *, uint8_t> ConstantStatus;
@@ -524,6 +536,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
524536
if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
525537
removeAssumedBits(COMPLETION_ACTION);
526538

539+
if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
540+
removeAssumedBits(FLAT_SCRATCH_INIT);
541+
527542
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
528543
: ChangeStatus::UNCHANGED;
529544
}
@@ -682,6 +697,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
682697
return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
683698
UsedAssumedInformation);
684699
}
700+
701+
// Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
702+
// not to be set.
703+
bool needFlatScratchInit(Attributor &A) {
704+
assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
705+
706+
// Check all AddrSpaceCast instructions. FlatScratchInit is needed if
707+
// there is a cast from PRIVATE_ADDRESS.
708+
auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
709+
return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
710+
AMDGPUAS::PRIVATE_ADDRESS;
711+
};
712+
713+
bool UsedAssumedInformation = false;
714+
if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
715+
{Instruction::AddrSpaceCast},
716+
UsedAssumedInformation))
717+
return true;
718+
719+
// Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
720+
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
721+
722+
Function *F = getAssociatedFunction();
723+
for (Instruction &I : instructions(F)) {
724+
for (const Use &U : I.operands()) {
725+
if (const auto *C = dyn_cast<Constant>(U)) {
726+
if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
727+
return true;
728+
}
729+
}
730+
}
731+
732+
// Finally check callees.
733+
734+
// This is called on each callee; false means callee shouldn't have
735+
// no-flat-scratch-init.
736+
auto CheckForNoFlatScratchInit = [&](Instruction &I) {
737+
const auto &CB = cast<CallBase>(I);
738+
const Function *Callee = CB.getCalledFunction();
739+
740+
// Callee == 0 for inline asm or indirect call with known callees.
741+
// In the latter case, updateImpl() already checked the callees and we
742+
// know their FLAT_SCRATCH_INIT bit is set.
743+
// If function has indirect call with unknown callees, the bit is
744+
// already removed in updateImpl() and execution won't reach here.
745+
if (!Callee)
746+
return true;
747+
748+
return Callee->getIntrinsicID() !=
749+
Intrinsic::amdgcn_addrspacecast_nonnull;
750+
};
751+
752+
UsedAssumedInformation = false;
753+
// If any callee is false (i.e. need FlatScratchInit),
754+
// checkForAllCallLikeInstructions returns false, in which case this
755+
// function returns true.
756+
return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
757+
UsedAssumedInformation);
758+
}
685759
};
686760

687761
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,9 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238-
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
238+
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
239239
;.
240240
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
241241
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118118
; CHECK-SAME: ) #[[ATTR4]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR9:[0-9]+]]
119+
; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -136,7 +136,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138138
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR4]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR9]]
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -254,14 +254,14 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
254254

255255
attributes #0 = { "amdgpu-no-agpr" }
256256
;.
257-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
257+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
258+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
259+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
260260
; CHECK: attributes #[[ATTR3:[0-9]+]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
261261
; CHECK: attributes #[[ATTR4]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
262-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
262+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
263263
; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
264-
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
265-
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
266-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" }
264+
; CHECK: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
265+
; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
266+
; CHECK: attributes #[[ATTR10]] = { "amdgpu-no-agpr" }
267267
;.

0 commit comments

Comments
 (0)