@@ -260,6 +260,18 @@ class AMDGPUInformationCache : public InformationCache {
260
260
return !HasAperture && (Access & ADDR_SPACE_CAST);
261
261
}
262
262
263
+ bool checkConstForAddrSpaceCastFromPrivate (const Constant *C) {
264
+ SmallPtrSet<const Constant *, 8 > Visited;
265
+ uint8_t Access = getConstantAccess (C, Visited);
266
+
267
+ if (Access & ADDR_SPACE_CAST)
268
+ if (const auto *CE = dyn_cast<ConstantExpr>(C))
269
+ if (CE->getOperand (0 )->getType ()->getPointerAddressSpace () ==
270
+ AMDGPUAS::PRIVATE_ADDRESS)
271
+ return true ;
272
+ return false ;
273
+ }
274
+
263
275
private:
264
276
/// Used to determine if the Constant needs the queue pointer.
265
277
DenseMap<const Constant *, uint8_t > ConstantStatus;
@@ -524,6 +536,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
524
536
if (isAssumed (COMPLETION_ACTION) && funcRetrievesCompletionAction (A, COV))
525
537
removeAssumedBits (COMPLETION_ACTION);
526
538
539
+ if (isAssumed (FLAT_SCRATCH_INIT) && needFlatScratchInit (A))
540
+ removeAssumedBits (FLAT_SCRATCH_INIT);
541
+
527
542
return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
528
543
: ChangeStatus::UNCHANGED;
529
544
}
@@ -682,6 +697,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
682
697
return !A.checkForAllCallLikeInstructions (DoesNotRetrieve, *this ,
683
698
UsedAssumedInformation);
684
699
}
700
+
701
+ // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
702
+ // not to be set.
703
+ bool needFlatScratchInit (Attributor &A) {
704
+ assert (isAssumed (FLAT_SCRATCH_INIT)); // only called if the bit is still set
705
+
706
+ // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
707
+ // there is a cast from PRIVATE_ADDRESS.
708
+ auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
709
+ return cast<AddrSpaceCastInst>(I).getSrcAddressSpace () !=
710
+ AMDGPUAS::PRIVATE_ADDRESS;
711
+ };
712
+
713
+ bool UsedAssumedInformation = false ;
714
+ if (!A.checkForAllInstructions (AddrSpaceCastNotFromPrivate, *this ,
715
+ {Instruction::AddrSpaceCast},
716
+ UsedAssumedInformation))
717
+ return true ;
718
+
719
+ // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
720
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
721
+
722
+ Function *F = getAssociatedFunction ();
723
+ for (Instruction &I : instructions (F)) {
724
+ for (const Use &U : I.operands ()) {
725
+ if (const auto *C = dyn_cast<Constant>(U)) {
726
+ if (InfoCache.checkConstForAddrSpaceCastFromPrivate (C))
727
+ return true ;
728
+ }
729
+ }
730
+ }
731
+
732
+ // Finally check callees.
733
+
734
+ // This is called on each callee; false means callee shouldn't have
735
+ // no-flat-scratch-init.
736
+ auto CheckForNoFlatScratchInit = [&](Instruction &I) {
737
+ const auto &CB = cast<CallBase>(I);
738
+ const Function *Callee = CB.getCalledFunction ();
739
+
740
+ // Callee == 0 for inline asm or indirect call with known callees.
741
+ // In the latter case, updateImpl() already checked the callees and we
742
+ // know their FLAT_SCRATCH_INIT bit is set.
743
+ // If function has indirect call with unknown callees, the bit is
744
+ // already removed in updateImpl() and execution won't reach here.
745
+ if (!Callee)
746
+ return true ;
747
+
748
+ return Callee->getIntrinsicID () !=
749
+ Intrinsic::amdgcn_addrspacecast_nonnull;
750
+ };
751
+
752
+ UsedAssumedInformation = false ;
753
+ // If any callee is false (i.e. need FlatScratchInit),
754
+ // checkForAllCallLikeInstructions returns false, in which case this
755
+ // function returns true.
756
+ return !A.checkForAllCallLikeInstructions (CheckForNoFlatScratchInit, *this ,
757
+ UsedAssumedInformation);
758
+ }
685
759
};
686
760
687
761
AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
0 commit comments