@@ -265,6 +265,18 @@ class AMDGPUInformationCache : public InformationCache {
265
265
return !HasAperture && (Access & ADDR_SPACE_CAST);
266
266
}
267
267
268
+ bool checkConstForAddrSpaceCastFromPrivate (const Constant *C) {
269
+ SmallPtrSet<const Constant *, 8 > Visited;
270
+ uint8_t Access = getConstantAccess (C, Visited);
271
+
272
+ if (Access & ADDR_SPACE_CAST)
273
+ if (const auto *CE = dyn_cast<ConstantExpr>(C))
274
+ if (CE->getOperand (0 )->getType ()->getPointerAddressSpace () ==
275
+ AMDGPUAS::PRIVATE_ADDRESS)
276
+ return true ;
277
+ return false ;
278
+ }
279
+
268
280
private:
269
281
/// Used to determine if the Constant needs the queue pointer.
270
282
DenseMap<const Constant *, uint8_t > ConstantStatus;
@@ -529,6 +541,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
529
541
if (isAssumed (COMPLETION_ACTION) && funcRetrievesCompletionAction (A, COV))
530
542
removeAssumedBits (COMPLETION_ACTION);
531
543
544
+ if (isAssumed (FLAT_SCRATCH_INIT) && needFlatScratchInit (A))
545
+ removeAssumedBits (FLAT_SCRATCH_INIT);
546
+
532
547
return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
533
548
: ChangeStatus::UNCHANGED;
534
549
}
@@ -687,6 +702,65 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
687
702
return !A.checkForAllCallLikeInstructions (DoesNotRetrieve, *this ,
688
703
UsedAssumedInformation);
689
704
}
705
+
706
+ // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
707
+ // not to be set.
708
+ bool needFlatScratchInit (Attributor &A) {
709
+ assert (isAssumed (FLAT_SCRATCH_INIT)); // only called if the bit is still set
710
+
711
+ // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
712
+ // there is a cast from PRIVATE_ADDRESS.
713
+ auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
714
+ return cast<AddrSpaceCastInst>(I).getSrcAddressSpace () !=
715
+ AMDGPUAS::PRIVATE_ADDRESS;
716
+ };
717
+
718
+ bool UsedAssumedInformation = false ;
719
+ if (!A.checkForAllInstructions (AddrSpaceCastNotFromPrivate, *this ,
720
+ {Instruction::AddrSpaceCast},
721
+ UsedAssumedInformation))
722
+ return true ;
723
+
724
+ // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
725
+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
726
+
727
+ Function *F = getAssociatedFunction ();
728
+ for (Instruction &I : instructions (F)) {
729
+ for (const Use &U : I.operands ()) {
730
+ if (const auto *C = dyn_cast<Constant>(U)) {
731
+ if (InfoCache.checkConstForAddrSpaceCastFromPrivate (C))
732
+ return true ;
733
+ }
734
+ }
735
+ }
736
+
737
+ // Finally check callees.
738
+
739
+ // This is called on each callee; false means callee shouldn't have
740
+ // no-flat-scratch-init.
741
+ auto CheckForNoFlatScratchInit = [&](Instruction &I) {
742
+ const auto &CB = cast<CallBase>(I);
743
+ const Function *Callee = CB.getCalledFunction ();
744
+
745
+ // Callee == 0 for inline asm or indirect call with known callees.
746
+ // In the latter case, updateImpl() already checked the callees and we
747
+ // know their FLAT_SCRATCH_INIT bit is set.
748
+ // If function has indirect call with unknown callees, the bit is
749
+ // already removed in updateImpl() and execution won't reach here.
750
+ if (!Callee)
751
+ return true ;
752
+
753
+ return Callee->getIntrinsicID () !=
754
+ Intrinsic::amdgcn_addrspacecast_nonnull;
755
+ };
756
+
757
+ UsedAssumedInformation = false ;
758
+ // If any callee is false (i.e. need FlatScratchInit),
759
+ // checkForAllCallLikeInstructions returns false, in which case this
760
+ // function returns true.
761
+ return !A.checkForAllCallLikeInstructions (CheckForNoFlatScratchInit, *this ,
762
+ UsedAssumedInformation);
763
+ }
690
764
};
691
765
692
766
AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
0 commit comments