@@ -98,6 +98,7 @@ class SIMemOpInfo final {
bool IsCrossAddressSpaceOrdering = false;
bool IsVolatile = false;
bool IsNonTemporal = false;
+ bool IsLastUse = false;

SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
SIAtomicScope Scope = SIAtomicScope::SYSTEM,
@@ -107,13 +108,15 @@ class SIMemOpInfo final {
AtomicOrdering FailureOrdering =
AtomicOrdering::SequentiallyConsistent,
bool IsVolatile = false,
- bool IsNonTemporal = false)
+ bool IsNonTemporal = false,
+ bool IsLastUse = false)
: Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
InstrAddrSpace(InstrAddrSpace),
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
IsVolatile(IsVolatile),
- IsNonTemporal(IsNonTemporal) {
+ IsNonTemporal(IsNonTemporal),
+ IsLastUse(IsLastUse) {

if (Ordering == AtomicOrdering::NotAtomic) {
assert(Scope == SIAtomicScope::NONE &&
@@ -201,6 +204,12 @@ class SIMemOpInfo final {
return IsNonTemporal;
}

+ /// \returns True if the memory access of the machine instruction used to
+ /// create this SIMemOpInfo is a last use, false otherwise.
+ bool isLastUse() const {
+ return IsLastUse;
+ }
+
/// \returns True if ordering constraint of the machine instruction used to
/// create this SIMemOpInfo is unordered or higher, false otherwise.
bool isAtomic() const {
@@ -316,6 +325,12 @@ class SICacheControl {
return false;
};

+ /// Update \p MI memory instruction to indicate it is a last use. Return true
+ /// iff the instruction was modified.
+ virtual bool enableLastUse(MachineInstr &MI, bool IsLastUse) const {
+ return false;
+ }
+
/// Inserts any necessary instructions at position \p Pos relative
/// to instruction \p MI to ensure memory instructions before \p Pos of kind
/// \p Op associated with address spaces \p AddrSpace have completed. Used
@@ -592,6 +607,10 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
// MI. \returns Returns true if \p MI is modified, false otherwise.
bool setScope(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Value) const;
+ // Checks if the CPol operand is present in instruction \p MI and if the
+ // current Scope policy is the same as \p Value.
+ bool isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const;

// Stores with system scope (SCOPE_SYS) need to wait for:
// - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
@@ -618,6 +637,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool IsNonTemporal) const override;

bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
+
+ bool enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const override;
};

class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -745,12 +767,14 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsNonTemporal = true;
bool IsVolatile = false;
+ bool IsLastUse = false;

// Validator should check whether or not MMOs cover the entire set of
// locations accessed by the memory instruction.
for (const auto &MMO : MI->memoperands()) {
IsNonTemporal &= MMO->isNonTemporal();
IsVolatile |= MMO->isVolatile();
+ IsLastUse |= MMO->getFlags() & MOLastUse;
InstrAddrSpace |=
toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
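A minimal standalone sketch (not LLVM code; all names are invented stand-ins) of how the loop above folds per-memory-operand flags into the eventual SIMemOpInfo: IsNonTemporal must be present on every operand (AND accumulation), while IsVolatile and the new IsLastUse are taken if any operand carries them (OR accumulation):

// Illustrative only; FakeMMO/FakeMOLastUse stand in for MachineMemOperand
// and AMDGPU's MOLastUse target flag.
#include <cassert>
#include <cstdint>
#include <vector>

constexpr uint64_t FakeMOLastUse = 1u << 0;

struct FakeMMO {
  bool NonTemporal = false;
  bool Volatile = false;
  uint64_t Flags = 0;
};

struct Folded {
  bool IsNonTemporal = true;   // same starting values as the pass
  bool IsVolatile = false;
  bool IsLastUse = false;
};

Folded fold(const std::vector<FakeMMO> &MMOs) {
  Folded F;
  for (const FakeMMO &MMO : MMOs) {
    F.IsNonTemporal &= MMO.NonTemporal;               // all operands must agree
    F.IsVolatile |= MMO.Volatile;                     // any operand suffices
    F.IsLastUse |= (MMO.Flags & FakeMOLastUse) != 0;  // any operand suffices
  }
  return F;
}

int main() {
  Folded F = fold({{true, false, FakeMOLastUse}, {false, false, 0}});
  assert(!F.IsNonTemporal && !F.IsVolatile && F.IsLastUse);
}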
@@ -792,7 +816,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
- IsNonTemporal);
+ IsNonTemporal, IsLastUse);
}

std::optional<SIMemOpInfo>
@@ -2209,6 +2233,15 @@ bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
return false;
}

+ bool SIGfx12CacheControl::isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const {
+ MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
+ if (!CPol)
+ return false;
+
+ return (CPol->getImm() & AMDGPU::CPol::SCOPE) == Value;
+ }
+
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
const MachineBasicBlock::iterator MI) const {
// TODO: implement flag for frontend to give us a hint not to insert waits.
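A small self-contained sketch (not LLVM code) of the masked comparison isScope() performs on the cpol immediate: the SCOPE bit-field is masked out and compared against the requested scope value, so unrelated cpol bits such as the temporal hint cannot affect the answer. The bit layout below is invented for illustration; the real encodings live in AMDGPU::CPol:

#include <cassert>
#include <cstdint>

namespace FakeCPol {
constexpr uint64_t TH_LU = 0x1;        // some unrelated temporal-hint bit
constexpr uint64_t SCOPE = 0x3 << 3;   // assumed 2-bit scope field
constexpr uint64_t SCOPE_SE = 0x1 << 3;
constexpr uint64_t SCOPE_SYS = 0x3 << 3;
} // namespace FakeCPol

// Mirrors: (CPol->getImm() & AMDGPU::CPol::SCOPE) == Value
bool isScope(uint64_t CPolImm, uint64_t Value) {
  return (CPolImm & FakeCPol::SCOPE) == Value;
}

int main() {
  uint64_t Imm = FakeCPol::SCOPE_SE | FakeCPol::TH_LU;
  assert(isScope(Imm, FakeCPol::SCOPE_SE));   // scope bits match
  assert(!isScope(Imm, FakeCPol::SCOPE_SYS)); // other bits do not interfere
}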
@@ -2415,6 +2448,16 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}

+ bool SIGfx12CacheControl::enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const {
+ assert(MI.mayLoad() && !MI.mayStore());
+
+ if (IsLastUse && !isScope(MI, AMDGPU::CPol::SCOPE_SYS))
+ return setTH(MI, AMDGPU::CPol::TH_LU);
+
+ return false;
+ }
+
bool SIGfx12CacheControl::expandSystemScopeStore(
MachineBasicBlock::iterator &MI) const {
MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
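A minimal sketch (not LLVM code; types and helpers are invented) of the policy enableLastUse() implements above: the last-use temporal hint is applied only to loads that are flagged last-use and are not already at system scope; otherwise the instruction is left untouched and no modification is reported:

#include <cassert>

enum class Scope { CU, SE, DEV, SYS };

struct FakeLoad {
  Scope S = Scope::CU;
  bool HasLastUseHint = false;
};

// Mirrors the "return true iff the instruction was modified" convention.
bool enableLastUse(FakeLoad &Load, bool IsLastUse) {
  if (IsLastUse && Load.S != Scope::SYS) {
    Load.HasLastUseHint = true; // corresponds to setTH(MI, TH_LU)
    return true;
  }
  return false;
}

int main() {
  FakeLoad L1{Scope::SE};
  assert(enableLastUse(L1, /*IsLastUse=*/true) && L1.HasLastUseHint);

  FakeLoad L2{Scope::SYS};
  assert(!enableLastUse(L2, /*IsLastUse=*/true) && !L2.HasLastUseHint);
}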
@@ -2471,12 +2514,19 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
return Changed;
}

+ // enableVolatileAndOrNonTemporal can insert instructions and advance the
+ // iterator MI, so keep a handle on the original instruction for the
+ // last-use handling below.
+ MachineInstr &Inst = *MI;
+
// Atomic instructions already bypass caches to the scope specified by the
// SyncScope operand. Only non-atomic volatile and nontemporal instructions
// need additional treatment.
Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
SIMemOp::LOAD, MOI.isVolatile(),
MOI.isNonTemporal());
+
+ Changed |= CC->enableLastUse(Inst, MOI.isLastUse());
+
return Changed;
}
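A tiny standalone sketch (not LLVM code) of why the pass grabs MachineInstr &Inst = *MI before the call that may advance the iterator: once the helper moves the iterator, *MI no longer refers to the original load, but a reference taken beforehand still does. std::list stands in for the machine-instruction list, and the helper name is made up:

#include <cassert>
#include <iterator>
#include <list>

// Stand-in for enableVolatileAndOrNonTemporal: inserts a new element and
// advances the caller's iterator to it.
void helperThatAdvances(std::list<int> &L, std::list<int>::iterator &It) {
  It = L.insert(std::next(It), -1);
}

int main() {
  std::list<int> Insts = {10, 20, 30};
  auto It = std::next(Insts.begin()); // "MI" points at 20

  int &Orig = *It;                    // like `MachineInstr &Inst = *MI;`
  helperThatAdvances(Insts, It);

  assert(*It == -1);                  // iterator now at the inserted element
  assert(Orig == 20);                 // captured reference is still the load
  Orig += 1;                          // ...so it can still be updated
  assert(*std::next(Insts.begin()) == 21);
}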