@@ -508,6 +508,63 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
508
508
return true ;
509
509
}
510
510
511
+ // If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
512
+ // operation, it can be folded into the cmpxchg expansion by
513
+ // modifying the branch within 'LoopHead' (which performs the same
514
+ // comparison). This is a valid transformation because after altering the
515
+ // LoopHead's BNE destination, the BNE following the cmpxchg becomes
516
+ // redundant and and be deleted. In the case of a masked cmpxchg, an
517
+ // appropriate AND and BNE must be matched.
518
+ //
519
+ // On success, returns true and deletes the matching BNE or AND+BNE, sets the
520
+ // LoopHeadBNETarget argument to the target that should be used within the
521
+ // loop head, and removes that block as a successor to MBB.
522
+ bool tryToFoldBNEOnCmpXchgResult (MachineBasicBlock &MBB,
523
+ MachineBasicBlock::iterator MBBI,
524
+ Register DestReg, Register CmpValReg,
525
+ Register MaskReg,
526
+ MachineBasicBlock *&LoopHeadBNETarget) {
527
+ SmallVector<MachineInstr *> ToErase;
528
+ auto E = MBB.end ();
529
+ if (MBBI == E)
530
+ return false ;
531
+ MBBI = skipDebugInstructionsForward (MBBI, E);
532
+
533
+ // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
534
+ if (MaskReg.isValid ()) {
535
+ if (MBBI == E || MBBI->getOpcode () != RISCV::AND)
536
+ return false ;
537
+ Register ANDOp1 = MBBI->getOperand (1 ).getReg ();
538
+ Register ANDOp2 = MBBI->getOperand (2 ).getReg ();
539
+ if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
540
+ !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
541
+ return false ;
542
+ // We now expect the BNE to use the result of the AND as an operand.
543
+ DestReg = MBBI->getOperand (0 ).getReg ();
544
+ ToErase.push_back (&*MBBI);
545
+ MBBI = skipDebugInstructionsForward (std::next (MBBI), E);
546
+ }
547
+
548
+ // Match BNE DestReg, MaskReg.
549
+ if (MBBI == E || MBBI->getOpcode () != RISCV::BNE)
550
+ return false ;
551
+ Register BNEOp0 = MBBI->getOperand (0 ).getReg ();
552
+ Register BNEOp1 = MBBI->getOperand (1 ).getReg ();
553
+ if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
554
+ !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
555
+ return false ;
556
+ ToErase.push_back (&*MBBI);
557
+ LoopHeadBNETarget = MBBI->getOperand (2 ).getMBB ();
558
+ MBBI = skipDebugInstructionsForward (std::next (MBBI), E);
559
+ if (MBBI != E)
560
+ return false ;
561
+
562
+ MBB.removeSuccessor (LoopHeadBNETarget);
563
+ for (auto *MI : ToErase)
564
+ MI->eraseFromParent ();
565
+ return true ;
566
+ }
567
+
511
568
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg (
512
569
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
513
570
int Width, MachineBasicBlock::iterator &NextMBBI) {
@@ -518,25 +575,31 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
518
575
auto LoopTailMBB = MF->CreateMachineBasicBlock (MBB.getBasicBlock ());
519
576
auto DoneMBB = MF->CreateMachineBasicBlock (MBB.getBasicBlock ());
520
577
578
+ Register DestReg = MI.getOperand (0 ).getReg ();
579
+ Register ScratchReg = MI.getOperand (1 ).getReg ();
580
+ Register AddrReg = MI.getOperand (2 ).getReg ();
581
+ Register CmpValReg = MI.getOperand (3 ).getReg ();
582
+ Register NewValReg = MI.getOperand (4 ).getReg ();
583
+ Register MaskReg = IsMasked ? MI.getOperand (5 ).getReg () : Register ();
584
+
585
+ MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
586
+ tryToFoldBNEOnCmpXchgResult (MBB, std::next (MBBI), DestReg, CmpValReg, MaskReg,
587
+ LoopHeadBNETarget);
588
+
521
589
// Insert new MBBs.
522
590
MF->insert (++MBB.getIterator (), LoopHeadMBB);
523
591
MF->insert (++LoopHeadMBB->getIterator (), LoopTailMBB);
524
592
MF->insert (++LoopTailMBB->getIterator (), DoneMBB);
525
593
526
594
// Set up successors and transfer remaining instructions to DoneMBB.
527
595
LoopHeadMBB->addSuccessor (LoopTailMBB);
528
- LoopHeadMBB->addSuccessor (DoneMBB );
596
+ LoopHeadMBB->addSuccessor (LoopHeadBNETarget );
529
597
LoopTailMBB->addSuccessor (DoneMBB);
530
598
LoopTailMBB->addSuccessor (LoopHeadMBB);
531
599
DoneMBB->splice (DoneMBB->end (), &MBB, MI, MBB.end ());
532
600
DoneMBB->transferSuccessors (&MBB);
533
601
MBB.addSuccessor (LoopHeadMBB);
534
602
535
- Register DestReg = MI.getOperand (0 ).getReg ();
536
- Register ScratchReg = MI.getOperand (1 ).getReg ();
537
- Register AddrReg = MI.getOperand (2 ).getReg ();
538
- Register CmpValReg = MI.getOperand (3 ).getReg ();
539
- Register NewValReg = MI.getOperand (4 ).getReg ();
540
603
AtomicOrdering Ordering =
541
604
static_cast <AtomicOrdering>(MI.getOperand (IsMasked ? 6 : 5 ).getImm ());
542
605
@@ -549,7 +612,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
549
612
BuildMI (LoopHeadMBB, DL, TII->get (RISCV::BNE))
550
613
.addReg (DestReg)
551
614
.addReg (CmpValReg)
552
- .addMBB (DoneMBB );
615
+ .addMBB (LoopHeadBNETarget );
553
616
// .looptail:
554
617
// sc.[w|d] scratch, newval, (addr)
555
618
// bnez scratch, loophead
@@ -574,7 +637,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
574
637
BuildMI (LoopHeadMBB, DL, TII->get (RISCV::BNE))
575
638
.addReg (ScratchReg)
576
639
.addReg (CmpValReg)
577
- .addMBB (DoneMBB );
640
+ .addMBB (LoopHeadBNETarget );
578
641
579
642
// .looptail:
580
643
// xor scratch, dest, newval
0 commit comments