Skip to content

Commit ce38128

Browse files
committed
[RISCV] Avoid redundant branch-to-branch when expanding cmpxchg
If the success value of a cmpxchg is used in a branch, the expanded cmpxchg sequence ends up with a redundant branch-to-branch (as the backend atomics expansion happens as late as possible, passes to optimise such cases have already run). This patch identifies this case and avoids it when expanding the cmpxchg. Note that a similar optimisation is possible for a BEQ on the cmpxchg success value. As it's hard to imagine a case where real-world code may do that, this patch doesn't handle that case. Differential Revision: https://reviews.llvm.org/D130192
1 parent cc8c746 commit ce38128

File tree

2 files changed

+77
-22
lines changed

2 files changed

+77
-22
lines changed

llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,63 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
508508
return true;
509509
}
510510

511+
// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
512+
// operation, it can be folded into the cmpxchg expansion by
513+
// modifying the branch within 'LoopHead' (which performs the same
514+
// comparison). This is a valid transformation because after altering the
515+
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
516+
// redundant and can be deleted. In the case of a masked cmpxchg, an
517+
// appropriate AND and BNE must be matched.
518+
//
519+
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
520+
// LoopHeadBNETarget argument to the target that should be used within the
521+
// loop head, and removes that block as a successor to MBB.
522+
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
523+
MachineBasicBlock::iterator MBBI,
524+
Register DestReg, Register CmpValReg,
525+
Register MaskReg,
526+
MachineBasicBlock *&LoopHeadBNETarget) {
527+
SmallVector<MachineInstr *> ToErase;
528+
auto E = MBB.end();
529+
if (MBBI == E)
530+
return false;
531+
MBBI = skipDebugInstructionsForward(MBBI, E);
532+
533+
// If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
534+
if (MaskReg.isValid()) {
535+
if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
536+
return false;
537+
Register ANDOp1 = MBBI->getOperand(1).getReg();
538+
Register ANDOp2 = MBBI->getOperand(2).getReg();
539+
if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
540+
!(ANDOp1 == MaskReg && ANDOp2 == DestReg))
541+
return false;
542+
// We now expect the BNE to use the result of the AND as an operand.
543+
DestReg = MBBI->getOperand(0).getReg();
544+
ToErase.push_back(&*MBBI);
545+
MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
546+
}
547+
548+
// Match BNE DestReg, CmpValReg.
549+
if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
550+
return false;
551+
Register BNEOp0 = MBBI->getOperand(0).getReg();
552+
Register BNEOp1 = MBBI->getOperand(1).getReg();
553+
if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
554+
!(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
555+
return false;
556+
ToErase.push_back(&*MBBI);
557+
LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
558+
MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
559+
if (MBBI != E)
560+
return false;
561+
562+
MBB.removeSuccessor(LoopHeadBNETarget);
563+
for (auto *MI : ToErase)
564+
MI->eraseFromParent();
565+
return true;
566+
}
567+
511568
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
512569
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
513570
int Width, MachineBasicBlock::iterator &NextMBBI) {
@@ -518,25 +575,31 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
518575
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
519576
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
520577

578+
Register DestReg = MI.getOperand(0).getReg();
579+
Register ScratchReg = MI.getOperand(1).getReg();
580+
Register AddrReg = MI.getOperand(2).getReg();
581+
Register CmpValReg = MI.getOperand(3).getReg();
582+
Register NewValReg = MI.getOperand(4).getReg();
583+
Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();
584+
585+
MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
586+
tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
587+
LoopHeadBNETarget);
588+
521589
// Insert new MBBs.
522590
MF->insert(++MBB.getIterator(), LoopHeadMBB);
523591
MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
524592
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
525593

526594
// Set up successors and transfer remaining instructions to DoneMBB.
527595
LoopHeadMBB->addSuccessor(LoopTailMBB);
528-
LoopHeadMBB->addSuccessor(DoneMBB);
596+
LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
529597
LoopTailMBB->addSuccessor(DoneMBB);
530598
LoopTailMBB->addSuccessor(LoopHeadMBB);
531599
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
532600
DoneMBB->transferSuccessors(&MBB);
533601
MBB.addSuccessor(LoopHeadMBB);
534602

535-
Register DestReg = MI.getOperand(0).getReg();
536-
Register ScratchReg = MI.getOperand(1).getReg();
537-
Register AddrReg = MI.getOperand(2).getReg();
538-
Register CmpValReg = MI.getOperand(3).getReg();
539-
Register NewValReg = MI.getOperand(4).getReg();
540603
AtomicOrdering Ordering =
541604
static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
542605

@@ -549,7 +612,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
549612
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
550613
.addReg(DestReg)
551614
.addReg(CmpValReg)
552-
.addMBB(DoneMBB);
615+
.addMBB(LoopHeadBNETarget);
553616
// .looptail:
554617
// sc.[w|d] scratch, newval, (addr)
555618
// bnez scratch, loophead
@@ -574,7 +637,7 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
574637
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
575638
.addReg(ScratchReg)
576639
.addReg(CmpValReg)
577-
.addMBB(DoneMBB);
640+
.addMBB(LoopHeadBNETarget);
578641

579642
// .looptail:
580643
// xor scratch, dest, newval

llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@ define void @cmpxchg_and_branch1(i32* %ptr, i32 signext %cmp, i32 signext %val)
1717
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
1818
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
1919
; CHECK-NEXT: lr.w.aqrl a3, (a0)
20-
; CHECK-NEXT: bne a3, a1, .LBB0_5
20+
; CHECK-NEXT: bne a3, a1, .LBB0_1
2121
; CHECK-NEXT: # %bb.4: # %do_cmpxchg
2222
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2
2323
; CHECK-NEXT: sc.w.aqrl a4, a2, (a0)
2424
; CHECK-NEXT: bnez a4, .LBB0_3
25-
; CHECK-NEXT: .LBB0_5: # %do_cmpxchg
26-
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
27-
; CHECK-NEXT: bne a3, a1, .LBB0_1
25+
; CHECK-NEXT: # %bb.5: # %do_cmpxchg
2826
; CHECK-NEXT: # %bb.2: # %exit
2927
; CHECK-NEXT: ret
3028
entry:
@@ -86,18 +84,15 @@ define void @cmpxchg_masked_and_branch1(i8* %ptr, i8 signext %cmp, i8 signext %v
8684
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
8785
; RV32IA-NEXT: lr.w.aqrl a4, (a3)
8886
; RV32IA-NEXT: and a5, a4, a0
89-
; RV32IA-NEXT: bne a5, a1, .LBB2_5
87+
; RV32IA-NEXT: bne a5, a1, .LBB2_1
9088
; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
9189
; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2
9290
; RV32IA-NEXT: xor a5, a4, a2
9391
; RV32IA-NEXT: and a5, a5, a0
9492
; RV32IA-NEXT: xor a5, a4, a5
9593
; RV32IA-NEXT: sc.w.aqrl a5, a5, (a3)
9694
; RV32IA-NEXT: bnez a5, .LBB2_3
97-
; RV32IA-NEXT: .LBB2_5: # %do_cmpxchg
98-
; RV32IA-NEXT: # in Loop: Header=BB2_1 Depth=1
99-
; RV32IA-NEXT: and a4, a4, a0
100-
; RV32IA-NEXT: bne a1, a4, .LBB2_1
95+
; RV32IA-NEXT: # %bb.5: # %do_cmpxchg
10196
; RV32IA-NEXT: # %bb.2: # %exit
10297
; RV32IA-NEXT: ret
10398
;
@@ -119,18 +114,15 @@ define void @cmpxchg_masked_and_branch1(i8* %ptr, i8 signext %cmp, i8 signext %v
119114
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
120115
; RV64IA-NEXT: lr.w.aqrl a4, (a3)
121116
; RV64IA-NEXT: and a5, a4, a0
122-
; RV64IA-NEXT: bne a5, a1, .LBB2_5
117+
; RV64IA-NEXT: bne a5, a1, .LBB2_1
123118
; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
124119
; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2
125120
; RV64IA-NEXT: xor a5, a4, a2
126121
; RV64IA-NEXT: and a5, a5, a0
127122
; RV64IA-NEXT: xor a5, a4, a5
128123
; RV64IA-NEXT: sc.w.aqrl a5, a5, (a3)
129124
; RV64IA-NEXT: bnez a5, .LBB2_3
130-
; RV64IA-NEXT: .LBB2_5: # %do_cmpxchg
131-
; RV64IA-NEXT: # in Loop: Header=BB2_1 Depth=1
132-
; RV64IA-NEXT: and a4, a4, a0
133-
; RV64IA-NEXT: bne a1, a4, .LBB2_1
125+
; RV64IA-NEXT: # %bb.5: # %do_cmpxchg
134126
; RV64IA-NEXT: # %bb.2: # %exit
135127
; RV64IA-NEXT: ret
136128
entry:

0 commit comments

Comments
 (0)