Skip to content

[TailDup] Allow large number of predecessors/successors without phis. #116072

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions llvm/lib/CodeGen/TailDuplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,14 +573,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;

// Duplicating a BB which has both multiple predecessors and successors will
// result in a complex CFG and also may cause huge amount of PHI nodes. If we
// want to remove this limitation, we have to address
// https://github.com/llvm/llvm-project/issues/78578.
if (TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize)
return false;

// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
Expand Down Expand Up @@ -618,6 +610,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
unsigned NumPhis = 0;
for (MachineInstr &MI : TailBB) {
// Non-duplicable things shouldn't be tail-duplicated.
// CFI instructions are marked as non-duplicable, because Darwin compact
Expand Down Expand Up @@ -661,6 +654,20 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,

if (InstrCount > MaxDuplicateCount)
return false;
NumPhis += MI.isPHI();
}

// Duplicating a BB which has both multiple predecessors and successors will
// may cause huge amount of PHI nodes. If we want to remove this limitation,
// we have to address https://github.com/llvm/llvm-project/issues/78578.
if (TailBB.pred_size() > TailDupPredSize &&
TailBB.succ_size() > TailDupSuccSize) {
// If TailBB or any of its successors contains a phi, we may have to add a
// large number of additional phis with additional incoming values.
if (NumPhis != 0 || any_of(TailBB.successors(), [](MachineBasicBlock *MBB) {
return any_of(*MBB, [](MachineInstr &MI) { return MI.isPHI(); });
}))
return false;
}

// Check if any of the successors of TailBB has a PHI node in which the
Expand Down
60 changes: 32 additions & 28 deletions llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
Original file line number Diff line number Diff line change
Expand Up @@ -538,43 +538,47 @@ body: |
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.2:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.3:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.4:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.5:
; LIMIT-NEXT: successors: %bb.7(0x80000000)
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: JMP_1 %bb.7
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.6:
; LIMIT-NEXT: successors:
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.7:
; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
; LIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.9:
; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[MOV32rm4]] :: (store (s32))
Expand All @@ -583,23 +587,23 @@ body: |
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.10:
; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri5]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri5]]
; LIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri8]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri8]]
; LIMIT-NEXT: RET 0, $eax
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.11:
; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri6]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri6]]
; LIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri9]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri9]]
; LIMIT-NEXT: RET 0, $eax
; LIMIT-NEXT: {{ $}}
; LIMIT-NEXT: bb.12:
; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri7]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri7]]
; LIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
; LIMIT-NEXT: MOV32mr [[COPY1]], 1, $noreg, 0, $noreg, [[SHR32ri10]] :: (store (s32))
; LIMIT-NEXT: $eax = COPY [[SHR32ri10]]
; LIMIT-NEXT: RET 0, $eax
;
; NOLIMIT-LABEL: name: foo_no_phis
Expand Down
Loading