Skip to content

RegisterCoalescer: Fix implicit operand handling during rematerialize #75271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions llvm/lib/CodeGen/RegisterCoalescer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
<< printMBBReference(MBB) << '\t' << CopyMI);
}

const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();

// Remove CopyMI.
// Note: This is fine to remove the copy before updating the live-ranges.
// While updating the live-ranges, we only look at slot indices and
Expand All @@ -1214,6 +1216,19 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
&EndPoints);
BValNo->markUnused();

if (IsUndefCopy) {
// We're introducing an undef phi def, and need to set undef on any users of
// the previously local def to avoid artifically extending the lifetime
// through the block.
for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
const MachineInstr &MI = *MO.getParent();
SlotIndex UseIdx = LIS->getInstructionIndex(MI);
if (!IntB.liveAt(UseIdx))
MO.setIsUndef(true);
}
}

// Extend IntB to the EndPoints of its original live interval.
LIS->extendToIndices(IntB, EndPoints);

Expand Down Expand Up @@ -1596,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}

if (NewMI.getOperand(0).getSubReg())
NewMI.getOperand(0).setIsUndef();
NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());

// Transfer over implicit operands to the rematerialized instruction.
for (MachineOperand &MO : ImplicitOps)
Expand Down
131 changes: 131 additions & 0 deletions llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing < %s | FileCheck %s

%"class.llvm::APInt." = type <{ %union.anon., i32, [4 x i8] }>
%union.anon. = type { i64 }

define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0_(ptr %r) {
; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0_:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl %edx, %edx
; CHECK-NEXT: shlq $4, %rdx
; CHECK-NEXT: movl $0, (%rdi,%rdx)
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: .LBB0_1: # %bb
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB0_3 Depth 2
; CHECK-NEXT: xorl $1, %ecx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: movq %rcx, %r8
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_4
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_3: # %for.body.i.i.i.i.i.3
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: orq $1, %r8
; CHECK-NEXT: orq $1, %rsi
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: jmp .LBB0_4
entry:
br label %bb

bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
%i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
%xor = xor i32 %i.0, 1
%idxprom = zext nneg i32 %xor to i64
br label %for.body.i.i.i.i.i

for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
%lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
%lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %idxprom, %bb ]
%exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3

for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
%sunkaddr55 = mul i64 %lsr.iv37, 0
%i = xor i64 %lsr.iv, 0
%lsr.iv.next = or i64 %lsr.iv, 1
%lsr.iv.next38 = or i64 %lsr.iv37, 1
br label %for.body.i.i.i.i.i

_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
%idxprom12 = zext nneg i32 %i.0 to i64
%arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
store i32 0, ptr %arrayidx13, align 4
br label %bb
}

; This variant hit an assert and never reached the verifier error
define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0__assert(ptr %r) {
; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0__assert:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: jmp .LBB1_1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: movl %edx, %edx
; CHECK-NEXT: shlq $4, %rdx
; CHECK-NEXT: movl $0, (%rdi,%rdx)
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: .LBB1_1: # %bb
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_3 Depth 2
; CHECK-NEXT: xorl $1, %ecx
; CHECK-NEXT: movq %rcx, %rsi
; CHECK-NEXT: shlq $4, %rsi
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_4
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_3: # %for.body.i.i.i.i.i.3
; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: orq $1, %rsi
; CHECK-NEXT: orq $1, %r8
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB1_3
; CHECK-NEXT: jmp .LBB1_4
entry:
br label %bb

bb: ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
%i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
%xor = xor i32 %i.0, 1
%idxprom = zext nneg i32 %xor to i64
%arrayidx = getelementptr [2 x %"class.llvm::APInt."], ptr null, i64 0, i64 %idxprom
%i.i.i.i.i.i = load ptr, ptr %arrayidx, align 16
%i.i.i.i.i.i36 = ptrtoint ptr %i.i.i.i.i.i to i64
br label %for.body.i.i.i.i.i

for.body.i.i.i.i.i: ; preds = %for.body.i.i.i.i.i.3, %bb
%lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
%lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %i.i.i.i.i.i36, %bb ]
%exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3

for.body.i.i.i.i.i.3: ; preds = %for.body.i.i.i.i.i
%sunkaddr55 = mul i64 %lsr.iv37, 0
%i = xor i64 %lsr.iv, 0
%lsr.iv.next = or i64 %lsr.iv, 1
%lsr.iv.next38 = or i64 %lsr.iv37, 1
br label %for.body.i.i.i.i.i

_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i: ; preds = %for.body.i.i.i.i.i
%idxprom12 = zext nneg i32 %i.0 to i64
%arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
store i32 0, ptr %arrayidx13, align 4
br label %bb
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=x86_64-pc-linux-gnu -run-pass=register-coalescer -verify-coalescing -o - %s | FileCheck %s

# Check for "Live range continues after dead def flag".

# There are 2 copies of undef, but the registers also appear to be
# live due to block live outs, and thus were not deleted as
# eliminateUndefCopy only considered the live range, and not the undef
# flag.
#
# removePartialRedundancy would move the COPY undef %0 in bb.1 to
# bb.0. The live range of %1 would then be extended to be live out of
# %bb.1 for the backedge phi. This would then fail the verifier, since
# the dead flag was no longer valid. This was fixed by directly
# considering the undef flag to avoid considering this special case.

---
name: partial_redundancy_coalesce_undef_copy_live_out
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: partial_redundancy_coalesce_undef_copy_live_out
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri undef [[XOR32ri]], 1, implicit-def dead $eflags
; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[COPY]]
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = IMPLICIT_DEF
; CHECK-NEXT: JMP_1 %bb.1
bb.0:
liveins: $rdi

%0:gr32 = COPY $rdi

bb.1:
%1:gr32 = COPY undef %0
dead %1:gr32 = XOR32ri %1, 1, implicit-def dead $eflags
dead %2:gr32 = MOV32rr killed %0
%0:gr32 = COPY killed undef %1
JMP_1 %bb.1

...
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing -run-pass=register-coalescer -o - %s | FileCheck %s

# The %1 = MOV32r0 is rematerialized as a subregister of %2. The
# implicit-def %1 operand needs to have an undef added, just like the
# main result operand.

---
name: remat_into_subregister_set_undef_implicit_operand_subregisters
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_32bit, implicit-def [[MOV32r0_2]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
; CHECK-NEXT: JMP_1 %bb.1
bb.0:
liveins: $rdi

undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
%1:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def %1
undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2

bb.1:
%2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags

bb.2:
JCC_1 %bb.4, 5, implicit killed undef $eflags

bb.3:

bb.4:
dead %3:gr32 = MOV32rr %1
dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
%1:gr32 = COPY %2.sub_32bit
JMP_1 %bb.1

...

# Same, except the implicit-def on the original instruction already
# has a subregister index.

---
name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $rdi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_1]].sub_8bit
; CHECK-NEXT: undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_8bit, implicit-def [[MOV32r0_2]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: JCC_1 %bb.4, 5, implicit killed undef $eflags
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
; CHECK-NEXT: dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
; CHECK-NEXT: [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
; CHECK-NEXT: JMP_1 %bb.1
bb.0:
liveins: $rdi

undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
%1:gr32 = MOV32r0 implicit-def dead $eflags, undef implicit-def %1.sub_8bit
undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2

bb.1:
%2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags

bb.2:
JCC_1 %bb.4, 5, implicit killed undef $eflags

bb.3:

bb.4:
dead %3:gr32 = MOV32rr %1
dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
%1:gr32 = COPY %2.sub_32bit
JMP_1 %bb.1

...