Skip to content

Commit 5ca09d6

Browse files
authored
[ARM] Fix VBSL Pseudo kill flags. (#109629)
When expanding a VBSP pseudo into VMOV; VBSL, if the first reg was killed in the VBSP then the kill flags could be incorrectly copied to the mov (vorr) and the vbsl. Drop the kill flags. Note that this sometimes comes up when all the operands of the VBSP are the same, which can be optimized separately.
1 parent 78ff736 commit 5ca09d6

File tree

3 files changed

+53
-3
lines changed

3 files changed

+53
-3
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,12 +2178,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
21782178
} else {
21792179
// Use move to satisfy constraints
21802180
unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
2181+
unsigned MO1Flags = getRegState(MI.getOperand(1)) & ~RegState::Kill;
21812182
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
21822183
.addReg(DstReg,
21832184
RegState::Define |
21842185
getRenamableRegState(MI.getOperand(0).isRenamable()))
2185-
.add(MI.getOperand(1))
2186-
.add(MI.getOperand(1))
2186+
.addReg(MI.getOperand(1).getReg(), MO1Flags)
2187+
.addReg(MI.getOperand(1).getReg(), MO1Flags)
21872188
.addImm(MI.getOperand(4).getImm())
21882189
.add(MI.getOperand(5));
21892190
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))

llvm/test/CodeGen/ARM/expand-pseudos.mir

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
entry:
2525
unreachable
2626
}
27+
define i32 @vbsl_kill_flags(i32 %x) {
28+
unreachable
29+
}
2730
...
2831
---
2932
name: test1
@@ -141,3 +144,21 @@ body: |
141144
BX_RET 14, $noreg, implicit $r0
142145
143146
...
147+
---
148+
name: vbsl_kill_flags
149+
alignment: 4
150+
tracksRegLiveness: true
151+
body: |
152+
bb.0 (%ir-block.0):
153+
liveins: $d1
154+
155+
; CHECK-LABEL: name: vbsl_kill_flags
156+
; CHECK: liveins: $d1
157+
; CHECK-NEXT: {{ $}}
158+
; CHECK-NEXT: renamable $d0 = VORRd renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
159+
; CHECK-NEXT: renamable $d0 = VBSLd killed renamable $d0, renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
160+
; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
161+
renamable $d0 = VBSPd killed renamable $d1, renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
162+
BX_RET 14 /* CC::al */, $noreg, implicit $d0
163+
164+
...

llvm/test/CodeGen/ARM/vbsl.ll

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=armv7-eabihf -mattr=+neon %s -o - | FileCheck %s
2+
; RUN: llc -mtriple=armv7-eabihf -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s
33

44
define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
55
; CHECK-LABEL: v_bsli8:
@@ -261,6 +261,34 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw
261261
ret <2 x i64> %vbsl3.i
262262
}
263263

264+
define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
265+
; CHECK-LABEL: same_param_all:
266+
; CHECK: @ %bb.0:
267+
; CHECK-NEXT: vorr d0, d1, d1
268+
; CHECK-NEXT: vbsl d0, d1, d1
269+
; CHECK-NEXT: bx lr
270+
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
271+
ret <8 x i8> %vbsl.i
272+
}
273+
274+
define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
275+
; CHECK-LABEL: same_param_12:
276+
; CHECK: @ %bb.0:
277+
; CHECK-NEXT: vbsl d0, d1, d1
278+
; CHECK-NEXT: bx lr
279+
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
280+
ret <8 x i8> %vbsl.i
281+
}
282+
283+
define <8 x i8> @same_param_01(<8 x i8> %a, <8 x i8> %b) {
284+
; CHECK-LABEL: same_param_01:
285+
; CHECK: @ %bb.0:
286+
; CHECK-NEXT: vbif d0, d1, d0
287+
; CHECK-NEXT: bx lr
288+
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b)
289+
ret <8 x i8> %vbsl.i
290+
}
291+
264292
declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
265293
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
266294
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

0 commit comments

Comments
 (0)