Skip to content

Commit 08d7eec

Browse files
committed
Revert "Allow rematerialization of virtual reg uses"
Reverted due to two distinct performance regression reports. This reverts commit 92c1fd1.
1 parent e8e2edd commit 08d7eec

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+4093
-4106
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,10 @@ class TargetInstrInfo : public MCInstrInfo {
117117
const MachineFunction &MF) const;
118118

119119
/// Return true if the instruction is trivially rematerializable, meaning it
120-
/// has no side effects. Uses of constants and unallocatable physical
121-
/// registers are always trivial to rematerialize so that the instructions
122-
/// result is independent of the place in the function. Uses of virtual
123-
/// registers are allowed but it is caller's responsility to ensure these
124-
/// operands are valid at the point the instruction is beeing moved.
120+
/// has no side effects and requires no operands that aren't always available.
121+
/// This means the only allowed uses are constants and unallocatable physical
122+
/// registers so that the instruction's result is independent of the place
123+
/// in the function.
125124
bool isTriviallyReMaterializable(const MachineInstr &MI,
126125
AAResults *AA = nullptr) const {
127126
return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
@@ -141,7 +140,8 @@ class TargetInstrInfo : public MCInstrInfo {
141140
/// set, this hook lets the target specify whether the instruction is actually
142141
/// trivially rematerializable, taking into consideration its operands. This
143142
/// predicate must return false if the instruction has any side effects other
144-
/// than producing a value.
143+
/// than producing a value, or if it requires any address registers that are
144+
/// not always available.
145145
/// Requirements must be checked as stated in isTriviallyReMaterializable().
146146
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
147147
AAResults *AA) const {

llvm/lib/CodeGen/TargetInstrInfo.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -921,8 +921,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
921921
const MachineRegisterInfo &MRI = MF.getRegInfo();
922922

923923
// Remat clients assume operand 0 is the defined register.
924-
if (!MI.getNumOperands() || !MI.getOperand(0).isReg() ||
925-
MI.getOperand(0).isTied())
924+
if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
926925
return false;
927926
Register DefReg = MI.getOperand(0).getReg();
928927

@@ -984,6 +983,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
984983
// same virtual register, though.
985984
if (MO.isDef() && Reg != DefReg)
986985
return false;
986+
987+
// Don't allow any virtual-register uses. Rematting an instruction with
988+
// virtual register uses would lengthen the live ranges of the uses, which
989+
// is not necessarily a good idea, certainly not "trivial".
990+
if (MO.isUse())
991+
return false;
987992
}
988993

989994
// Everything checked out.

llvm/test/CodeGen/AMDGPU/remat-sop.mir

Lines changed: 0 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -51,66 +51,6 @@ body: |
5151
S_NOP 0, implicit %2
5252
S_ENDPGM 0
5353
...
54-
# The liverange of %0 covers a point of rematerialization, source value is
55-
# availabe.
56-
---
57-
name: test_remat_s_mov_b32_vreg_src_long_lr
58-
tracksRegLiveness: true
59-
machineFunctionInfo:
60-
stackPtrOffsetReg: $sgpr32
61-
body: |
62-
bb.0:
63-
; GCN-LABEL: name: test_remat_s_mov_b32_vreg_src_long_lr
64-
; GCN: renamable $sgpr0 = IMPLICIT_DEF
65-
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
66-
; GCN: S_NOP 0, implicit killed renamable $sgpr1
67-
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
68-
; GCN: S_NOP 0, implicit killed renamable $sgpr1
69-
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
70-
; GCN: S_NOP 0, implicit killed renamable $sgpr1
71-
; GCN: S_NOP 0, implicit killed renamable $sgpr0
72-
; GCN: S_ENDPGM 0
73-
%0:sreg_32 = IMPLICIT_DEF
74-
%1:sreg_32 = S_MOV_B32 %0:sreg_32
75-
%2:sreg_32 = S_MOV_B32 %0:sreg_32
76-
%3:sreg_32 = S_MOV_B32 %0:sreg_32
77-
S_NOP 0, implicit %1
78-
S_NOP 0, implicit %2
79-
S_NOP 0, implicit %3
80-
S_NOP 0, implicit %0
81-
S_ENDPGM 0
82-
...
83-
# The liverange of %0 does not cover a point of rematerialization, source value is
84-
# unavailabe and we do not want to artificially extend the liverange.
85-
---
86-
name: test_no_remat_s_mov_b32_vreg_src_short_lr
87-
tracksRegLiveness: true
88-
machineFunctionInfo:
89-
stackPtrOffsetReg: $sgpr32
90-
body: |
91-
bb.0:
92-
; GCN-LABEL: name: test_no_remat_s_mov_b32_vreg_src_short_lr
93-
; GCN: renamable $sgpr0 = IMPLICIT_DEF
94-
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
95-
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
96-
; GCN: renamable $sgpr1 = S_MOV_B32 renamable $sgpr0
97-
; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
98-
; GCN: renamable $sgpr0 = S_MOV_B32 killed renamable $sgpr0
99-
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
100-
; GCN: S_NOP 0, implicit killed renamable $sgpr1
101-
; GCN: renamable $sgpr1 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
102-
; GCN: S_NOP 0, implicit killed renamable $sgpr1
103-
; GCN: S_NOP 0, implicit killed renamable $sgpr0
104-
; GCN: S_ENDPGM 0
105-
%0:sreg_32 = IMPLICIT_DEF
106-
%1:sreg_32 = S_MOV_B32 %0:sreg_32
107-
%2:sreg_32 = S_MOV_B32 %0:sreg_32
108-
%3:sreg_32 = S_MOV_B32 %0:sreg_32
109-
S_NOP 0, implicit %1
110-
S_NOP 0, implicit %2
111-
S_NOP 0, implicit %3
112-
S_ENDPGM 0
113-
...
11454
---
11555
name: test_remat_s_mov_b64
11656
tracksRegLiveness: true

llvm/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
2929
; ENABLE-NEXT: pophs {r11, pc}
3030
; ENABLE-NEXT: .LBB0_3: @ %while.body.preheader
3131
; ENABLE-NEXT: movw r12, :lower16:skip
32-
; ENABLE-NEXT: sub r3, r1, #1
32+
; ENABLE-NEXT: sub r1, r1, #1
3333
; ENABLE-NEXT: movt r12, :upper16:skip
3434
; ENABLE-NEXT: .LBB0_4: @ %while.body
3535
; ENABLE-NEXT: @ =>This Inner Loop Header: Depth=1
36-
; ENABLE-NEXT: ldrb r1, [r0]
37-
; ENABLE-NEXT: ldrb r1, [r12, r1]
38-
; ENABLE-NEXT: add r0, r0, r1
39-
; ENABLE-NEXT: sub r1, r3, #1
40-
; ENABLE-NEXT: cmp r1, r3
36+
; ENABLE-NEXT: ldrb r3, [r0]
37+
; ENABLE-NEXT: ldrb r3, [r12, r3]
38+
; ENABLE-NEXT: add r0, r0, r3
39+
; ENABLE-NEXT: sub r3, r1, #1
40+
; ENABLE-NEXT: cmp r3, r1
4141
; ENABLE-NEXT: bhs .LBB0_6
4242
; ENABLE-NEXT: @ %bb.5: @ %while.body
4343
; ENABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
4444
; ENABLE-NEXT: cmp r0, r2
45-
; ENABLE-NEXT: mov r3, r1
45+
; ENABLE-NEXT: mov r1, r3
4646
; ENABLE-NEXT: blo .LBB0_4
4747
; ENABLE-NEXT: .LBB0_6: @ %if.end29
4848
; ENABLE-NEXT: pop {r11, pc}
@@ -119,20 +119,20 @@ define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnon
119119
; DISABLE-NEXT: pophs {r11, pc}
120120
; DISABLE-NEXT: .LBB0_3: @ %while.body.preheader
121121
; DISABLE-NEXT: movw r12, :lower16:skip
122-
; DISABLE-NEXT: sub r3, r1, #1
122+
; DISABLE-NEXT: sub r1, r1, #1
123123
; DISABLE-NEXT: movt r12, :upper16:skip
124124
; DISABLE-NEXT: .LBB0_4: @ %while.body
125125
; DISABLE-NEXT: @ =>This Inner Loop Header: Depth=1
126-
; DISABLE-NEXT: ldrb r1, [r0]
127-
; DISABLE-NEXT: ldrb r1, [r12, r1]
128-
; DISABLE-NEXT: add r0, r0, r1
129-
; DISABLE-NEXT: sub r1, r3, #1
130-
; DISABLE-NEXT: cmp r1, r3
126+
; DISABLE-NEXT: ldrb r3, [r0]
127+
; DISABLE-NEXT: ldrb r3, [r12, r3]
128+
; DISABLE-NEXT: add r0, r0, r3
129+
; DISABLE-NEXT: sub r3, r1, #1
130+
; DISABLE-NEXT: cmp r3, r1
131131
; DISABLE-NEXT: bhs .LBB0_6
132132
; DISABLE-NEXT: @ %bb.5: @ %while.body
133133
; DISABLE-NEXT: @ in Loop: Header=BB0_4 Depth=1
134134
; DISABLE-NEXT: cmp r0, r2
135-
; DISABLE-NEXT: mov r3, r1
135+
; DISABLE-NEXT: mov r1, r3
136136
; DISABLE-NEXT: blo .LBB0_4
137137
; DISABLE-NEXT: .LBB0_6: @ %if.end29
138138
; DISABLE-NEXT: pop {r11, pc}

llvm/test/CodeGen/ARM/funnel-shift-rot.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,13 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
7373
; SCALAR-NEXT: push {r4, r5, r11, lr}
7474
; SCALAR-NEXT: rsb r3, r2, #0
7575
; SCALAR-NEXT: and r4, r2, #63
76-
; SCALAR-NEXT: and r12, r3, #63
77-
; SCALAR-NEXT: rsb r3, r12, #32
76+
; SCALAR-NEXT: and lr, r3, #63
77+
; SCALAR-NEXT: rsb r3, lr, #32
7878
; SCALAR-NEXT: lsl r2, r0, r4
79-
; SCALAR-NEXT: lsr lr, r0, r12
80-
; SCALAR-NEXT: orr r3, lr, r1, lsl r3
81-
; SCALAR-NEXT: subs lr, r12, #32
82-
; SCALAR-NEXT: lsrpl r3, r1, lr
79+
; SCALAR-NEXT: lsr r12, r0, lr
80+
; SCALAR-NEXT: orr r3, r12, r1, lsl r3
81+
; SCALAR-NEXT: subs r12, lr, #32
82+
; SCALAR-NEXT: lsrpl r3, r1, r12
8383
; SCALAR-NEXT: subs r5, r4, #32
8484
; SCALAR-NEXT: movwpl r2, #0
8585
; SCALAR-NEXT: cmp r5, #0
@@ -88,8 +88,8 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
8888
; SCALAR-NEXT: lsr r3, r0, r3
8989
; SCALAR-NEXT: orr r3, r3, r1, lsl r4
9090
; SCALAR-NEXT: lslpl r3, r0, r5
91-
; SCALAR-NEXT: lsr r0, r1, r12
92-
; SCALAR-NEXT: cmp lr, #0
91+
; SCALAR-NEXT: lsr r0, r1, lr
92+
; SCALAR-NEXT: cmp r12, #0
9393
; SCALAR-NEXT: movwpl r0, #0
9494
; SCALAR-NEXT: orr r1, r3, r0
9595
; SCALAR-NEXT: mov r0, r2
@@ -245,15 +245,15 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
245245
; CHECK: @ %bb.0:
246246
; CHECK-NEXT: .save {r4, r5, r11, lr}
247247
; CHECK-NEXT: push {r4, r5, r11, lr}
248-
; CHECK-NEXT: and r12, r2, #63
248+
; CHECK-NEXT: and lr, r2, #63
249249
; CHECK-NEXT: rsb r2, r2, #0
250-
; CHECK-NEXT: rsb r3, r12, #32
250+
; CHECK-NEXT: rsb r3, lr, #32
251251
; CHECK-NEXT: and r4, r2, #63
252-
; CHECK-NEXT: lsr lr, r0, r12
253-
; CHECK-NEXT: orr r3, lr, r1, lsl r3
254-
; CHECK-NEXT: subs lr, r12, #32
252+
; CHECK-NEXT: lsr r12, r0, lr
253+
; CHECK-NEXT: orr r3, r12, r1, lsl r3
254+
; CHECK-NEXT: subs r12, lr, #32
255255
; CHECK-NEXT: lsl r2, r0, r4
256-
; CHECK-NEXT: lsrpl r3, r1, lr
256+
; CHECK-NEXT: lsrpl r3, r1, r12
257257
; CHECK-NEXT: subs r5, r4, #32
258258
; CHECK-NEXT: movwpl r2, #0
259259
; CHECK-NEXT: cmp r5, #0
@@ -262,8 +262,8 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
262262
; CHECK-NEXT: lsr r3, r0, r3
263263
; CHECK-NEXT: orr r3, r3, r1, lsl r4
264264
; CHECK-NEXT: lslpl r3, r0, r5
265-
; CHECK-NEXT: lsr r0, r1, r12
266-
; CHECK-NEXT: cmp lr, #0
265+
; CHECK-NEXT: lsr r0, r1, lr
266+
; CHECK-NEXT: cmp r12, #0
267267
; CHECK-NEXT: movwpl r0, #0
268268
; CHECK-NEXT: orr r1, r0, r3
269269
; CHECK-NEXT: mov r0, r2

llvm/test/CodeGen/ARM/funnel-shift.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -224,31 +224,31 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
224224
; CHECK-NEXT: mov r3, #0
225225
; CHECK-NEXT: bl __aeabi_uldivmod
226226
; CHECK-NEXT: add r0, r2, #27
227-
; CHECK-NEXT: lsl r2, r7, #27
228-
; CHECK-NEXT: and r12, r0, #63
229227
; CHECK-NEXT: lsl r6, r6, #27
228+
; CHECK-NEXT: and r1, r0, #63
229+
; CHECK-NEXT: lsl r2, r7, #27
230230
; CHECK-NEXT: orr r7, r6, r7, lsr #5
231-
; CHECK-NEXT: rsb r3, r12, #32
232-
; CHECK-NEXT: lsr r2, r2, r12
233231
; CHECK-NEXT: mov r6, #63
234-
; CHECK-NEXT: orr r2, r2, r7, lsl r3
235-
; CHECK-NEXT: subs r3, r12, #32
232+
; CHECK-NEXT: rsb r3, r1, #32
233+
; CHECK-NEXT: lsr r2, r2, r1
234+
; CHECK-NEXT: subs r12, r1, #32
236235
; CHECK-NEXT: bic r6, r6, r0
236+
; CHECK-NEXT: orr r2, r2, r7, lsl r3
237237
; CHECK-NEXT: lsl r5, r9, #1
238-
; CHECK-NEXT: lsrpl r2, r7, r3
239-
; CHECK-NEXT: subs r1, r6, #32
238+
; CHECK-NEXT: lsrpl r2, r7, r12
240239
; CHECK-NEXT: lsl r0, r5, r6
241-
; CHECK-NEXT: lsl r4, r8, #1
240+
; CHECK-NEXT: subs r4, r6, #32
241+
; CHECK-NEXT: lsl r3, r8, #1
242242
; CHECK-NEXT: movwpl r0, #0
243-
; CHECK-NEXT: orr r4, r4, r9, lsr #31
243+
; CHECK-NEXT: orr r3, r3, r9, lsr #31
244244
; CHECK-NEXT: orr r0, r0, r2
245245
; CHECK-NEXT: rsb r2, r6, #32
246-
; CHECK-NEXT: cmp r1, #0
246+
; CHECK-NEXT: cmp r4, #0
247+
; CHECK-NEXT: lsr r1, r7, r1
247248
; CHECK-NEXT: lsr r2, r5, r2
248-
; CHECK-NEXT: orr r2, r2, r4, lsl r6
249-
; CHECK-NEXT: lslpl r2, r5, r1
250-
; CHECK-NEXT: lsr r1, r7, r12
251-
; CHECK-NEXT: cmp r3, #0
249+
; CHECK-NEXT: orr r2, r2, r3, lsl r6
250+
; CHECK-NEXT: lslpl r2, r5, r4
251+
; CHECK-NEXT: cmp r12, #0
252252
; CHECK-NEXT: movwpl r1, #0
253253
; CHECK-NEXT: orr r1, r2, r1
254254
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}

llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,17 @@ define void @i56_or(i56* %a) {
9191
; BE-LABEL: i56_or:
9292
; BE: @ %bb.0:
9393
; BE-NEXT: mov r1, r0
94+
; BE-NEXT: ldr r12, [r0]
9495
; BE-NEXT: ldrh r2, [r1, #4]!
9596
; BE-NEXT: ldrb r3, [r1, #2]
9697
; BE-NEXT: orr r2, r3, r2, lsl #8
97-
; BE-NEXT: ldr r3, [r0]
98-
; BE-NEXT: orr r2, r2, r3, lsl #24
99-
; BE-NEXT: orr r12, r2, #384
100-
; BE-NEXT: strb r12, [r1, #2]
101-
; BE-NEXT: lsr r2, r12, #8
102-
; BE-NEXT: strh r2, [r1]
103-
; BE-NEXT: bic r1, r3, #255
104-
; BE-NEXT: orr r1, r1, r12, lsr #24
98+
; BE-NEXT: orr r2, r2, r12, lsl #24
99+
; BE-NEXT: orr r2, r2, #384
100+
; BE-NEXT: strb r2, [r1, #2]
101+
; BE-NEXT: lsr r3, r2, #8
102+
; BE-NEXT: strh r3, [r1]
103+
; BE-NEXT: bic r1, r12, #255
104+
; BE-NEXT: orr r1, r1, r2, lsr #24
105105
; BE-NEXT: str r1, [r0]
106106
; BE-NEXT: mov pc, lr
107107
%aa = load i56, i56* %a
@@ -127,13 +127,13 @@ define void @i56_and_or(i56* %a) {
127127
; BE-NEXT: ldrb r3, [r1, #2]
128128
; BE-NEXT: strb r2, [r1, #2]
129129
; BE-NEXT: orr r2, r3, r12, lsl #8
130-
; BE-NEXT: ldr r3, [r0]
131-
; BE-NEXT: orr r2, r2, r3, lsl #24
132-
; BE-NEXT: orr r12, r2, #384
133-
; BE-NEXT: lsr r2, r12, #8
134-
; BE-NEXT: strh r2, [r1]
135-
; BE-NEXT: bic r1, r3, #255
136-
; BE-NEXT: orr r1, r1, r12, lsr #24
130+
; BE-NEXT: ldr r12, [r0]
131+
; BE-NEXT: orr r2, r2, r12, lsl #24
132+
; BE-NEXT: orr r2, r2, #384
133+
; BE-NEXT: lsr r3, r2, #8
134+
; BE-NEXT: strh r3, [r1]
135+
; BE-NEXT: bic r1, r12, #255
136+
; BE-NEXT: orr r1, r1, r2, lsr #24
137137
; BE-NEXT: str r1, [r0]
138138
; BE-NEXT: mov pc, lr
139139

llvm/test/CodeGen/ARM/neon-copy.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,16 +1340,16 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
13401340
; CHECK-NEXT: .pad #8
13411341
; CHECK-NEXT: sub sp, sp, #8
13421342
; CHECK-NEXT: vmov.u16 r1, d0[1]
1343-
; CHECK-NEXT: and r12, r0, #3
1343+
; CHECK-NEXT: and r0, r0, #3
13441344
; CHECK-NEXT: vmov.u16 r2, d0[2]
1345-
; CHECK-NEXT: mov r0, sp
1346-
; CHECK-NEXT: vmov.u16 r3, d0[3]
1347-
; CHECK-NEXT: orr r0, r0, r12, lsl #1
1345+
; CHECK-NEXT: mov r3, sp
1346+
; CHECK-NEXT: vmov.u16 r12, d0[3]
1347+
; CHECK-NEXT: orr r0, r3, r0, lsl #1
13481348
; CHECK-NEXT: vst1.16 {d0[0]}, [r0:16]
13491349
; CHECK-NEXT: vldr d0, [sp]
13501350
; CHECK-NEXT: vmov.16 d0[1], r1
13511351
; CHECK-NEXT: vmov.16 d0[2], r2
1352-
; CHECK-NEXT: vmov.16 d0[3], r3
1352+
; CHECK-NEXT: vmov.16 d0[3], r12
13531353
; CHECK-NEXT: add sp, sp, #8
13541354
; CHECK-NEXT: bx lr
13551355
%tmp = extractelement <8 x i16> %x, i32 0

0 commit comments

Comments
 (0)