Commit d9cd801

[RISCV] Defer forming x0,x0 vsetvlis till after insertion
Stacked on llvm#96200

Currently we try to detect when the VL doesn't change between two vsetvlis in emitVSETVLIs, and insert a VL-preserving vsetvli x0,x0 then and there. Doing it in situ has some drawbacks:

- We lose information about what the VL is, which can prevent doLocalPostpass from coalescing some vsetvlis further down the line.
- We have to explicitly handle x0,x0-form vsetvlis in coalesceVSETVLIs, whereas we don't in the top-down passes.
- It prevents us from sharing the VSETVLIInfo compatibility logic between the two, which is why we have canMutatePriorConfig.

This patch changes emitVSETVLIs to emit only regular vsetvlis, and adds a separate pass after coalesceVSETVLIs that converts vsetvlis to x0,x0 when possible. By removing the edge cases needed to handle x0,x0s, we can unify how vsetvli compatibility is checked between coalesceVSETVLIs and emitVSETVLIs, and remove the duplicated logic in areCompatibleVTYPEs and canMutatePriorConfig.

Note that when converting to x0,x0, we reuse the block data computed by the dataflow analysis even though the conversion runs after coalesceVSETVLIs. This turns out to be fine, since coalesceVSETVLIs never changes a block's exit state (only the local state within the block), so the entry states stay the same too.
1 parent c9b4345 commit d9cd801
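For context on the transformation the tests below exercise, here is a minimal illustrative sketch of the two vsetvli forms in RISC-V assembly (the operand values are made up for illustration and are not taken from this patch): per the RVV spec, a vsetvli with both rd and rs1 equal to x0 keeps the current VL and only rewrites vtype, so it may only be formed once we know the VL cannot change.

# Ordinary form: requests AVL = 2 elements, computes a new VL for the new
# vtype (e16, mf4), and writes that VL to the destination GPR (discarded
# into zero/x0 here).
vsetivli zero, 2, e16, mf4, ta, ma

# VL-preserving form: rd = x0 and rs1 = x0 keeps the existing VL and only
# changes vtype; the spec requires that the new setting not alter VL, so the
# compiler must prove VL is unchanged before it can emit this form.
vsetvli x0, x0, e16, m2, ta, ma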

8 files changed: 352 additions & 577 deletions

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 155 additions & 184 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll

Lines changed: 18 additions & 36 deletions
@@ -51,9 +51,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask)
 ; RV32-NEXT: beqz a1, .LBB1_2
 ; RV32-NEXT: .LBB1_4: # %cond.load1
 ; RV32-NEXT: flh fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: ret
 ;
@@ -77,9 +76,8 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask)
 ; RV64-NEXT: beqz a1, .LBB1_2
 ; RV64-NEXT: .LBB1_4: # %cond.load1
 ; RV64-NEXT: flh fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: ret
 %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0)
@@ -114,9 +112,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask)
 ; RV32-NEXT: beqz a2, .LBB2_2
 ; RV32-NEXT: .LBB2_6: # %cond.load1
 ; RV32-NEXT: flh fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: addi a0, a0, 2
 ; RV32-NEXT: andi a2, a1, 4
@@ -162,9 +159,8 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask)
 ; RV64-NEXT: beqz a2, .LBB2_2
 ; RV64-NEXT: .LBB2_6: # %cond.load1
 ; RV64-NEXT: flh fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: addi a0, a0, 2
 ; RV64-NEXT: andi a2, a1, 4
@@ -227,9 +223,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask)
 ; RV32-NEXT: beqz a2, .LBB3_2
 ; RV32-NEXT: .LBB3_10: # %cond.load1
 ; RV32-NEXT: flh fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: addi a0, a0, 2
 ; RV32-NEXT: andi a2, a1, 4
@@ -319,9 +314,8 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask)
 ; RV64-NEXT: beqz a2, .LBB3_2
 ; RV64-NEXT: .LBB3_10: # %cond.load1
 ; RV64-NEXT: flh fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: addi a0, a0, 2
 ; RV64-NEXT: andi a2, a1, 4
@@ -425,9 +419,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas
 ; RV32-NEXT: beqz a1, .LBB5_2
 ; RV32-NEXT: .LBB5_4: # %cond.load1
 ; RV32-NEXT: flw fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: ret
 ;
@@ -451,9 +444,8 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas
 ; RV64-NEXT: beqz a1, .LBB5_2
 ; RV64-NEXT: .LBB5_4: # %cond.load1
 ; RV64-NEXT: flw fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: ret
 %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0)
@@ -488,9 +480,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas
 ; RV32-NEXT: beqz a2, .LBB6_2
 ; RV32-NEXT: .LBB6_6: # %cond.load1
 ; RV32-NEXT: flw fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: addi a0, a0, 4
 ; RV32-NEXT: andi a2, a1, 4
@@ -536,9 +527,8 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas
 ; RV64-NEXT: beqz a2, .LBB6_2
 ; RV64-NEXT: .LBB6_6: # %cond.load1
 ; RV64-NEXT: flw fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: addi a0, a0, 4
 ; RV64-NEXT: andi a2, a1, 4
@@ -601,9 +591,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas
 ; RV32-NEXT: beqz a2, .LBB7_2
 ; RV32-NEXT: .LBB7_10: # %cond.load1
 ; RV32-NEXT: flw fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa5
 ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV32-NEXT: vfmv.s.f v10, fa5
 ; RV32-NEXT: vslideup.vi v8, v10, 1
 ; RV32-NEXT: addi a0, a0, 4
 ; RV32-NEXT: andi a2, a1, 4
@@ -693,9 +682,8 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas
 ; RV64-NEXT: beqz a2, .LBB7_2
 ; RV64-NEXT: .LBB7_10: # %cond.load1
 ; RV64-NEXT: flw fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa5
 ; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64-NEXT: vfmv.s.f v10, fa5
 ; RV64-NEXT: vslideup.vi v8, v10, 1
 ; RV64-NEXT: addi a0, a0, 4
 ; RV64-NEXT: andi a2, a1, 4
@@ -799,9 +787,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m
 ; RV32-NEXT: beqz a1, .LBB9_2
 ; RV32-NEXT: .LBB9_4: # %cond.load1
 ; RV32-NEXT: fld fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vfmv.s.f v9, fa5
 ; RV32-NEXT: vslideup.vi v8, v9, 1
 ; RV32-NEXT: ret
 ;
@@ -825,9 +812,8 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m
 ; RV64-NEXT: beqz a1, .LBB9_2
 ; RV64-NEXT: .LBB9_4: # %cond.load1
 ; RV64-NEXT: fld fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vfmv.s.f v9, fa5
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: ret
 %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0)
@@ -862,9 +848,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m
 ; RV32-NEXT: beqz a2, .LBB10_2
 ; RV32-NEXT: .LBB10_6: # %cond.load1
 ; RV32-NEXT: fld fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vfmv.s.f v10, fa5
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV32-NEXT: vfmv.s.f v10, fa5
 ; RV32-NEXT: vslideup.vi v8, v10, 1
 ; RV32-NEXT: addi a0, a0, 8
 ; RV32-NEXT: andi a2, a1, 4
@@ -910,9 +895,8 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m
 ; RV64-NEXT: beqz a2, .LBB10_2
 ; RV64-NEXT: .LBB10_6: # %cond.load1
 ; RV64-NEXT: fld fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vfmv.s.f v10, fa5
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-NEXT: vfmv.s.f v10, fa5
 ; RV64-NEXT: vslideup.vi v8, v10, 1
 ; RV64-NEXT: addi a0, a0, 8
 ; RV64-NEXT: andi a2, a1, 4
@@ -975,9 +959,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m
 ; RV32-NEXT: beqz a2, .LBB11_2
 ; RV32-NEXT: .LBB11_10: # %cond.load1
 ; RV32-NEXT: fld fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV32-NEXT: vfmv.s.f v12, fa5
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV32-NEXT: vfmv.s.f v12, fa5
 ; RV32-NEXT: vslideup.vi v8, v12, 1
 ; RV32-NEXT: addi a0, a0, 8
 ; RV32-NEXT: andi a2, a1, 4
@@ -1067,9 +1050,8 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m
 ; RV64-NEXT: beqz a2, .LBB11_2
 ; RV64-NEXT: .LBB11_10: # %cond.load1
 ; RV64-NEXT: fld fa5, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vfmv.s.f v12, fa5
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-NEXT: vfmv.s.f v12, fa5
 ; RV64-NEXT: vslideup.vi v8, v12, 1
 ; RV64-NEXT: addi a0, a0, 8
 ; RV64-NEXT: andi a2, a1, 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll

Lines changed: 12 additions & 24 deletions
@@ -40,9 +40,8 @@ define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) {
 ; CHECK-NEXT: beqz a1, .LBB1_2
 ; CHECK-NEXT: .LBB1_4: # %cond.load1
 ; CHECK-NEXT: lbu a0, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: ret
 %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0)
@@ -77,9 +76,8 @@ define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB2_2
 ; CHECK-NEXT: .LBB2_6: # %cond.load1
 ; CHECK-NEXT: lbu a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: addi a0, a0, 1
 ; CHECK-NEXT: andi a2, a1, 4
@@ -142,9 +140,8 @@ define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB3_2
 ; CHECK-NEXT: .LBB3_10: # %cond.load1
 ; CHECK-NEXT: lbu a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: addi a0, a0, 1
 ; CHECK-NEXT: andi a2, a1, 4
@@ -237,9 +234,8 @@ define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) {
 ; CHECK-NEXT: beqz a1, .LBB5_2
 ; CHECK-NEXT: .LBB5_4: # %cond.load1
 ; CHECK-NEXT: lh a0, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: ret
 %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 %base, <2 x i1> %mask, <2 x i16> %src0)
@@ -274,9 +270,8 @@ define <4 x i16> @expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB6_2
 ; CHECK-NEXT: .LBB6_6: # %cond.load1
 ; CHECK-NEXT: lh a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: addi a0, a0, 2
 ; CHECK-NEXT: andi a2, a1, 4
@@ -339,9 +334,8 @@ define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB7_2
 ; CHECK-NEXT: .LBB7_10: # %cond.load1
 ; CHECK-NEXT: lh a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: addi a0, a0, 2
 ; CHECK-NEXT: andi a2, a1, 4
@@ -434,9 +428,8 @@ define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) {
 ; CHECK-NEXT: beqz a1, .LBB9_2
 ; CHECK-NEXT: .LBB9_4: # %cond.load1
 ; CHECK-NEXT: lw a0, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: ret
 %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0)
@@ -471,9 +464,8 @@ define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB10_2
 ; CHECK-NEXT: .LBB10_6: # %cond.load1
 ; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a2
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-NEXT: addi a0, a0, 4
 ; CHECK-NEXT: andi a2, a1, 4
@@ -536,9 +528,8 @@ define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) {
 ; CHECK-NEXT: beqz a2, .LBB11_2
 ; CHECK-NEXT: .LBB11_10: # %cond.load1
 ; CHECK-NEXT: lw a2, 0(a0)
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a2
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v10, a2
 ; CHECK-NEXT: vslideup.vi v8, v10, 1
 ; CHECK-NEXT: addi a0, a0, 4
 ; CHECK-NEXT: andi a2, a1, 4
@@ -680,9 +671,8 @@ define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) {
 ; RV64-NEXT: beqz a1, .LBB13_2
 ; RV64-NEXT: .LBB13_4: # %cond.load1
 ; RV64-NEXT: ld a0, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.s.x v9, a0
 ; RV64-NEXT: vslideup.vi v8, v9, 1
 ; RV64-NEXT: ret
 %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0)
@@ -775,9 +765,8 @@ define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) {
 ; RV64-NEXT: beqz a2, .LBB14_2
 ; RV64-NEXT: .LBB14_6: # %cond.load1
 ; RV64-NEXT: ld a2, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vmv.s.x v10, a2
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-NEXT: vmv.s.x v10, a2
 ; RV64-NEXT: vslideup.vi v8, v10, 1
 ; RV64-NEXT: addi a0, a0, 8
 ; RV64-NEXT: andi a2, a1, 4
@@ -954,9 +943,8 @@ define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) {
 ; RV64-NEXT: beqz a2, .LBB15_2
 ; RV64-NEXT: .LBB15_10: # %cond.load1
 ; RV64-NEXT: ld a2, 0(a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vmv.s.x v12, a2
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; RV64-NEXT: vmv.s.x v12, a2
 ; RV64-NEXT: vslideup.vi v8, v12, 1
 ; RV64-NEXT: addi a0, a0, 8
 ; RV64-NEXT: andi a2, a1, 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x,
 ; CHECK-NEXT: vmul.vx v14, v12, a0
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v0, 12
 ; CHECK-NEXT: vadd.vi v8, v14, -14
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
