
Commit 61ae7e4

[RISCV] Select pattern (shl (sext_vl/zext_vl), 1) to VWADD/VWADDU. (#82225)
Previously, we already had similar selection patterns for (shl (ext)) and (shl_vl (ext_vl)).
1 parent 9c6df7d commit 61ae7e4
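
For illustration only (not part of this commit): shifting left by one is the same as adding a value to itself, so a masked shl-by-1 of a vp.sext/vp.zext result can now be selected directly to a masked vwadd.vv (or vwaddu.vv for the zero-extending case) instead of a separate widening extend plus shift. Below is a minimal LLVM IR sketch of the shape these new patterns target, with a hypothetical function name and the splat-of-1 built with the usual insertelement/shufflevector idiom:

declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @shl1_of_vpsext(<vscale x 8 x i16> %x, <vscale x 8 x i1> %m, i32 zeroext %evl) {
  ; VP sign-extend i16 -> i32 under mask %m and explicit vector length %evl.
  %ext = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %m, i32 %evl)
  ; Splat of the constant 1 used as the shift amount.
  %head = insertelement <vscale x 8 x i32> poison, i32 1, i32 0
  %one = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  ; shl-by-1 of the extended value, i.e. %ext + %ext; with this patch the
  ; extend+shift pair is expected to select to a single masked vwadd.vv.
  %shl = shl <vscale x 8 x i32> %ext, %one
  ret <vscale x 8 x i32> %shl
}

The new vpscatter test cases below exercise this shape through the index computation of a getelementptr feeding llvm.vp.scatter, where indexing i16 elements introduces the shift by 1.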

2 files changed (+126, -16 lines)

llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td

Lines changed: 10 additions & 0 deletions
@@ -976,6 +976,16 @@ foreach vtiToWti = AllWidenableIntVectors in {
 (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
 (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
 vti.AVL, vti.Log2SEW, TA_MA)>;
+def : Pat<(shl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag)),
+               (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+          (!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK")
+               (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
+               (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+def : Pat<(shl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag)),
+               (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+          (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK")
+               (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
+               (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 }

llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll

Lines changed: 116 additions & 16 deletions
@@ -417,6 +417,106 @@ define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vsca
 ret void
 }

+declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vwadd.vv v12, v10, v10, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vsext.vf2 v12, v10, v0.t
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwadd.vv v16, v12, v12
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: ret
+  %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vwaddu.vv v12, v10, v10, v0.t
+; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vzext.vf2 v12, v10, v0.t
+; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV64-NEXT: vwadd.vv v16, v12, v12
+; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: ret
+  %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vsext.vf2 v16, v12, v0.t
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v12, v16, 0
+; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwadd.vv v16, v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: ret
+  %eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vzext.vf2 v16, v12, v0.t
+; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v12, v16, 0
+; RV32-NEXT: vadd.vv v12, v12, v12
+; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vwaddu.vv v16, v12, v12, v0.t
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: ret
+  %eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
+  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
+  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

 define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -2029,10 +2129,10 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
 ; RV32-NEXT: vl8re32.v v24, (a0)
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: mv a2, a1
-; RV32-NEXT: bltu a1, a0, .LBB95_2
+; RV32-NEXT: bltu a1, a0, .LBB99_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a2, a0
-; RV32-NEXT: .LBB95_2:
+; RV32-NEXT: .LBB99_2:
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
 ; RV32-NEXT: sub a2, a1, a0
@@ -2062,10 +2162,10 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
 ; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT: vl8re64.v v24, (a0)
 ; RV64-NEXT: mv a0, a2
-; RV64-NEXT: bltu a2, a1, .LBB95_2
+; RV64-NEXT: bltu a2, a1, .LBB99_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a0, a1
-; RV64-NEXT: .LBB95_2:
+; RV64-NEXT: .LBB99_2:
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
 ; RV64-NEXT: sub a0, a2, a1
@@ -2097,10 +2197,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: vsll.vi v24, v24, 3
 ; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB96_2
+; RV32-NEXT: bltu a2, a1, .LBB100_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
-; RV32-NEXT: .LBB96_2:
+; RV32-NEXT: .LBB100_2:
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2137,10 +2237,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
 ; RV64-NEXT: vsext.vf4 v16, v24
 ; RV64-NEXT: vsll.vi v24, v16, 3
 ; RV64-NEXT: mv a3, a2
-; RV64-NEXT: bltu a2, a1, .LBB96_2
+; RV64-NEXT: bltu a2, a1, .LBB100_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB96_2:
+; RV64-NEXT: .LBB100_2:
 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: sub a3, a2, a1
@@ -2178,10 +2278,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: vsll.vi v24, v24, 3
 ; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB97_2
+; RV32-NEXT: bltu a2, a1, .LBB101_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
-; RV32-NEXT: .LBB97_2:
+; RV32-NEXT: .LBB101_2:
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2218,10 +2318,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV64-NEXT: vsext.vf4 v16, v24
 ; RV64-NEXT: vsll.vi v24, v16, 3
 ; RV64-NEXT: mv a3, a2
-; RV64-NEXT: bltu a2, a1, .LBB97_2
+; RV64-NEXT: bltu a2, a1, .LBB101_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB97_2:
+; RV64-NEXT: .LBB101_2:
 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
 ; RV64-NEXT: sub a3, a2, a1
@@ -2260,10 +2360,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: vsll.vi v24, v24, 3
 ; RV32-NEXT: mv a3, a2
-; RV32-NEXT: bltu a2, a1, .LBB98_2
+; RV32-NEXT: bltu a2, a1, .LBB102_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: mv a3, a1
-; RV32-NEXT: .LBB98_2:
+; RV32-NEXT: .LBB102_2:
 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT: sub a3, a2, a1
@@ -2285,10 +2385,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: vsll.vi v24, v24, 3
 ; RV64-NEXT: mv a3, a2
-; RV64-NEXT: bltu a2, a1, .LBB98_2
+; RV64-NEXT: bltu a2, a1, .LBB102_2
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB98_2:
+; RV64-NEXT: .LBB102_2:
 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
 ; RV64-NEXT: sub a3, a2, a1
