Skip to content

Commit 6da5968

Browse files
authored
[RISCV] Lower scalar_to_vector for supported FP types (#114340)
In https://reviews.llvm.org/D147608 we added custom lowering of SCALAR_TO_VECTOR for integers, but inadvertently also marked it as custom for scalable FP vectors despite not handling them. This patch adds handling for floating-point scalars and marks SCALAR_TO_VECTOR as custom-lowered for fixed-length FP vectors too. Note that this doesn't handle bf16 or f16 vectors that would need promotion, but such scalar_to_vector nodes still seem to be emitted when expanding those types.
1 parent a33fd61 commit 6da5968

File tree

4 files changed: +146 −43 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,7 +1403,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
14031403
}
14041404

14051405
setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,
1406-
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
1406+
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
1407+
ISD::SCALAR_TO_VECTOR},
14071408
VT, Custom);
14081409

14091410
setOperationAction(
@@ -6511,9 +6512,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
65116512
if (VT.isFixedLengthVector())
65126513
ContainerVT = getContainerForFixedLengthVector(VT);
65136514
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6514-
Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6515-
SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6516-
DAG.getUNDEF(ContainerVT), Scalar, VL);
6515+
6516+
SDValue V;
6517+
if (VT.isFloatingPoint()) {
6518+
V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6519+
DAG.getUNDEF(ContainerVT), Scalar, VL);
6520+
} else {
6521+
Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6522+
V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6523+
DAG.getUNDEF(ContainerVT), Scalar, VL);
6524+
}
65176525
if (VT.isFixedLengthVector())
65186526
V = convertFromScalableVector(VT, V, DAG, Subtarget);
65196527
return V;
(new test file — path not captured in this page snapshot)

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
4+
5+
define <8 x float> @fpext_v8bf16(<8 x bfloat> %x) {
6+
; CHECK-LABEL: fpext_v8bf16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: fmv.x.w a0, fa0
9+
; CHECK-NEXT: fmv.x.w a1, fa1
10+
; CHECK-NEXT: fmv.x.w a2, fa2
11+
; CHECK-NEXT: fmv.x.w a3, fa3
12+
; CHECK-NEXT: fmv.x.w a4, fa4
13+
; CHECK-NEXT: fmv.x.w a5, fa5
14+
; CHECK-NEXT: fmv.x.w a6, fa6
15+
; CHECK-NEXT: fmv.x.w a7, fa7
16+
; CHECK-NEXT: slli a7, a7, 16
17+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
18+
; CHECK-NEXT: vmv.s.x v8, a7
19+
; CHECK-NEXT: slli a6, a6, 16
20+
; CHECK-NEXT: vmv.s.x v9, a6
21+
; CHECK-NEXT: vslideup.vi v9, v8, 1
22+
; CHECK-NEXT: slli a5, a5, 16
23+
; CHECK-NEXT: vmv.s.x v8, a5
24+
; CHECK-NEXT: slli a4, a4, 16
25+
; CHECK-NEXT: vmv.s.x v10, a4
26+
; CHECK-NEXT: vslideup.vi v10, v8, 1
27+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
28+
; CHECK-NEXT: vslideup.vi v10, v9, 2
29+
; CHECK-NEXT: slli a3, a3, 16
30+
; CHECK-NEXT: vmv.s.x v8, a3
31+
; CHECK-NEXT: slli a2, a2, 16
32+
; CHECK-NEXT: vmv.s.x v9, a2
33+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
34+
; CHECK-NEXT: vslideup.vi v9, v8, 1
35+
; CHECK-NEXT: slli a1, a1, 16
36+
; CHECK-NEXT: vmv.s.x v11, a1
37+
; CHECK-NEXT: slli a0, a0, 16
38+
; CHECK-NEXT: vmv.s.x v8, a0
39+
; CHECK-NEXT: vslideup.vi v8, v11, 1
40+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
41+
; CHECK-NEXT: vslideup.vi v8, v9, 2
42+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
43+
; CHECK-NEXT: vslideup.vi v8, v10, 4
44+
; CHECK-NEXT: ret
45+
%y = fpext <8 x bfloat> %x to <8 x float>
46+
ret <8 x float> %y
47+
}
48+
49+
define <8 x float> @fpext_v8f16(<8 x bfloat> %x) {
50+
; CHECK-LABEL: fpext_v8f16:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: fmv.x.w a0, fa0
53+
; CHECK-NEXT: fmv.x.w a1, fa1
54+
; CHECK-NEXT: fmv.x.w a2, fa2
55+
; CHECK-NEXT: fmv.x.w a3, fa3
56+
; CHECK-NEXT: fmv.x.w a4, fa4
57+
; CHECK-NEXT: fmv.x.w a5, fa5
58+
; CHECK-NEXT: fmv.x.w a6, fa6
59+
; CHECK-NEXT: fmv.x.w a7, fa7
60+
; CHECK-NEXT: slli a7, a7, 16
61+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
62+
; CHECK-NEXT: vmv.s.x v8, a7
63+
; CHECK-NEXT: slli a6, a6, 16
64+
; CHECK-NEXT: vmv.s.x v9, a6
65+
; CHECK-NEXT: vslideup.vi v9, v8, 1
66+
; CHECK-NEXT: slli a5, a5, 16
67+
; CHECK-NEXT: vmv.s.x v8, a5
68+
; CHECK-NEXT: slli a4, a4, 16
69+
; CHECK-NEXT: vmv.s.x v10, a4
70+
; CHECK-NEXT: vslideup.vi v10, v8, 1
71+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
72+
; CHECK-NEXT: vslideup.vi v10, v9, 2
73+
; CHECK-NEXT: slli a3, a3, 16
74+
; CHECK-NEXT: vmv.s.x v8, a3
75+
; CHECK-NEXT: slli a2, a2, 16
76+
; CHECK-NEXT: vmv.s.x v9, a2
77+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
78+
; CHECK-NEXT: vslideup.vi v9, v8, 1
79+
; CHECK-NEXT: slli a1, a1, 16
80+
; CHECK-NEXT: vmv.s.x v11, a1
81+
; CHECK-NEXT: slli a0, a0, 16
82+
; CHECK-NEXT: vmv.s.x v8, a0
83+
; CHECK-NEXT: vslideup.vi v8, v11, 1
84+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
85+
; CHECK-NEXT: vslideup.vi v8, v9, 2
86+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
87+
; CHECK-NEXT: vslideup.vi v8, v10, 4
88+
; CHECK-NEXT: ret
89+
%y = fpext <8 x bfloat> %x to <8 x float>
90+
ret <8 x float> %y
91+
}
92+

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -412,30 +412,20 @@ declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7>, me
412412
define <1 x half> @vsitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
413413
; RV32-LABEL: vsitofp_v1i7_v1f16:
414414
; RV32: # %bb.0:
415-
; RV32-NEXT: addi sp, sp, -16
416-
; RV32-NEXT: .cfi_def_cfa_offset 16
417415
; RV32-NEXT: slli a0, a0, 25
418416
; RV32-NEXT: srai a0, a0, 25
419417
; RV32-NEXT: fcvt.h.w fa5, a0
420-
; RV32-NEXT: fsh fa5, 14(sp)
421-
; RV32-NEXT: addi a0, sp, 14
422-
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
423-
; RV32-NEXT: vle16.v v8, (a0)
424-
; RV32-NEXT: addi sp, sp, 16
418+
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
419+
; RV32-NEXT: vfmv.s.f v8, fa5
425420
; RV32-NEXT: ret
426421
;
427422
; RV64-LABEL: vsitofp_v1i7_v1f16:
428423
; RV64: # %bb.0:
429-
; RV64-NEXT: addi sp, sp, -16
430-
; RV64-NEXT: .cfi_def_cfa_offset 16
431424
; RV64-NEXT: slli a0, a0, 57
432425
; RV64-NEXT: srai a0, a0, 57
433426
; RV64-NEXT: fcvt.h.w fa5, a0
434-
; RV64-NEXT: fsh fa5, 14(sp)
435-
; RV64-NEXT: addi a0, sp, 14
436-
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
437-
; RV64-NEXT: vle16.v v8, (a0)
438-
; RV64-NEXT: addi sp, sp, 16
427+
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
428+
; RV64-NEXT: vfmv.s.f v8, fa5
439429
; RV64-NEXT: ret
440430
%evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
441431
ret <1 x half> %evec
@@ -445,15 +435,10 @@ declare <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7>, me
445435
define <1 x half> @vuitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
446436
; CHECK-LABEL: vuitofp_v1i7_v1f16:
447437
; CHECK: # %bb.0:
448-
; CHECK-NEXT: addi sp, sp, -16
449-
; CHECK-NEXT: .cfi_def_cfa_offset 16
450438
; CHECK-NEXT: andi a0, a0, 127
451439
; CHECK-NEXT: fcvt.h.wu fa5, a0
452-
; CHECK-NEXT: fsh fa5, 14(sp)
453-
; CHECK-NEXT: addi a0, sp, 14
454-
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
455-
; CHECK-NEXT: vle16.v v8, (a0)
456-
; CHECK-NEXT: addi sp, sp, 16
440+
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
441+
; CHECK-NEXT: vfmv.s.f v8, fa5
457442
; CHECK-NEXT: ret
458443
%evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
459444
ret <1 x half> %evec

llvm/test/CodeGen/RISCV/rvv/pr63596.ll

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,36 +9,54 @@ define <4 x float> @foo(ptr %0) nounwind {
99
; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1010
; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1111
; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
12+
; CHECK-NEXT: csrr a1, vlenb
13+
; CHECK-NEXT: slli a1, a1, 1
14+
; CHECK-NEXT: sub sp, sp, a1
1215
; CHECK-NEXT: lhu s0, 0(a0)
1316
; CHECK-NEXT: lhu s1, 2(a0)
1417
; CHECK-NEXT: lhu s2, 4(a0)
1518
; CHECK-NEXT: lhu a0, 6(a0)
1619
; CHECK-NEXT: fmv.w.x fa0, a0
1720
; CHECK-NEXT: call __extendhfsf2
18-
; CHECK-NEXT: fsw fa0, 4(sp)
19-
; CHECK-NEXT: fmv.w.x fa0, s2
21+
; CHECK-NEXT: fmv.w.x fa5, s2
22+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
23+
; CHECK-NEXT: vfmv.s.f v8, fa0
24+
; CHECK-NEXT: addi a0, sp, 16
25+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
26+
; CHECK-NEXT: fmv.s fa0, fa5
2027
; CHECK-NEXT: call __extendhfsf2
21-
; CHECK-NEXT: fsw fa0, 12(sp)
28+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
29+
; CHECK-NEXT: vfmv.s.f v8, fa0
30+
; CHECK-NEXT: addi a0, sp, 16
31+
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
32+
; CHECK-NEXT: vslideup.vi v8, v9, 1
33+
; CHECK-NEXT: csrr a0, vlenb
34+
; CHECK-NEXT: add a0, sp, a0
35+
; CHECK-NEXT: addi a0, a0, 16
36+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
2237
; CHECK-NEXT: fmv.w.x fa0, s1
2338
; CHECK-NEXT: call __extendhfsf2
24-
; CHECK-NEXT: fsw fa0, 8(sp)
25-
; CHECK-NEXT: fmv.w.x fa0, s0
39+
; CHECK-NEXT: fmv.w.x fa5, s0
40+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
41+
; CHECK-NEXT: vfmv.s.f v8, fa0
42+
; CHECK-NEXT: addi a0, sp, 16
43+
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
44+
; CHECK-NEXT: fmv.s fa0, fa5
2645
; CHECK-NEXT: call __extendhfsf2
27-
; CHECK-NEXT: fsw fa0, 0(sp)
28-
; CHECK-NEXT: addi a0, sp, 4
29-
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
30-
; CHECK-NEXT: vle32.v v9, (a0)
31-
; CHECK-NEXT: addi a0, sp, 12
32-
; CHECK-NEXT: vle32.v v10, (a0)
33-
; CHECK-NEXT: addi a0, sp, 8
34-
; CHECK-NEXT: vle32.v v11, (a0)
35-
; CHECK-NEXT: mv a0, sp
36-
; CHECK-NEXT: vle32.v v8, (a0)
3746
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
38-
; CHECK-NEXT: vslideup.vi v10, v9, 1
39-
; CHECK-NEXT: vslideup.vi v8, v11, 1
47+
; CHECK-NEXT: vfmv.s.f v8, fa0
48+
; CHECK-NEXT: addi a0, sp, 16
49+
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
50+
; CHECK-NEXT: vslideup.vi v8, v9, 1
51+
; CHECK-NEXT: csrr a0, vlenb
52+
; CHECK-NEXT: add a0, sp, a0
53+
; CHECK-NEXT: addi a0, a0, 16
54+
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
4055
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
41-
; CHECK-NEXT: vslideup.vi v8, v10, 2
56+
; CHECK-NEXT: vslideup.vi v8, v9, 2
57+
; CHECK-NEXT: csrr a0, vlenb
58+
; CHECK-NEXT: slli a0, a0, 1
59+
; CHECK-NEXT: add sp, sp, a0
4260
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
4361
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
4462
; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload

0 commit comments

Comments (0)