Skip to content

Commit 60641b0

Browse files
[LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover more combinations. (llvm#115189)
Adds missing bfloat patterns for unpacked scalable vectors. Adds patterns for splatting extracts from fixed-length vectors.
1 parent bc368e4 commit 60641b0

File tree

2 files changed

+624
-49
lines changed

2 files changed

+624
-49
lines changed

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 104 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
5959
!eq(VT, nxv8f16): nxv2f16,
6060
!eq(VT, nxv8bf16): nxv2bf16,
6161
true : untyped);
62+
63+
// The 64-bit fixed-length vector type with VT's element type, as held in the dsub subregister.
64+
ValueType DSub = !cond(
65+
!eq(VT, nxv16i8): v8i8,
66+
!eq(VT, nxv8i16): v4i16,
67+
!eq(VT, nxv4i32): v2i32,
68+
!eq(VT, nxv2i64): v1i64,
69+
!eq(VT, nxv2f16): v4f16,
70+
!eq(VT, nxv4f16): v4f16,
71+
!eq(VT, nxv8f16): v4f16,
72+
!eq(VT, nxv2f32): v2f32,
73+
!eq(VT, nxv4f32): v2f32,
74+
!eq(VT, nxv2f64): v1f64,
75+
!eq(VT, nxv2bf16): v4bf16,
76+
!eq(VT, nxv4bf16): v4bf16,
77+
!eq(VT, nxv8bf16): v4bf16,
78+
true : untyped);
79+
80+
// The 128-bit fixed-length vector type with VT's element type, as held in the zsub subregister.
81+
ValueType ZSub = !cond(
82+
!eq(VT, nxv16i8): v16i8,
83+
!eq(VT, nxv8i16): v8i16,
84+
!eq(VT, nxv4i32): v4i32,
85+
!eq(VT, nxv2i64): v2i64,
86+
!eq(VT, nxv2f16): v8f16,
87+
!eq(VT, nxv4f16): v8f16,
88+
!eq(VT, nxv8f16): v8f16,
89+
!eq(VT, nxv2f32): v4f32,
90+
!eq(VT, nxv4f32): v4f32,
91+
!eq(VT, nxv2f64): v2f64,
92+
!eq(VT, nxv2bf16): v8bf16,
93+
!eq(VT, nxv4bf16): v8bf16,
94+
!eq(VT, nxv8bf16): v8bf16,
95+
true : untyped);
96+
97+
// The legal scalar used to hold a vector element.
98+
ValueType EltAsScalar = !cond(
99+
!eq(VT, nxv16i8): i32,
100+
!eq(VT, nxv8i16): i32,
101+
!eq(VT, nxv4i32): i32,
102+
!eq(VT, nxv2i64): i64,
103+
!eq(VT, nxv2f16): f16,
104+
!eq(VT, nxv4f16): f16,
105+
!eq(VT, nxv8f16): f16,
106+
!eq(VT, nxv2f32): f32,
107+
!eq(VT, nxv4f32): f32,
108+
!eq(VT, nxv2f64): f64,
109+
!eq(VT, nxv2bf16): bf16,
110+
!eq(VT, nxv4bf16): bf16,
111+
!eq(VT, nxv8bf16): bf16,
112+
true : untyped);
62113
}
63114

64115
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,61 @@ multiclass sve_int_perm_dup_i<string asm> {
14021453
def : InstAlias<"mov $Zd, $Qn",
14031454
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
14041455

1405-
// Duplicate extracted element of vector into all vector elements
1456+
// Duplicate an extracted vector element across a vector.
1457+
14061458
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
14071459
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
1408-
def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1409-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1410-
def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1411-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1412-
def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1413-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1414-
def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1415-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1416-
def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1417-
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1418-
def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1419-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1420-
def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1421-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1422-
def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1423-
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1424-
def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1425-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1426-
def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1427-
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1460+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
1461+
(!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
1462+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
1463+
(!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
1464+
1465+
foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
1466+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
1467+
(!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
1468+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
1469+
(!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
1470+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
1471+
(!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
1472+
}
1473+
1474+
foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in {
1475+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1476+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1477+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
1478+
(!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
1479+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
1480+
(!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
1481+
}
1482+
1483+
foreach VT = [nxv2i64, nxv2f64] in {
1484+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1485+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1486+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
1487+
(!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
1488+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
1489+
(!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
1490+
}
1491+
1492+
// When extracting from an unpacked vector the index must be scaled to account
1493+
// for the "holes" in the underlying packed vector type. We get the scaling
1494+
// for free by "promoting" the element type to one whose underlying vector
1495+
// type is packed. This is only valid when extracting from a vector whose
1496+
// length is at least that of the splat's result.
1497+
1498+
foreach VT = [nxv4f16, nxv4bf16] in {
1499+
def : Pat<(SVEType<VT>.HalfLength (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1500+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1501+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
1502+
(!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
1503+
}
1504+
1505+
foreach VT = [nxv2f16, nxv2f32, nxv2bf16] in {
1506+
def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
1507+
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
1508+
}
1509+
1510+
// Duplicate an indexed 128-bit segment across a vector.
14281511

14291512
def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
14301513
(!cast<Instruction>(NAME # _Q) $Op1, $imm)>;

0 commit comments

Comments
 (0)