Skip to content

Commit 464ea88

Browse files
authored
[LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle. (#100054)
[LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle operations. In LoongArch, shuffle operations can be divided into two types: - Single-vector shuffle: Shuffle using only one vector, with the other vector being `undef` or not selected by mask. This can be expanded to instructions such as `vreplvei` and `vshuf4i`. - Two-vector shuffle: Shuffle using two vectors. This can be expanded to instructions like `vilv[l/h]`, `vpack[ev/od]`, `vpick[ev/od]` and the basic `vshuf`. In the future, more optimizations may be added, such as handling 1-bit vectors and processing single element patterns, etc.
1 parent 73ffeea commit 464ea88

16 files changed

+2161
-3
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 930 additions & 3 deletions
Large diffs are not rendered by default.

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ enum NodeType : unsigned {
120120

121121
// Vector Shuffle
122122
VREPLVE,
123+
VSHUF,
124+
VPICKEV,
125+
VPICKOD,
126+
VPACKEV,
127+
VPACKOD,
128+
VILVL,
129+
VILVH,
130+
VSHUF4I,
131+
VREPLVEI,
132+
XVPERMI,
123133

124134
// Extended vector element extraction
125135
VPICK_SEXT_ELT,

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
// Target node for LoongArchISD::XVPERMI. It uses the SDT_loongArchV1RUimm
// profile: one vector operand plus an immediate, producing a vector of the
// same type as the vector operand.
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_loongArchV1RUimm>;
14+
1315
def lasxsplati8
1416
: PatFrag<(ops node:$e0),
1517
(v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
@@ -1575,6 +1577,134 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
15751577
def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
15761578
(XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
15771579

1580+
// XVSHUF_{B/H/W/D}
// The first operand of loongarch_vshuf is an integer vector even when the
// two source vectors are floating point (v8i32 with v8f32, v4i64 with v4f64).
// For the byte variant that operand ($xa) is moved to the end of the
// XVSHUF_B operand list; for the H/W/D variants it stays first as $xd.
1581+
def : Pat<(loongarch_vshuf v32i8:$xa, v32i8:$xj, v32i8:$xk),
1582+
(XVSHUF_B v32i8:$xj, v32i8:$xk, v32i8:$xa)>;
1583+
def : Pat<(loongarch_vshuf v16i16:$xd, v16i16:$xj, v16i16:$xk),
1584+
(XVSHUF_H v16i16:$xd, v16i16:$xj, v16i16:$xk)>;
1585+
def : Pat<(loongarch_vshuf v8i32:$xd, v8i32:$xj, v8i32:$xk),
1586+
(XVSHUF_W v8i32:$xd, v8i32:$xj, v8i32:$xk)>;
1587+
def : Pat<(loongarch_vshuf v4i64:$xd, v4i64:$xj, v4i64:$xk),
1588+
(XVSHUF_D v4i64:$xd, v4i64:$xj, v4i64:$xk)>;
1589+
def : Pat<(loongarch_vshuf v8i32:$xd, v8f32:$xj, v8f32:$xk),
1590+
(XVSHUF_W v8i32:$xd, v8f32:$xj, v8f32:$xk)>;
1591+
def : Pat<(loongarch_vshuf v4i64:$xd, v4f64:$xj, v4f64:$xk),
1592+
(XVSHUF_D v4i64:$xd, v4f64:$xj, v4f64:$xk)>;
1593+
1594+
// XVPICKEV_{B/H/W/D}
1595+
def : Pat<(loongarch_vpickev v32i8:$xj, v32i8:$xk),
1596+
(XVPICKEV_B v32i8:$xj, v32i8:$xk)>;
1597+
def : Pat<(loongarch_vpickev v16i16:$xj, v16i16:$xk),
1598+
(XVPICKEV_H v16i16:$xj, v16i16:$xk)>;
1599+
def : Pat<(loongarch_vpickev v8i32:$xj, v8i32:$xk),
1600+
(XVPICKEV_W v8i32:$xj, v8i32:$xk)>;
1601+
def : Pat<(loongarch_vpickev v4i64:$xj, v4i64:$xk),
1602+
(XVPICKEV_D v4i64:$xj, v4i64:$xk)>;
1603+
def : Pat<(loongarch_vpickev v8f32:$xj, v8f32:$xk),
1604+
(XVPICKEV_W v8f32:$xj, v8f32:$xk)>;
1605+
def : Pat<(loongarch_vpickev v4f64:$xj, v4f64:$xk),
1606+
(XVPICKEV_D v4f64:$xj, v4f64:$xk)>;
1607+
1608+
// XVPICKOD_{B/H/W/D}
1609+
def : Pat<(loongarch_vpickod v32i8:$xj, v32i8:$xk),
1610+
(XVPICKOD_B v32i8:$xj, v32i8:$xk)>;
1611+
def : Pat<(loongarch_vpickod v16i16:$xj, v16i16:$xk),
1612+
(XVPICKOD_H v16i16:$xj, v16i16:$xk)>;
1613+
def : Pat<(loongarch_vpickod v8i32:$xj, v8i32:$xk),
1614+
(XVPICKOD_W v8i32:$xj, v8i32:$xk)>;
1615+
def : Pat<(loongarch_vpickod v4i64:$xj, v4i64:$xk),
1616+
(XVPICKOD_D v4i64:$xj, v4i64:$xk)>;
1617+
def : Pat<(loongarch_vpickod v8f32:$xj, v8f32:$xk),
1618+
(XVPICKOD_W v8f32:$xj, v8f32:$xk)>;
1619+
def : Pat<(loongarch_vpickod v4f64:$xj, v4f64:$xk),
1620+
(XVPICKOD_D v4f64:$xj, v4f64:$xk)>;
1621+
1622+
// XVPACKEV_{B/H/W/D}
1623+
def : Pat<(loongarch_vpackev v32i8:$xj, v32i8:$xk),
1624+
(XVPACKEV_B v32i8:$xj, v32i8:$xk)>;
1625+
def : Pat<(loongarch_vpackev v16i16:$xj, v16i16:$xk),
1626+
(XVPACKEV_H v16i16:$xj, v16i16:$xk)>;
1627+
def : Pat<(loongarch_vpackev v8i32:$xj, v8i32:$xk),
1628+
(XVPACKEV_W v8i32:$xj, v8i32:$xk)>;
1629+
def : Pat<(loongarch_vpackev v4i64:$xj, v4i64:$xk),
1630+
(XVPACKEV_D v4i64:$xj, v4i64:$xk)>;
1631+
def : Pat<(loongarch_vpackev v8f32:$xj, v8f32:$xk),
1632+
(XVPACKEV_W v8f32:$xj, v8f32:$xk)>;
1633+
def : Pat<(loongarch_vpackev v4f64:$xj, v4f64:$xk),
1634+
(XVPACKEV_D v4f64:$xj, v4f64:$xk)>;
1635+
1636+
// XVPACKOD_{B/H/W/D}
1637+
def : Pat<(loongarch_vpackod v32i8:$xj, v32i8:$xk),
1638+
(XVPACKOD_B v32i8:$xj, v32i8:$xk)>;
1639+
def : Pat<(loongarch_vpackod v16i16:$xj, v16i16:$xk),
1640+
(XVPACKOD_H v16i16:$xj, v16i16:$xk)>;
1641+
def : Pat<(loongarch_vpackod v8i32:$xj, v8i32:$xk),
1642+
(XVPACKOD_W v8i32:$xj, v8i32:$xk)>;
1643+
def : Pat<(loongarch_vpackod v4i64:$xj, v4i64:$xk),
1644+
(XVPACKOD_D v4i64:$xj, v4i64:$xk)>;
1645+
def : Pat<(loongarch_vpackod v8f32:$xj, v8f32:$xk),
1646+
(XVPACKOD_W v8f32:$xj, v8f32:$xk)>;
1647+
def : Pat<(loongarch_vpackod v4f64:$xj, v4f64:$xk),
1648+
(XVPACKOD_D v4f64:$xj, v4f64:$xk)>;
1649+
1650+
// XVILVL_{B/H/W/D}
1651+
def : Pat<(loongarch_vilvl v32i8:$xj, v32i8:$xk),
1652+
(XVILVL_B v32i8:$xj, v32i8:$xk)>;
1653+
def : Pat<(loongarch_vilvl v16i16:$xj, v16i16:$xk),
1654+
(XVILVL_H v16i16:$xj, v16i16:$xk)>;
1655+
def : Pat<(loongarch_vilvl v8i32:$xj, v8i32:$xk),
1656+
(XVILVL_W v8i32:$xj, v8i32:$xk)>;
1657+
def : Pat<(loongarch_vilvl v4i64:$xj, v4i64:$xk),
1658+
(XVILVL_D v4i64:$xj, v4i64:$xk)>;
1659+
def : Pat<(loongarch_vilvl v8f32:$xj, v8f32:$xk),
1660+
(XVILVL_W v8f32:$xj, v8f32:$xk)>;
1661+
def : Pat<(loongarch_vilvl v4f64:$xj, v4f64:$xk),
1662+
(XVILVL_D v4f64:$xj, v4f64:$xk)>;
1663+
1664+
// XVILVH_{B/H/W/D}
1665+
def : Pat<(loongarch_vilvh v32i8:$xj, v32i8:$xk),
1666+
(XVILVH_B v32i8:$xj, v32i8:$xk)>;
1667+
def : Pat<(loongarch_vilvh v16i16:$xj, v16i16:$xk),
1668+
(XVILVH_H v16i16:$xj, v16i16:$xk)>;
1669+
def : Pat<(loongarch_vilvh v8i32:$xj, v8i32:$xk),
1670+
(XVILVH_W v8i32:$xj, v8i32:$xk)>;
1671+
def : Pat<(loongarch_vilvh v4i64:$xj, v4i64:$xk),
1672+
(XVILVH_D v4i64:$xj, v4i64:$xk)>;
1673+
def : Pat<(loongarch_vilvh v8f32:$xj, v8f32:$xk),
1674+
(XVILVH_W v8f32:$xj, v8f32:$xk)>;
1675+
def : Pat<(loongarch_vilvh v4f64:$xj, v4f64:$xk),
1676+
(XVILVH_D v4f64:$xj, v4f64:$xk)>;
1677+
1678+
// XVSHUF4I_{B/H/W}
1679+
def : Pat<(loongarch_vshuf4i v32i8:$xj, immZExt8:$ui8),
1680+
(XVSHUF4I_B v32i8:$xj, immZExt8:$ui8)>;
1681+
def : Pat<(loongarch_vshuf4i v16i16:$xj, immZExt8:$ui8),
1682+
(XVSHUF4I_H v16i16:$xj, immZExt8:$ui8)>;
1683+
def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
1684+
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
1685+
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
1686+
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
1687+
1688+
// XVREPL128VEI_{B/H/W/D}
1689+
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
1690+
(XVREPL128VEI_B v32i8:$xj, immZExt4:$ui4)>;
1691+
def : Pat<(loongarch_vreplvei v16i16:$xj, immZExt3:$ui3),
1692+
(XVREPL128VEI_H v16i16:$xj, immZExt3:$ui3)>;
1693+
def : Pat<(loongarch_vreplvei v8i32:$xj, immZExt2:$ui2),
1694+
(XVREPL128VEI_W v8i32:$xj, immZExt2:$ui2)>;
1695+
def : Pat<(loongarch_vreplvei v4i64:$xj, immZExt1:$ui1),
1696+
(XVREPL128VEI_D v4i64:$xj, immZExt1:$ui1)>;
1697+
def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2),
1698+
(XVREPL128VEI_W v8f32:$xj, immZExt2:$ui2)>;
1699+
def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1),
1700+
(XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>;
1701+
1702+
// XVPERMI_D
1703+
def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8: $ui8),
1704+
(XVPERMI_D v4i64:$xj, immZExt8: $ui8)>;
1705+
def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8: $ui8),
1706+
(XVPERMI_D v4f64:$xj, immZExt8: $ui8)>;
1707+
15781708
// XVREPLVE0_{W/D}
15791709
def : Pat<(lasxsplatf32 FPR32:$fj),
15801710
(XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
1515
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
1616
def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
1717

18+
// Type profile for the VSHUF node: one result and three operands.
//  - result (index 0) is a vector;
//  - operand 1 is an integer vector (presumably the shuffle control/mask --
//    confirm against the lowering code);
//  - operand 2 has the same type as the result;
//  - operand 3 has the same type as operand 2.
def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>,
19+
SDTCisInt<1>, SDTCisVec<1>,
20+
SDTCisSameAs<0, 2>,
21+
SDTCisSameAs<2, 3>]>;
22+
// Type profile for two-vector nodes: one vector result and two operands,
// all three constrained to the same type.
def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
23+
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
24+
// Type profile for "one vector + immediate" nodes: one vector result, a first
// operand of the same type as the result, and a second operand of type i64.
// NOTE(review): the second operand is fixed to i64, whereas neighbouring
// patterns use GRLenVT for scalar operands -- confirm this is intended for
// 32-bit targets.
// NOTE(review): the name starts with "SDT_loongArch" (lowercase 'l') unlike
// the other SDT_LoongArch* profiles; renaming would require updating every
// user.
def SDT_loongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
25+
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
1827
// Target nodes.
1928
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
2029
def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
@@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
3140
def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
3241
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
3342

43+
// Shuffle target nodes.
// VSHUF uses the three-operand SDT_LoongArchVShuf profile; the pick, pack and
// interleave nodes each take two vectors of the result type
// (SDT_LoongArchV2R).
def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;
44+
def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
45+
def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
46+
def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
47+
def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
48+
def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
49+
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
50+
51+
// Single-vector nodes that take a vector and an immediate
// (SDT_loongArchV1RUimm).
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
52+
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;
53+
54+
// Immediate leaves of type i64 that match only when the value fits in an
// unsigned N-bit field (N = 1, 2, 3, 4, 8), as checked by isUInt<N>(Imm).
// The patterns in this file use them to constrain the immediate operand of
// VSHUF4I (8 bits) and VREPLVEI (4/3/2/1 bits for B/H/W/D elements).
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
55+
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
56+
def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
57+
def immZExt4 : ImmLeaf<i64, [{return isUInt<4>(Imm);}]>;
58+
def immZExt8 : ImmLeaf<i64, [{return isUInt<8>(Imm);}]>;
59+
3460
class VecCond<SDPatternOperator OpNode, ValueType TyNode,
3561
RegisterClass RC = LSX128>
3662
: Pseudo<(outs GPR:$rd), (ins RC:$vj),
@@ -1682,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
16821708
def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
16831709
(VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
16841710

1711+
// VSHUF_{B/H/W/D}
// The first operand of loongarch_vshuf is an integer vector even when the
// two source vectors are floating point (v4i32 with v4f32, v2i64 with v2f64).
// For the byte variant that operand ($va) is moved to the end of the
// VSHUF_B operand list; for the H/W/D variants it stays first as $vd.
1712+
def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk),
1713+
(VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>;
1714+
def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk),
1715+
(VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>;
1716+
def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk),
1717+
(VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>;
1718+
def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk),
1719+
(VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>;
1720+
def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk),
1721+
(VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>;
1722+
def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk),
1723+
(VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>;
1724+
1725+
// VPICKEV_{B/H/W/D}
1726+
def : Pat<(loongarch_vpickev v16i8:$vj, v16i8:$vk),
1727+
(VPICKEV_B v16i8:$vj, v16i8:$vk)>;
1728+
def : Pat<(loongarch_vpickev v8i16:$vj, v8i16:$vk),
1729+
(VPICKEV_H v8i16:$vj, v8i16:$vk)>;
1730+
def : Pat<(loongarch_vpickev v4i32:$vj, v4i32:$vk),
1731+
(VPICKEV_W v4i32:$vj, v4i32:$vk)>;
1732+
def : Pat<(loongarch_vpickev v2i64:$vj, v2i64:$vk),
1733+
(VPICKEV_D v2i64:$vj, v2i64:$vk)>;
1734+
def : Pat<(loongarch_vpickev v4f32:$vj, v4f32:$vk),
1735+
(VPICKEV_W v4f32:$vj, v4f32:$vk)>;
1736+
def : Pat<(loongarch_vpickev v2f64:$vj, v2f64:$vk),
1737+
(VPICKEV_D v2f64:$vj, v2f64:$vk)>;
1738+
1739+
// VPICKOD_{B/H/W/D}
1740+
def : Pat<(loongarch_vpickod v16i8:$vj, v16i8:$vk),
1741+
(VPICKOD_B v16i8:$vj, v16i8:$vk)>;
1742+
def : Pat<(loongarch_vpickod v8i16:$vj, v8i16:$vk),
1743+
(VPICKOD_H v8i16:$vj, v8i16:$vk)>;
1744+
def : Pat<(loongarch_vpickod v4i32:$vj, v4i32:$vk),
1745+
(VPICKOD_W v4i32:$vj, v4i32:$vk)>;
1746+
def : Pat<(loongarch_vpickod v2i64:$vj, v2i64:$vk),
1747+
(VPICKOD_D v2i64:$vj, v2i64:$vk)>;
1748+
def : Pat<(loongarch_vpickod v4f32:$vj, v4f32:$vk),
1749+
(VPICKOD_W v4f32:$vj, v4f32:$vk)>;
1750+
def : Pat<(loongarch_vpickod v2f64:$vj, v2f64:$vk),
1751+
(VPICKOD_D v2f64:$vj, v2f64:$vk)>;
1752+
1753+
// VPACKEV_{B/H/W/D}
1754+
def : Pat<(loongarch_vpackev v16i8:$vj, v16i8:$vk),
1755+
(VPACKEV_B v16i8:$vj, v16i8:$vk)>;
1756+
def : Pat<(loongarch_vpackev v8i16:$vj, v8i16:$vk),
1757+
(VPACKEV_H v8i16:$vj, v8i16:$vk)>;
1758+
def : Pat<(loongarch_vpackev v4i32:$vj, v4i32:$vk),
1759+
(VPACKEV_W v4i32:$vj, v4i32:$vk)>;
1760+
def : Pat<(loongarch_vpackev v2i64:$vj, v2i64:$vk),
1761+
(VPACKEV_D v2i64:$vj, v2i64:$vk)>;
1762+
def : Pat<(loongarch_vpackev v4f32:$vj, v4f32:$vk),
1763+
(VPACKEV_W v4f32:$vj, v4f32:$vk)>;
1764+
def : Pat<(loongarch_vpackev v2f64:$vj, v2f64:$vk),
1765+
(VPACKEV_D v2f64:$vj, v2f64:$vk)>;
1766+
1767+
// VPACKOD_{B/H/W/D}
1768+
def : Pat<(loongarch_vpackod v16i8:$vj, v16i8:$vk),
1769+
(VPACKOD_B v16i8:$vj, v16i8:$vk)>;
1770+
def : Pat<(loongarch_vpackod v8i16:$vj, v8i16:$vk),
1771+
(VPACKOD_H v8i16:$vj, v8i16:$vk)>;
1772+
def : Pat<(loongarch_vpackod v4i32:$vj, v4i32:$vk),
1773+
(VPACKOD_W v4i32:$vj, v4i32:$vk)>;
1774+
def : Pat<(loongarch_vpackod v2i64:$vj, v2i64:$vk),
1775+
(VPACKOD_D v2i64:$vj, v2i64:$vk)>;
1776+
def : Pat<(loongarch_vpackod v4f32:$vj, v4f32:$vk),
1777+
(VPACKOD_W v4f32:$vj, v4f32:$vk)>;
1778+
def : Pat<(loongarch_vpackod v2f64:$vj, v2f64:$vk),
1779+
(VPACKOD_D v2f64:$vj, v2f64:$vk)>;
1780+
1781+
// VILVL_{B/H/W/D}
1782+
def : Pat<(loongarch_vilvl v16i8:$vj, v16i8:$vk),
1783+
(VILVL_B v16i8:$vj, v16i8:$vk)>;
1784+
def : Pat<(loongarch_vilvl v8i16:$vj, v8i16:$vk),
1785+
(VILVL_H v8i16:$vj, v8i16:$vk)>;
1786+
def : Pat<(loongarch_vilvl v4i32:$vj, v4i32:$vk),
1787+
(VILVL_W v4i32:$vj, v4i32:$vk)>;
1788+
def : Pat<(loongarch_vilvl v2i64:$vj, v2i64:$vk),
1789+
(VILVL_D v2i64:$vj, v2i64:$vk)>;
1790+
def : Pat<(loongarch_vilvl v4f32:$vj, v4f32:$vk),
1791+
(VILVL_W v4f32:$vj, v4f32:$vk)>;
1792+
def : Pat<(loongarch_vilvl v2f64:$vj, v2f64:$vk),
1793+
(VILVL_D v2f64:$vj, v2f64:$vk)>;
1794+
1795+
// VILVH_{B/H/W/D}
1796+
def : Pat<(loongarch_vilvh v16i8:$vj, v16i8:$vk),
1797+
(VILVH_B v16i8:$vj, v16i8:$vk)>;
1798+
def : Pat<(loongarch_vilvh v8i16:$vj, v8i16:$vk),
1799+
(VILVH_H v8i16:$vj, v8i16:$vk)>;
1800+
def : Pat<(loongarch_vilvh v4i32:$vj, v4i32:$vk),
1801+
(VILVH_W v4i32:$vj, v4i32:$vk)>;
1802+
def : Pat<(loongarch_vilvh v2i64:$vj, v2i64:$vk),
1803+
(VILVH_D v2i64:$vj, v2i64:$vk)>;
1804+
def : Pat<(loongarch_vilvh v4f32:$vj, v4f32:$vk),
1805+
(VILVH_W v4f32:$vj, v4f32:$vk)>;
1806+
def : Pat<(loongarch_vilvh v2f64:$vj, v2f64:$vk),
1807+
(VILVH_D v2f64:$vj, v2f64:$vk)>;
1808+
1809+
// VSHUF4I_{B/H/W}
1810+
def : Pat<(loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8),
1811+
(VSHUF4I_B v16i8:$vj, immZExt8:$ui8)>;
1812+
def : Pat<(loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8),
1813+
(VSHUF4I_H v8i16:$vj, immZExt8:$ui8)>;
1814+
def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
1815+
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
1816+
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
1817+
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
1818+
1819+
// VREPLVEI_{B/H/W/D}
1820+
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
1821+
(VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>;
1822+
def : Pat<(loongarch_vreplvei v8i16:$vj, immZExt3:$ui3),
1823+
(VREPLVEI_H v8i16:$vj, immZExt3:$ui3)>;
1824+
def : Pat<(loongarch_vreplvei v4i32:$vj, immZExt2:$ui2),
1825+
(VREPLVEI_W v4i32:$vj, immZExt2:$ui2)>;
1826+
def : Pat<(loongarch_vreplvei v2i64:$vj, immZExt1:$ui1),
1827+
(VREPLVEI_D v2i64:$vj, immZExt1:$ui1)>;
1828+
def : Pat<(loongarch_vreplvei v4f32:$vj, immZExt2:$ui2),
1829+
(VREPLVEI_W v4f32:$vj, immZExt2:$ui2)>;
1830+
def : Pat<(loongarch_vreplvei v2f64:$vj, immZExt1:$ui1),
1831+
(VREPLVEI_D v2f64:$vj, immZExt1:$ui1)>;
1832+
16851833
// VREPLVEI_{W/D}
16861834
def : Pat<(lsxsplatf32 FPR32:$fj),
16871835
(VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;

0 commit comments

Comments
 (0)