@@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
15
15
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
16
16
// Type profile with 1 result and 1 operand: the result (index 0) is
// integer-typed and the operand (index 1) is a vector.
def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
17
17
18
// Type profile for the VSHUF node: 1 result and 3 operands.
// Constraints exactly as written below:
//   - index 0 (the result) is a vector;
//   - index 1 (first operand) is an integer-typed vector;
//   - index 2 has the same type as the result;
//   - index 3 has the same type as index 2.
// The first operand is NOT tied to the result type, which is what lets the
// VSHUF patterns in this file pair a v4i32/v2i64 first operand with
// v4f32/v2f64 data operands.
+ def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>,
19
+ SDTCisInt<1>, SDTCisVec<1>,
20
+ SDTCisSameAs<0, 2>,
21
+ SDTCisSameAs<2, 3>]>;
22
// Type profile for two-register vector nodes: 1 result and 2 operands.
// The result is a vector, and both operands are constrained (transitively,
// via 0==1 and 1==2) to have the same type as the result.
+ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
23
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
24
// Type profile for "one vector register + immediate" nodes: 1 result and
// 2 operands. The result is a vector, operand 1 has the result's type, and
// operand 2 is fixed to the scalar type i64.
// NOTE(review): the name is spelled "SDT_loongArch..." (lowercase 'l') while
// the sibling profiles use "SDT_LoongArch..."; renaming it would also require
// updating every SDNode that references it.
// NOTE(review): operand 2 is hard-coded to i64 rather than a GRLen-dependent
// type -- confirm this matches the type the lowering code uses for the
// immediate on every supported configuration.
+ def SDT_loongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
25
+ SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26
+
18
27
// Target nodes.
19
28
// DAG node bound to the LoongArchISD::VREPLVE opcode, typed by
// SDT_LoongArchVreplve.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
20
29
def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
@@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
31
40
// DAG node bound to the LoongArchISD::VPICK_ZEXT_ELT opcode. Its type profile
// is given inline: 1 result, 3 operands, with value index 2 constrained to
// the target's pointer type.
def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
32
41
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
33
42
43
// DAG nodes for the register-operand shuffle family. Each def binds a
// TableGen node name to the LoongArchISD opcode given in its string argument.
// VSHUF uses the three-operand SDT_LoongArchVShuf profile; the pick/pack/
// interleave nodes all share the two-operand SDT_LoongArchV2R profile, so
// their operands and result must have one common vector type.
+ def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;
44
+ def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
45
+ def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
46
+ def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
47
+ def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
48
+ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
49
+ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
50
+
51
// DAG nodes for the "vector + immediate" shuffle forms. Both use the
// SDT_loongArchV1RUimm profile: one vector operand with the result's type,
// followed by an i64 operand.
+ def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
52
+ def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;
53
+
54
// Immediate predicates used by the shuffle patterns. immZExtN matches an i64
// immediate only when isUInt<N>(Imm) holds, i.e. when the value fits in N
// unsigned bits (0 .. 2^N - 1).
+ def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
55
+ def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
56
+ def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
57
+ def immZExt4 : ImmLeaf<i64, [{return isUInt<4>(Imm);}]>;
58
+ def immZExt8 : ImmLeaf<i64, [{return isUInt<8>(Imm);}]>;
59
+
34
60
class VecCond<SDPatternOperator OpNode, ValueType TyNode,
35
61
RegisterClass RC = LSX128>
36
62
: Pseudo<(outs GPR:$rd), (ins RC:$vj),
@@ -1682,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
1682
1708
// v2i64 form: select loongarch_vreplve to VREPLVE_D, forwarding the vector
// source and the GRLenVT-typed $rk operand unchanged.
def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
1683
1709
(VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
1684
1710
1711
+ // VSHUF_{B/H/W/D}
// Select loongarch_vshuf to the element-width-specific VSHUF instruction.
// Operand order differs between forms: the byte pattern passes the node's
// first operand ($va) as the LAST instruction operand, whereas the H/W/D
// patterns pass the node's first operand ($vd) first.
// NOTE(review): presumably this mirrors how the instructions take their
// selector (a separate $va register for VSHUF_B vs. the $vd register for
// H/W/D) -- confirm against the VSHUF_* instruction definitions.
// The last two patterns reuse VSHUF_W / VSHUF_D for v4f32 / v2f64 data, with
// an integer (v4i32 / v2i64) first operand.
1712
+ def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk),
1713
+ (VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>;
1714
+ def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk),
1715
+ (VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>;
1716
+ def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk),
1717
+ (VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>;
1718
+ def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk),
1719
+ (VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>;
1720
+ def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk),
1721
+ (VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>;
1722
+ def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk),
1723
+ (VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>;
1724
+
1725
+ // VPICKEV_{B/H/W/D}
// Select loongarch_vpickev to VPICKEV_*, one pattern per integer element
// width. Operands are forwarded in the same order ($vj, $vk). The v4f32 and
// v2f64 patterns reuse the W and D instructions respectively.
1726
+ def : Pat<(loongarch_vpickev v16i8:$vj, v16i8:$vk),
1727
+ (VPICKEV_B v16i8:$vj, v16i8:$vk)>;
1728
+ def : Pat<(loongarch_vpickev v8i16:$vj, v8i16:$vk),
1729
+ (VPICKEV_H v8i16:$vj, v8i16:$vk)>;
1730
+ def : Pat<(loongarch_vpickev v4i32:$vj, v4i32:$vk),
1731
+ (VPICKEV_W v4i32:$vj, v4i32:$vk)>;
1732
+ def : Pat<(loongarch_vpickev v2i64:$vj, v2i64:$vk),
1733
+ (VPICKEV_D v2i64:$vj, v2i64:$vk)>;
1734
+ def : Pat<(loongarch_vpickev v4f32:$vj, v4f32:$vk),
1735
+ (VPICKEV_W v4f32:$vj, v4f32:$vk)>;
1736
+ def : Pat<(loongarch_vpickev v2f64:$vj, v2f64:$vk),
1737
+ (VPICKEV_D v2f64:$vj, v2f64:$vk)>;
1738
+
1739
+ // VPICKOD_{B/H/W/D}
// Select loongarch_vpickod to VPICKOD_*, one pattern per integer element
// width. Operands are forwarded in the same order ($vj, $vk). The v4f32 and
// v2f64 patterns reuse the W and D instructions respectively.
1740
+ def : Pat<(loongarch_vpickod v16i8:$vj, v16i8:$vk),
1741
+ (VPICKOD_B v16i8:$vj, v16i8:$vk)>;
1742
+ def : Pat<(loongarch_vpickod v8i16:$vj, v8i16:$vk),
1743
+ (VPICKOD_H v8i16:$vj, v8i16:$vk)>;
1744
+ def : Pat<(loongarch_vpickod v4i32:$vj, v4i32:$vk),
1745
+ (VPICKOD_W v4i32:$vj, v4i32:$vk)>;
1746
+ def : Pat<(loongarch_vpickod v2i64:$vj, v2i64:$vk),
1747
+ (VPICKOD_D v2i64:$vj, v2i64:$vk)>;
1748
+ def : Pat<(loongarch_vpickod v4f32:$vj, v4f32:$vk),
1749
+ (VPICKOD_W v4f32:$vj, v4f32:$vk)>;
1750
+ def : Pat<(loongarch_vpickod v2f64:$vj, v2f64:$vk),
1751
+ (VPICKOD_D v2f64:$vj, v2f64:$vk)>;
1752
+
1753
+ // VPACKEV_{B/H/W/D}
// Select loongarch_vpackev to VPACKEV_*, one pattern per integer element
// width. Operands are forwarded in the same order ($vj, $vk). The v4f32 and
// v2f64 patterns reuse the W and D instructions respectively.
1754
+ def : Pat<(loongarch_vpackev v16i8:$vj, v16i8:$vk),
1755
+ (VPACKEV_B v16i8:$vj, v16i8:$vk)>;
1756
+ def : Pat<(loongarch_vpackev v8i16:$vj, v8i16:$vk),
1757
+ (VPACKEV_H v8i16:$vj, v8i16:$vk)>;
1758
+ def : Pat<(loongarch_vpackev v4i32:$vj, v4i32:$vk),
1759
+ (VPACKEV_W v4i32:$vj, v4i32:$vk)>;
1760
+ def : Pat<(loongarch_vpackev v2i64:$vj, v2i64:$vk),
1761
+ (VPACKEV_D v2i64:$vj, v2i64:$vk)>;
1762
+ def : Pat<(loongarch_vpackev v4f32:$vj, v4f32:$vk),
1763
+ (VPACKEV_W v4f32:$vj, v4f32:$vk)>;
1764
+ def : Pat<(loongarch_vpackev v2f64:$vj, v2f64:$vk),
1765
+ (VPACKEV_D v2f64:$vj, v2f64:$vk)>;
1766
+
1767
+ // VPACKOD_{B/H/W/D}
// Select loongarch_vpackod to VPACKOD_*, one pattern per integer element
// width. Operands are forwarded in the same order ($vj, $vk). The v4f32 and
// v2f64 patterns reuse the W and D instructions respectively.
1768
+ def : Pat<(loongarch_vpackod v16i8:$vj, v16i8:$vk),
1769
+ (VPACKOD_B v16i8:$vj, v16i8:$vk)>;
1770
+ def : Pat<(loongarch_vpackod v8i16:$vj, v8i16:$vk),
1771
+ (VPACKOD_H v8i16:$vj, v8i16:$vk)>;
1772
+ def : Pat<(loongarch_vpackod v4i32:$vj, v4i32:$vk),
1773
+ (VPACKOD_W v4i32:$vj, v4i32:$vk)>;
1774
+ def : Pat<(loongarch_vpackod v2i64:$vj, v2i64:$vk),
1775
+ (VPACKOD_D v2i64:$vj, v2i64:$vk)>;
1776
+ def : Pat<(loongarch_vpackod v4f32:$vj, v4f32:$vk),
1777
+ (VPACKOD_W v4f32:$vj, v4f32:$vk)>;
1778
+ def : Pat<(loongarch_vpackod v2f64:$vj, v2f64:$vk),
1779
+ (VPACKOD_D v2f64:$vj, v2f64:$vk)>;
1780
+
1781
+ // VILVL_{B/H/W/D}
// Select loongarch_vilvl to VILVL_*, one pattern per integer element width.
// Operands are forwarded in the same order ($vj, $vk). The v4f32 and v2f64
// patterns reuse the W and D instructions respectively.
1782
+ def : Pat<(loongarch_vilvl v16i8:$vj, v16i8:$vk),
1783
+ (VILVL_B v16i8:$vj, v16i8:$vk)>;
1784
+ def : Pat<(loongarch_vilvl v8i16:$vj, v8i16:$vk),
1785
+ (VILVL_H v8i16:$vj, v8i16:$vk)>;
1786
+ def : Pat<(loongarch_vilvl v4i32:$vj, v4i32:$vk),
1787
+ (VILVL_W v4i32:$vj, v4i32:$vk)>;
1788
+ def : Pat<(loongarch_vilvl v2i64:$vj, v2i64:$vk),
1789
+ (VILVL_D v2i64:$vj, v2i64:$vk)>;
1790
+ def : Pat<(loongarch_vilvl v4f32:$vj, v4f32:$vk),
1791
+ (VILVL_W v4f32:$vj, v4f32:$vk)>;
1792
+ def : Pat<(loongarch_vilvl v2f64:$vj, v2f64:$vk),
1793
+ (VILVL_D v2f64:$vj, v2f64:$vk)>;
1794
+
1795
+ // VILVH_{B/H/W/D}
// Select loongarch_vilvh to VILVH_*, one pattern per integer element width.
// Operands are forwarded in the same order ($vj, $vk). The v4f32 and v2f64
// patterns reuse the W and D instructions respectively.
1796
+ def : Pat<(loongarch_vilvh v16i8:$vj, v16i8:$vk),
1797
+ (VILVH_B v16i8:$vj, v16i8:$vk)>;
1798
+ def : Pat<(loongarch_vilvh v8i16:$vj, v8i16:$vk),
1799
+ (VILVH_H v8i16:$vj, v8i16:$vk)>;
1800
+ def : Pat<(loongarch_vilvh v4i32:$vj, v4i32:$vk),
1801
+ (VILVH_W v4i32:$vj, v4i32:$vk)>;
1802
+ def : Pat<(loongarch_vilvh v2i64:$vj, v2i64:$vk),
1803
+ (VILVH_D v2i64:$vj, v2i64:$vk)>;
1804
+ def : Pat<(loongarch_vilvh v4f32:$vj, v4f32:$vk),
1805
+ (VILVH_W v4f32:$vj, v4f32:$vk)>;
1806
+ def : Pat<(loongarch_vilvh v2f64:$vj, v2f64:$vk),
1807
+ (VILVH_D v2f64:$vj, v2f64:$vk)>;
1808
+
1809
+ // VSHUF4I_{B/H/W}
// Select loongarch_vshuf4i to VSHUF4I_* for byte, halfword and word vectors.
// The immediate must satisfy immZExt8 (fits in 8 unsigned bits) and is
// forwarded unchanged. v4f32 reuses VSHUF4I_W. No pattern is provided here
// for 64-bit element vectors (v2i64 / v2f64).
1810
+ def : Pat<(loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8),
1811
+ (VSHUF4I_B v16i8:$vj, immZExt8:$ui8)>;
1812
+ def : Pat<(loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8),
1813
+ (VSHUF4I_H v8i16:$vj, immZExt8:$ui8)>;
1814
+ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
1815
+ (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
1816
+ def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
1817
+ (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
1818
+
1819
+ // VREPLVEI_{B/H/W/D}
// Select loongarch_vreplvei to VREPLVEI_*. The immediate predicate narrows
// with the element count: 4 bits for 16 x i8, 3 bits for 8 x i16, 2 bits for
// 4-element vectors and 1 bit for 2-element vectors, so only values below
// the number of elements can match. The v4f32 and v2f64 patterns reuse the
// W and D instructions with the same immediate widths as v4i32 and v2i64.
1820
+ def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
1821
+ (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>;
1822
+ def : Pat<(loongarch_vreplvei v8i16:$vj, immZExt3:$ui3),
1823
+ (VREPLVEI_H v8i16:$vj, immZExt3:$ui3)>;
1824
+ def : Pat<(loongarch_vreplvei v4i32:$vj, immZExt2:$ui2),
1825
+ (VREPLVEI_W v4i32:$vj, immZExt2:$ui2)>;
1826
+ def : Pat<(loongarch_vreplvei v2i64:$vj, immZExt1:$ui1),
1827
+ (VREPLVEI_D v2i64:$vj, immZExt1:$ui1)>;
1828
+ def : Pat<(loongarch_vreplvei v4f32:$vj, immZExt2:$ui2),
1829
+ (VREPLVEI_W v4f32:$vj, immZExt2:$ui2)>;
1830
+ def : Pat<(loongarch_vreplvei v2f64:$vj, immZExt1:$ui1),
1831
+ (VREPLVEI_D v2f64:$vj, immZExt1:$ui1)>;
1832
+
1685
1833
// VREPLVEI_{W/D}
1686
1834
// Select lsxsplatf32 of an FPR32 value: SUBREG_TO_REG places $fj at the
// sub_32 subregister index, and VREPLVEI_W is then applied with immediate 0.
// NOTE(review): lsxsplatf32 is defined outside this view; presumably it
// matches a splat of a scalar f32 across the vector -- confirm at its
// definition.
def : Pat<(lsxsplatf32 FPR32:$fj),
1687
1835
(VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
0 commit comments