Skip to content

[LoongArch][CodeGen] Implement 128-bit and 256-bit vector shuffle. #100054

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
933 changes: 930 additions & 3 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ enum NodeType : unsigned {

// Vector Shuffle
// Each opcode below backs the loongarch_* SDNode of the same name in the
// LSX/LASX .td files; the notes say which instructions those patterns select.
// VREPLVE: selected to [X]VREPLVE_{B/H/W/D} (element index held in a GPR).
VREPLVE,
// VSHUF: selected to [X]VSHUF_{B/H/W/D}; takes a selector vector plus two
// source vectors.
VSHUF,
// VPICKEV / VPICKOD: selected to [X]VPICKEV_* / [X]VPICKOD_*.
VPICKEV,
VPICKOD,
// VPACKEV / VPACKOD: selected to [X]VPACKEV_* / [X]VPACKOD_*.
VPACKEV,
VPACKOD,
// VILVL / VILVH: selected to [X]VILVL_* / [X]VILVH_*.
VILVL,
VILVH,
// VSHUF4I: selected to [X]VSHUF4I_{B/H/W} with an 8-bit unsigned immediate.
VSHUF4I,
// VREPLVEI: selected to VREPLVEI_* (LSX) or XVREPL128VEI_* (LASX) with an
// immediate element index.
VREPLVEI,
// XVPERMI: selected to XVPERMI_D; only LASX patterns use it.
XVPERMI,

// Extended vector element extraction
VPICK_SEXT_ELT,
Expand Down
130 changes: 130 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//

// LASX-only shuffle node: one vector source plus an i64 immediate. It is
// matched by the XVPERMI_D patterns in this file.
def loongarch_xvpermi
    : SDNode<"LoongArchISD::XVPERMI", SDT_loongArchV1RUimm>;

def lasxsplati8
: PatFrag<(ops node:$e0),
(v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
Expand Down Expand Up @@ -1575,6 +1577,134 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
(XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;

// XVSHUF_{B/H/W/D}
// loongarch_vshuf takes the selector vector first, followed by the two data
// sources. The selector is always an integer vector, so the f32/f64 patterns
// pair a v8i32/v4i64 selector with v8f32/v4f64 sources.
// NOTE: the operand order handed to the instruction is NOT uniform:
//   * XVSHUF_B receives the selector LAST  ($xj, $xk, $xa);
//   * XVSHUF_{H/W/D} receive the selector FIRST ($xd, $xj, $xk).
// NOTE(review): presumably the H/W/D forms read the selector from the
// destination register while the B form has a dedicated fourth register --
// confirm against the instruction definitions before reordering anything.
def : Pat<(loongarch_vshuf v32i8:$xa, v32i8:$xj, v32i8:$xk),
(XVSHUF_B v32i8:$xj, v32i8:$xk, v32i8:$xa)>;
def : Pat<(loongarch_vshuf v16i16:$xd, v16i16:$xj, v16i16:$xk),
(XVSHUF_H v16i16:$xd, v16i16:$xj, v16i16:$xk)>;
def : Pat<(loongarch_vshuf v8i32:$xd, v8i32:$xj, v8i32:$xk),
(XVSHUF_W v8i32:$xd, v8i32:$xj, v8i32:$xk)>;
def : Pat<(loongarch_vshuf v4i64:$xd, v4i64:$xj, v4i64:$xk),
(XVSHUF_D v4i64:$xd, v4i64:$xj, v4i64:$xk)>;
// Floating-point sources reuse the _W/_D instructions.
def : Pat<(loongarch_vshuf v8i32:$xd, v8f32:$xj, v8f32:$xk),
(XVSHUF_W v8i32:$xd, v8f32:$xj, v8f32:$xk)>;
def : Pat<(loongarch_vshuf v4i64:$xd, v4f64:$xj, v4f64:$xk),
(XVSHUF_D v4i64:$xd, v4f64:$xj, v4f64:$xk)>;

// Selection patterns for the two-source LASX shuffle nodes.
//
// Every node handled here takes two 256-bit vectors of the same type and is
// selected to the instruction named <Inst>_{B/H/W/D}, with both sources passed
// through in the same order. The v8f32 and v4f64 element types reuse the _W
// and _D instructions respectively.
multiclass LASXShuffle2RPat<SDPatternOperator OpNode, string Inst> {
  def : Pat<(OpNode v32i8:$lhs, v32i8:$rhs),
            (!cast<Instruction>(Inst # "_B") v32i8:$lhs, v32i8:$rhs)>;
  def : Pat<(OpNode v16i16:$lhs, v16i16:$rhs),
            (!cast<Instruction>(Inst # "_H") v16i16:$lhs, v16i16:$rhs)>;
  def : Pat<(OpNode v8i32:$lhs, v8i32:$rhs),
            (!cast<Instruction>(Inst # "_W") v8i32:$lhs, v8i32:$rhs)>;
  def : Pat<(OpNode v4i64:$lhs, v4i64:$rhs),
            (!cast<Instruction>(Inst # "_D") v4i64:$lhs, v4i64:$rhs)>;
  def : Pat<(OpNode v8f32:$lhs, v8f32:$rhs),
            (!cast<Instruction>(Inst # "_W") v8f32:$lhs, v8f32:$rhs)>;
  def : Pat<(OpNode v4f64:$lhs, v4f64:$rhs),
            (!cast<Instruction>(Inst # "_D") v4f64:$lhs, v4f64:$rhs)>;
}

// XVPICKEV_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vpickev, "XVPICKEV">;

// XVPICKOD_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vpickod, "XVPICKOD">;

// XVPACKEV_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vpackev, "XVPACKEV">;

// XVPACKOD_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vpackod, "XVPACKOD">;

// XVILVL_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vilvl, "XVILVL">;

// XVILVH_{B/H/W/D}
defm : LASXShuffle2RPat<loongarch_vilvh, "XVILVH">;

// XVSHUF4I_{B/H/W}
// One vector source plus a control constant that must fit in 8 unsigned bits.
// v8f32 shares the _W instruction; there is no pattern for 64-bit elements.
def : Pat<(loongarch_vshuf4i v32i8:$src, immZExt8:$ctl),
          (XVSHUF4I_B v32i8:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v16i16:$src, immZExt8:$ctl),
          (XVSHUF4I_H v16i16:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v8i32:$src, immZExt8:$ctl),
          (XVSHUF4I_W v8i32:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v8f32:$src, immZExt8:$ctl),
          (XVSHUF4I_W v8f32:$src, immZExt8:$ctl)>;

// XVREPL128VEI_{B/H/W/D}
// loongarch_vreplvei with a constant element index. The accepted index width
// shrinks as the element grows: 4 bits for bytes, 3 for halfwords, 2 for
// words (and f32), 1 for doublewords (and f64).
def : Pat<(loongarch_vreplvei v32i8:$src, immZExt4:$idx),
          (XVREPL128VEI_B v32i8:$src, immZExt4:$idx)>;
def : Pat<(loongarch_vreplvei v16i16:$src, immZExt3:$idx),
          (XVREPL128VEI_H v16i16:$src, immZExt3:$idx)>;
def : Pat<(loongarch_vreplvei v8i32:$src, immZExt2:$idx),
          (XVREPL128VEI_W v8i32:$src, immZExt2:$idx)>;
def : Pat<(loongarch_vreplvei v4i64:$src, immZExt1:$idx),
          (XVREPL128VEI_D v4i64:$src, immZExt1:$idx)>;
def : Pat<(loongarch_vreplvei v8f32:$src, immZExt2:$idx),
          (XVREPL128VEI_W v8f32:$src, immZExt2:$idx)>;
def : Pat<(loongarch_vreplvei v4f64:$src, immZExt1:$idx),
          (XVREPL128VEI_D v4f64:$src, immZExt1:$idx)>;

// XVPERMI_D
// Only the 64-bit element types (integer and floating point) are matched; the
// control constant must fit in 8 unsigned bits.
def : Pat<(loongarch_xvpermi v4i64:$src, immZExt8:$ctl),
          (XVPERMI_D v4i64:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_xvpermi v4f64:$src, immZExt8:$ctl),
          (XVPERMI_D v4f64:$src, immZExt8:$ctl)>;

// XVREPLVE0_{W/D}
def : Pat<(lasxsplatf32 FPR32:$fj),
(XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
Expand Down
148 changes: 148 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;

// Three-operand shuffle: vector result; operand 1 is an integer vector and
// operands 2 and 3 both have the result type.
def SDT_LoongArchVShuf
    : SDTypeProfile<1, 3, [SDTCisVec<0>,
                           SDTCisInt<1>, SDTCisVec<1>,
                           SDTCisSameAs<0, 2>,
                           SDTCisSameAs<2, 3>]>;

// Two-operand shuffle: vector result, both operands have the result type.
def SDT_LoongArchV2R
    : SDTypeProfile<1, 2, [SDTCisVec<0>,
                           SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;

// One vector operand of the result type followed by an i64 operand.
// NOTE(review): the lower-case 'l' in the name differs from the sibling
// SDT_LoongArch* profiles; it is kept because other records refer to this
// exact spelling.
def SDT_loongArchV1RUimm
    : SDTypeProfile<1, 2, [SDTCisVec<0>,
                           SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;

// Target nodes.
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
Expand All @@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;

// Shuffle with a selector vector and two source vectors.
def loongarch_vshuf   : SDNode<"LoongArchISD::VSHUF",   SDT_LoongArchVShuf>;

// Shuffles that combine two source vectors of the result type.
def loongarch_vpickev : SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
def loongarch_vpickod : SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
def loongarch_vpackev : SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
def loongarch_vpackod : SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
def loongarch_vilvl   : SDNode<"LoongArchISD::VILVL",   SDT_LoongArchV2R>;
def loongarch_vilvh   : SDNode<"LoongArchISD::VILVH",   SDT_LoongArchV2R>;

// Shuffles of a single source vector controlled by an i64 operand.
def loongarch_vshuf4i
    : SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
def loongarch_vreplvei
    : SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;

// i64 immediates that fit in N unsigned bits (N = 1, 2, 3, 4, 8). They bound
// the constant operands of the immediate-form shuffle patterns.
def immZExt1 : ImmLeaf<i64, [{
  return isUInt<1>(Imm);
}]>;
def immZExt2 : ImmLeaf<i64, [{
  return isUInt<2>(Imm);
}]>;
def immZExt3 : ImmLeaf<i64, [{
  return isUInt<3>(Imm);
}]>;
def immZExt4 : ImmLeaf<i64, [{
  return isUInt<4>(Imm);
}]>;
def immZExt8 : ImmLeaf<i64, [{
  return isUInt<8>(Imm);
}]>;

class VecCond<SDPatternOperator OpNode, ValueType TyNode,
RegisterClass RC = LSX128>
: Pseudo<(outs GPR:$rd), (ins RC:$vj),
Expand Down Expand Up @@ -1682,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
(VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;

// VSHUF_{B/H/W/D}
// loongarch_vshuf takes the selector vector first, followed by the two data
// sources. The selector is always an integer vector (SDT_LoongArchVShuf
// requires operand 1 to be integer), so the f32/f64 patterns pair a
// v4i32/v2i64 selector with v4f32/v2f64 sources.
// NOTE: the operand order handed to the instruction is NOT uniform:
//   * VSHUF_B receives the selector LAST  ($vj, $vk, $va);
//   * VSHUF_{H/W/D} receive the selector FIRST ($vd, $vj, $vk).
// NOTE(review): presumably the H/W/D forms read the selector from the
// destination register while the B form has a dedicated fourth register --
// confirm against the instruction definitions before reordering anything.
def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk),
(VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>;
def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk),
(VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>;
def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk),
(VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>;
def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk),
(VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>;
// Floating-point sources reuse the _W/_D instructions.
def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk),
(VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>;
def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk),
(VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>;

// Selection patterns for the two-source LSX shuffle nodes.
//
// Every node handled here takes two 128-bit vectors of the same type and is
// selected to the instruction named <Inst>_{B/H/W/D}, with both sources passed
// through in the same order. The v4f32 and v2f64 element types reuse the _W
// and _D instructions respectively.
multiclass LSXShuffle2RPat<SDPatternOperator OpNode, string Inst> {
  def : Pat<(OpNode v16i8:$lhs, v16i8:$rhs),
            (!cast<Instruction>(Inst # "_B") v16i8:$lhs, v16i8:$rhs)>;
  def : Pat<(OpNode v8i16:$lhs, v8i16:$rhs),
            (!cast<Instruction>(Inst # "_H") v8i16:$lhs, v8i16:$rhs)>;
  def : Pat<(OpNode v4i32:$lhs, v4i32:$rhs),
            (!cast<Instruction>(Inst # "_W") v4i32:$lhs, v4i32:$rhs)>;
  def : Pat<(OpNode v2i64:$lhs, v2i64:$rhs),
            (!cast<Instruction>(Inst # "_D") v2i64:$lhs, v2i64:$rhs)>;
  def : Pat<(OpNode v4f32:$lhs, v4f32:$rhs),
            (!cast<Instruction>(Inst # "_W") v4f32:$lhs, v4f32:$rhs)>;
  def : Pat<(OpNode v2f64:$lhs, v2f64:$rhs),
            (!cast<Instruction>(Inst # "_D") v2f64:$lhs, v2f64:$rhs)>;
}

// VPICKEV_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vpickev, "VPICKEV">;

// VPICKOD_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vpickod, "VPICKOD">;

// VPACKEV_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vpackev, "VPACKEV">;

// VPACKOD_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vpackod, "VPACKOD">;

// VILVL_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vilvl, "VILVL">;

// VILVH_{B/H/W/D}
defm : LSXShuffle2RPat<loongarch_vilvh, "VILVH">;

// VSHUF4I_{B/H/W}
// One vector source plus a control constant that must fit in 8 unsigned bits.
// v4f32 shares the _W instruction; there is no pattern for 64-bit elements.
def : Pat<(loongarch_vshuf4i v16i8:$src, immZExt8:$ctl),
          (VSHUF4I_B v16i8:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v8i16:$src, immZExt8:$ctl),
          (VSHUF4I_H v8i16:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v4i32:$src, immZExt8:$ctl),
          (VSHUF4I_W v4i32:$src, immZExt8:$ctl)>;
def : Pat<(loongarch_vshuf4i v4f32:$src, immZExt8:$ctl),
          (VSHUF4I_W v4f32:$src, immZExt8:$ctl)>;

// VREPLVEI_{B/H/W/D}
// loongarch_vreplvei with a constant element index. The accepted index width
// shrinks as the element grows: 4 bits for bytes, 3 for halfwords, 2 for
// words (and f32), 1 for doublewords (and f64).
def : Pat<(loongarch_vreplvei v16i8:$src, immZExt4:$idx),
          (VREPLVEI_B v16i8:$src, immZExt4:$idx)>;
def : Pat<(loongarch_vreplvei v8i16:$src, immZExt3:$idx),
          (VREPLVEI_H v8i16:$src, immZExt3:$idx)>;
def : Pat<(loongarch_vreplvei v4i32:$src, immZExt2:$idx),
          (VREPLVEI_W v4i32:$src, immZExt2:$idx)>;
def : Pat<(loongarch_vreplvei v2i64:$src, immZExt1:$idx),
          (VREPLVEI_D v2i64:$src, immZExt1:$idx)>;
def : Pat<(loongarch_vreplvei v4f32:$src, immZExt2:$idx),
          (VREPLVEI_W v4f32:$src, immZExt2:$idx)>;
def : Pat<(loongarch_vreplvei v2f64:$src, immZExt1:$idx),
          (VREPLVEI_D v2f64:$src, immZExt1:$idx)>;

// VREPLVEI_{W/D}
def : Pat<(lsxsplatf32 FPR32:$fj),
(VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
Expand Down
Loading
Loading