[SelectionDAG] Scalarize binary ops of splats before legal types #100749
Conversation
@llvm/pr-subscribers-backend-webassembly @llvm/pr-subscribers-llvm-selectiondag

Author: Froster (Fros1er)

Changes

The issue is solved by not checking isTypeLegal before type legalization when testing whether the scalar opcode is legal or custom. There are some cases that my approach can't scalarize, for example:

; test/CodeGen/RISCV/rvv/select-int.ll
define <vscale x 4 x i64> @select_nxv4i64(i1 zeroext %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
%v = select i1 %c, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b
ret <vscale x 4 x i64> %v
}

https://godbolt.org/z/xzqrKrxvK

Btw, maybe we should vectorize it in another PR...

define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
%a = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y)
%head.x = insertelement <vscale x 4 x i8> poison, i8 %a, i32 0
%splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x i8> %splat.x
}

nxv4i8: # @nxv4i8
andi a1, a1, 255
andi a0, a0, 255
add a0, a0, a1
li a1, 255
bltu a0, a1, .LBB0_2 # should use vsaddu.vx
li a0, 255
.LBB0_2:
vsetvli a1, zero, e8, mf2, ta, ma
vmv.v.x v8, a0
ret

Patch is 37.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100749.diff

11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 302ad128f4f53..9b2744f61720c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26930,7 +26930,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
- const SDLoc &DL) {
+ const SDLoc &DL, bool LegalTypes) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -26948,11 +26948,20 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
// TODO: use DAG.isSplatValue instead?
bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
N1.getOpcode() == ISD::SPLAT_VECTOR;
+
+ // If binop is legal or custom on EltVT, scalarize should be profitable. The
+ // check is the same as isOperationLegalOrCustom without isTypeLegal. We
+ // can do this only before LegalTypes, because it may generate illegal `op
+ // EltVT` from legal `op VT (splat EltVT)`, where EltVT is not legal type but
+ // the result type of splat is legal.
+ auto EltAction = TLI.getOperationAction(Opcode, EltVT);
if (!Src0 || !Src1 || Index0 != Index1 ||
Src0.getValueType().getVectorElementType() != EltVT ||
Src1.getValueType().getVectorElementType() != EltVT ||
!(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
- !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ (LegalTypes && !TLI.isOperationLegalOrCustom(Opcode, EltVT)) ||
+ !(EltAction == TargetLoweringBase::Legal ||
+ EltAction == TargetLoweringBase::Custom))
return SDValue();
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
@@ -27118,7 +27127,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL, LegalTypes))
return V;
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
index 764f148ecd3aa..5a5dee0b53d43 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll
@@ -16,14 +16,13 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z0.b, #0 // =0x0
-; CHECK-NEXT: sbfx x8, x8, #0, #1
; CHECK-NEXT: uunpklo z1.h, z0.b
; CHECK-NEXT: uunpkhi z0.h, z0.b
-; CHECK-NEXT: whilelo p1.b, xzr, x8
-; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mvn w8, w8
+; CHECK-NEXT: sbfx x8, x8, #0, #1
+; CHECK-NEXT: whilelo p0.b, xzr, x8
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpklo z5.s, z0.h
@@ -31,15 +30,15 @@ define fastcc i8 @allocno_reload_assign() {
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p1.b
+; CHECK-NEXT: punpkhi p3.h, p1.b
; CHECK-NEXT: uunpklo z0.d, z2.s
; CHECK-NEXT: uunpkhi z1.d, z2.s
-; CHECK-NEXT: punpkhi p3.h, p1.b
+; CHECK-NEXT: punpklo p5.h, p0.b
; CHECK-NEXT: uunpklo z2.d, z3.s
; CHECK-NEXT: uunpkhi z3.d, z3.s
-; CHECK-NEXT: punpklo p5.h, p0.b
+; CHECK-NEXT: punpkhi p7.h, p0.b
; CHECK-NEXT: uunpklo z4.d, z5.s
; CHECK-NEXT: uunpkhi z5.d, z5.s
-; CHECK-NEXT: punpkhi p7.h, p0.b
; CHECK-NEXT: uunpklo z6.d, z7.s
; CHECK-NEXT: uunpkhi z7.d, z7.s
; CHECK-NEXT: punpklo p0.h, p2.b
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
index 6875925adad83..f26e57b5a0b73 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
@@ -5,14 +5,11 @@
define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv1i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i1> %head.x, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -25,14 +22,11 @@ define <vscale x 1 x i1> @nxv1i1(i1 %x, i1 %y) {
define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv2i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i1> %head.x, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -45,14 +39,11 @@ define <vscale x 2 x i1> @nxv2i1(i1 %x, i1 %y) {
define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv4i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i1> %head.x, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -65,14 +56,11 @@ define <vscale x 4 x i1> @nxv4i1(i1 %x, i1 %y) {
define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv8i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v8, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v9, a1
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmxor.mm v0, v8, v9
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i1> %head.x, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -85,14 +73,11 @@ define <vscale x 8 x i1> @nxv8i1(i1 %x, i1 %y) {
define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v11, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v10, v11
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -105,14 +90,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) {
define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv32i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v12, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v13, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v12, v13
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -125,14 +107,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) {
define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
; CHECK-LABEL: nxv64i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vmsne.vi v16, v8, 0
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vmsne.vi v17, v8, 0
-; CHECK-NEXT: vmxor.mm v0, v16, v17
+; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0
%splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
@@ -145,9 +124,9 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) {
define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i8> %head.x, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -160,9 +139,9 @@ define <vscale x 1 x i8> @nxv1i8(i8 %x, i8 %y) {
define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i8> %head.x, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -175,9 +154,9 @@ define <vscale x 2 x i8> @nxv2i8(i8 %x, i8 %y) {
define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i8> %head.x, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -190,9 +169,9 @@ define <vscale x 4 x i8> @nxv4i8(i8 %x, i8 %y) {
define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv8i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i8> %head.x, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -205,9 +184,9 @@ define <vscale x 8 x i8> @nxv8i8(i8 %x, i8 %y) {
define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv16i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i8> %head.x, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
@@ -220,9 +199,9 @@ define <vscale x 16 x i8> @nxv16i8(i8 %x, i8 %y) {
define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv32i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i8> %head.x, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
@@ -235,9 +214,9 @@ define <vscale x 32 x i8> @nxv32i8(i8 %x, i8 %y) {
define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
; CHECK-LABEL: nxv64i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 64 x i8> poison, i8 %x, i32 0
%splat.x = shufflevector <vscale x 64 x i8> %head.x, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -250,9 +229,9 @@ define <vscale x 64 x i8> @nxv64i8(i8 %x, i8 %y) {
define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv1i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i16> %head.x, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -265,9 +244,9 @@ define <vscale x 1 x i16> @nxv1i16(i16 %x, i16 %y) {
define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i16> %head.x, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -280,9 +259,9 @@ define <vscale x 2 x i16> @nxv2i16(i16 %x, i16 %y) {
define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i16> %head.x, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -295,9 +274,9 @@ define <vscale x 4 x i16> @nxv4i16(i16 %x, i16 %y) {
define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 8 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 8 x i16> %head.x, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -310,9 +289,9 @@ define <vscale x 8 x i16> @nxv8i16(i16 %x, i16 %y) {
define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv16i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 16 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 16 x i16> %head.x, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
@@ -325,9 +304,9 @@ define <vscale x 16 x i16> @nxv16i16(i16 %x, i16 %y) {
define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
; CHECK-LABEL: nxv32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: ret
%head.x = insertelement <vscale x 32 x i16> poison, i16 %x, i32 0
%splat.x = shufflevector <vscale x 32 x i16> %head.x, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -338,19 +317,12 @@ define <vscale x 32 x i16> @nxv32i16(i16 %x, i16 %y) {
}
define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv1i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 1 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 1 x i32> %head.x, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%head.y = insertelement <vscale x 1 x i32> poison, i32 %y, i32 0
@@ -360,19 +332,12 @@ define <vscale x 1 x i32> @nxv1i32(i32 %x, i32 %y) {
}
define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv2i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 2 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 2 x i32> %head.x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%head.y = insertelement <vscale x 2 x i32> poison, i32 %y, i32 0
@@ -382,19 +347,12 @@ define <vscale x 2 x i32> @nxv2i32(i32 %x, i32 %y) {
}
define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: nxv4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vadd.vx v8, v8, a1
-; RV64-NEXT: ret
+; CHECK-LABEL: nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
%head.x = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
%splat.x = shufflevector <vscale x 4 x i32> %head.x, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%head.y = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
@@ -404,19 +362,12 @@ define <vscale x 4 x i32> @nxv4i32(i32 %x, i32 %y) {
}
define <vscale x 8 x i32> @nxv8i32(i32 %x, i32 %y) {
-; RV32-LABEL: nxv8i32:
-; RV32: # %bb.0:
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vsetvli a...
[truncated]
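For readers skimming the truncated diff, the DAGCombiner.cpp change above boils down to relaxing one profitability test in scalarizeBinOpOfSplats. A rough restatement of that test as a hypothetical standalone helper (the helper name and structure are illustrative; the real patch inlines this logic into the early-exit condition shown in the hunk):

// Hypothetical helper, not part of the patch: summarizes when scalarizing the
// splat binop is considered profitable for the element type EltVT.
static bool isScalarBinOpProfitable(const TargetLowering &TLI, unsigned Opcode,
                                    EVT EltVT, bool LegalTypes) {
  if (LegalTypes)
    // After type legalization we must not introduce ops on an illegal scalar
    // type, so keep the stricter check that also requires EltVT to be legal.
    return TLI.isOperationLegalOrCustom(Opcode, EltVT);
  // Before type legalization EltVT can still be promoted to a legal type, so
  // it is enough that the target marks the opcode Legal or Custom for it.
  auto Action = TLI.getOperationAction(Opcode, EltVT);
  return Action == TargetLoweringBase::Legal ||
         Action == TargetLoweringBase::Custom;
}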
Thanks, the changes in binop-splats.ll look good!
LGTM, I remember being stumped on this when I filed the issue but this patch handles it very elegantly :)
Also I hope you don't mind but I updated the PR title to add a tag, https://llvm.org/docs/DeveloperPolicy.html#id18
Do you need someone to land this for you?
Sorry, I forgot to add the tag when I submitted...
Yes, as I don't have write access. Thanks!
LGTM
// If binop is legal or custom on EltVT, scalarize should be profitable. The
// check is the same as isOperationLegalOrCustom without isTypeLegal. We
// can do this only before LegalTypes, because it may generate illegal `op
// EltVT` from legal `op VT (splat EltVT)`, where EltVT is not legal type but
// the result type of splat is legal.
I think we can remove this comment now
Landed, thanks for your patience with this. I tweaked the PR description a bit to better describe the new approach.
Fixes #65072. This allows binary ops of splats to be scalarized if the operation isn't legal on the element type, but is legal on the type it will be legalized to. I assume that if an op is legal in both scalar and vector form, choosing the scalar version should always be better, no matter what the type is.
There are some cases that my approach can't scalarize, for example:
https://godbolt.org/z/xzqrKrxvK
xor (splat i1, splat i1)
is generated in a late step after LegalizeTypes, from select. I didn't figure out how to make xor i1, i1 legal at this time.
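To make the transform itself concrete, here is a rough C++ sketch of the rewrite scalarizeBinOpOfSplats performs once the profitability check passes: extract the scalar sources, perform the binop at the element type, and splat the result. This is a simplified illustration with made-up names, not the function's actual body:

// Conceptual sketch only: turn "binop (splat x), (splat y)" into
// "splat (binop x, y)".
static SDValue scalarizeSplatBinOpSketch(SelectionDAG &DAG, const SDLoc &DL,
                                         unsigned Opcode, EVT VT, EVT EltVT,
                                         SDValue SrcVec0, SDValue SrcVec1) {
  SDValue Idx = DAG.getVectorIdxConstant(0, DL);
  SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SrcVec0, Idx);
  SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SrcVec1, Idx);
  // Before LegalizeTypes this scalar node may have an illegal EltVT; type
  // legalization will promote it later.
  SDValue ScalarOp = DAG.getNode(Opcode, DL, EltVT, X, Y);
  return DAG.getSplatVector(VT, DL, ScalarOp);
}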