[SLP]Support vectorization of previously vectorized scalars in split nodes #134286

alexey-bataev · 2025-04-03T17:52:21Z

This patch removes the restriction on revectorizing previously
vectorized scalars in split nodes, and moves the cost-profitability
check earlier to avoid regressions.

Created using spr 1.3.5

llvmbot · 2025-04-03T17:52:59Z

@llvm/pr-subscribers-llvm-transforms

Author: Alexey Bataev (alexey-bataev)

Changes

Patch removes the restriction for the revectorization of the previously
vectorized scalars in split nodes, and moves the cost profitability
check to avoid regressions.

Full diff: https://github.com/llvm/llvm-project/pull/134286.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+16-23)
(modified) llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll (+12-8)
(modified) llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll (+2-3)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a115fec47aeec..8a12962ccf5d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9213,17 +9213,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         TTI.preferAlternateOpcodeVectorization() || !SplitAlternateInstructions)
       return false;
 
-    // Any value is used in split node already - just gather.
-    if (any_of(VL, [&](Value *V) {
-          return ScalarsInSplitNodes.contains(V) || isVectorized(V);
-        })) {
-      if (TryToFindDuplicates(S)) {
-        auto Invalid = ScheduleBundle::invalid();
-        newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndices);
-      }
-      return true;
-    }
     SmallVector<Value *> Op1, Op2;
     OrdersType ReorderIndices(VL.size(), VL.size());
     SmallBitVector Op1Indices(VL.size());
@@ -9282,6 +9271,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     // as alternate ops.
     if (NumParts >= VL.size())
       return false;
+    constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
+    InstructionCost InsertCost = ::getShuffleCost(
+        TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
+    FixedVectorType *SubVecTy =
+        getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
+    InstructionCost NewShuffleCost =
+        ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
+    if (LocalState.getOpcode() != Instruction::ICmp &&
+        LocalState.getOpcode() != Instruction::FCmp &&
+        LocalState.getAltOpcode() != Instruction::ICmp &&
+        LocalState.getAltOpcode() != Instruction::FCmp && NumParts <= 1 &&
+        (Mask.empty() || InsertCost >= NewShuffleCost))
+      return false;
     if ((LocalState.getMainOp()->isBinaryOp() &&
          LocalState.getAltOp()->isBinaryOp() &&
          (LocalState.isShiftOp() || LocalState.isBitwiseLogicOp() ||
@@ -9289,15 +9291,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         (LocalState.getMainOp()->isCast() && LocalState.getAltOp()->isCast()) ||
         (LocalState.getMainOp()->isUnaryOp() &&
          LocalState.getAltOp()->isUnaryOp())) {
-      constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
-      InstructionCost InsertCost = ::getShuffleCost(
-          TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
-      FixedVectorType *SubVecTy =
-          getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
-      InstructionCost NewShuffleCost =
-          ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
-      if (NumParts <= 1 && (Mask.empty() || InsertCost >= NewShuffleCost))
-        return false;
       InstructionCost OriginalVecOpsCost =
           TTI.getArithmeticInstrCost(Opcode0, VecTy, Kind) +
           TTI.getArithmeticInstrCost(Opcode1, VecTy, Kind);
@@ -9500,9 +9493,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
           ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
                                      /*Insert=*/false, /*Extract=*/true, Kind);
-      InstructionCost ScalarizeCostEstimate =
-          ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Vectorized,
-                                     /*Insert=*/true, /*Extract=*/false, Kind);
+      InstructionCost ScalarizeCostEstimate = ::getScalarizationOverhead(
+          *TTI, ScalarTy, VecTy, Vectorized,
+          /*Insert=*/true, /*Extract=*/false, Kind, /*ForPoisonSrc=*/false);
       PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
     }
     if (PreferScalarize) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
index 38e9ba7ce7028..1c4f51700d083 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
@@ -7,18 +7,22 @@ define i32 @a() {
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP17:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP6]] = load <4 x i8>, ptr null, align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i8> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP10]], <4 x i8> [[TMP6]], i64 4)
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 2, i32 3, i32 12, i32 3, i32 12, i32 13, i32 14>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> [[TMP11]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 3, i32 2, i32 9, i32 3, i32 11, i32 9, i32 13>
 ; CHECK-NEXT:    [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i8> [[TMP23]], <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:    store <8 x i8> [[TMP13]], ptr null, align 4
+; CHECK-NEXT:    store <8 x i8> [[TMP23]], ptr null, align 4
+; CHECK-NEXT:    [[TMP17]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label %[[BB1]]
 ;
   br label %1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
index e9884b24e1078..b7b6c10137b64 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
@@ -16,9 +16,8 @@ define i1 @test(ptr %0, ptr %1, <2 x float> %2, <2 x float> %3, <2 x float> %4)
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> [[TMP12]], <16 x i32> <i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 14, i32 14, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x float> [[TMP18]], float [[TMP9]], i32 15
-; CHECK-NEXT:    [[TMP20:%.*]] = fmul <16 x float> [[TMP17]], [[TMP19]]
+; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul <16 x float> [[TMP18]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP20]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = call float @foo(float [[TMP21]])
 ; CHECK-NEXT:    ret i1 false

llvmbot · 2025-04-03T17:53:00Z

@llvm/pr-subscribers-vectorizers

Author: Alexey Bataev (alexey-bataev)

Changes

Patch removes the restriction for the revectorization of the previously
vectorized scalars in split nodes, and moves the cost profitability
check to avoid regressions.

Full diff: https://github.com/llvm/llvm-project/pull/134286.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+16-23)
(modified) llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll (+12-8)
(modified) llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll (+2-3)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a115fec47aeec..8a12962ccf5d5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9213,17 +9213,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         TTI.preferAlternateOpcodeVectorization() || !SplitAlternateInstructions)
       return false;
 
-    // Any value is used in split node already - just gather.
-    if (any_of(VL, [&](Value *V) {
-          return ScalarsInSplitNodes.contains(V) || isVectorized(V);
-        })) {
-      if (TryToFindDuplicates(S)) {
-        auto Invalid = ScheduleBundle::invalid();
-        newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
-                     ReuseShuffleIndices);
-      }
-      return true;
-    }
     SmallVector<Value *> Op1, Op2;
     OrdersType ReorderIndices(VL.size(), VL.size());
     SmallBitVector Op1Indices(VL.size());
@@ -9282,6 +9271,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     // as alternate ops.
     if (NumParts >= VL.size())
       return false;
+    constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
+    InstructionCost InsertCost = ::getShuffleCost(
+        TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
+    FixedVectorType *SubVecTy =
+        getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
+    InstructionCost NewShuffleCost =
+        ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
+    if (LocalState.getOpcode() != Instruction::ICmp &&
+        LocalState.getOpcode() != Instruction::FCmp &&
+        LocalState.getAltOpcode() != Instruction::ICmp &&
+        LocalState.getAltOpcode() != Instruction::FCmp && NumParts <= 1 &&
+        (Mask.empty() || InsertCost >= NewShuffleCost))
+      return false;
     if ((LocalState.getMainOp()->isBinaryOp() &&
          LocalState.getAltOp()->isBinaryOp() &&
          (LocalState.isShiftOp() || LocalState.isBitwiseLogicOp() ||
@@ -9289,15 +9291,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         (LocalState.getMainOp()->isCast() && LocalState.getAltOp()->isCast()) ||
         (LocalState.getMainOp()->isUnaryOp() &&
          LocalState.getAltOp()->isUnaryOp())) {
-      constexpr TTI::TargetCostKind Kind = TTI::TCK_RecipThroughput;
-      InstructionCost InsertCost = ::getShuffleCost(
-          TTI, TTI::SK_InsertSubvector, VecTy, {}, Kind, Op1.size(), Op2VecTy);
-      FixedVectorType *SubVecTy =
-          getWidenedType(ScalarTy, std::max(Op1.size(), Op2.size()));
-      InstructionCost NewShuffleCost =
-          ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc, SubVecTy, Mask, Kind);
-      if (NumParts <= 1 && (Mask.empty() || InsertCost >= NewShuffleCost))
-        return false;
       InstructionCost OriginalVecOpsCost =
           TTI.getArithmeticInstrCost(Opcode0, VecTy, Kind) +
           TTI.getArithmeticInstrCost(Opcode1, VecTy, Kind);
@@ -9500,9 +9493,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, VecTy, {}, Kind) +
           ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Extracted,
                                      /*Insert=*/false, /*Extract=*/true, Kind);
-      InstructionCost ScalarizeCostEstimate =
-          ::getScalarizationOverhead(*TTI, ScalarTy, VecTy, Vectorized,
-                                     /*Insert=*/true, /*Extract=*/false, Kind);
+      InstructionCost ScalarizeCostEstimate = ::getScalarizationOverhead(
+          *TTI, ScalarTy, VecTy, Vectorized,
+          /*Insert=*/true, /*Extract=*/false, Kind, /*ForPoisonSrc=*/false);
       PreferScalarize = VectorizeCostEstimate > ScalarizeCostEstimate;
     }
     if (PreferScalarize) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
index 38e9ba7ce7028..1c4f51700d083 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/splat-score-adjustment.ll
@@ -7,18 +7,22 @@ define i32 @a() {
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x i8> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[BB1]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i8> [ zeroinitializer, [[TMP0]] ], [ [[TMP17:%.*]], %[[BB1]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP6]] = load <4 x i8>, ptr null, align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i8> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP10]], <4 x i8> [[TMP6]], i64 4)
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 2, i32 3, i32 12, i32 3, i32 12, i32 13, i32 14>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i8> [[TMP10]], <8 x i8> [[TMP11]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> [[TMP18]], <8 x i32> <i32 1, i32 3, i32 2, i32 9, i32 3, i32 11, i32 9, i32 13>
 ; CHECK-NEXT:    [[TMP22:%.*]] = xor <8 x i8> [[TMP18]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = xor <8 x i8> [[TMP22]], [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i8> [[TMP23]], <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:    store <8 x i8> [[TMP13]], ptr null, align 4
+; CHECK-NEXT:    store <8 x i8> [[TMP23]], ptr null, align 4
+; CHECK-NEXT:    [[TMP17]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    br label %[[BB1]]
 ;
   br label %1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
index e9884b24e1078..b7b6c10137b64 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll
@@ -16,9 +16,8 @@ define i1 @test(ptr %0, ptr %1, <2 x float> %2, <2 x float> %3, <2 x float> %4)
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP17:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> [[TMP12]], <16 x i32> <i32 8, i32 9, i32 9, i32 9, i32 9, i32 9, i32 14, i32 14, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x float> [[TMP18]], float [[TMP9]], i32 15
-; CHECK-NEXT:    [[TMP20:%.*]] = fmul <16 x float> [[TMP17]], [[TMP19]]
+; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8)
+; CHECK-NEXT:    [[TMP20:%.*]] = fmul <16 x float> [[TMP18]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP20]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = call float @foo(float [[TMP21]])
 ; CHECK-NEXT:    ret i1 false

alexey-bataev · 2025-04-09T17:44:32Z

Ping!

Created using spr 1.3.5

RKSimon

LGTM with one minor query

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Created using spr 1.3.5

[SLP]Support vectorization of previously vectorized scalars in split nodes

Patch removes the restriction for the revectorization of the previously vectorized scalars in split nodes, and moves the cost profitability check to avoid regressions.

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#134286

Sterling-Augustine · 2025-04-15T19:41:08Z

This PR makes the following test case crash. Stack trace below.

$ cat reduced.ll

define i64 @Foo(ptr align 8 dereferenceable(344) %0, i64 %1) {
  %3 = getelementptr i8, ptr %0, i64 104
  %4 = getelementptr i8, ptr %0, i64 112
  %5 = getelementptr i8, ptr %0, i64 24
  %6 = load i64, ptr %3, align 8
  %7 = load i64, ptr %4, align 8
  %8 = load i64, ptr %5, align 8
  %9 = load i64, ptr %0, align 8
  br label %10

10:                                               ; preds = %18, %2
  %11 = phi i64 [ %9, %2 ], [ 0, %18 ]
  %12 = phi i64 [ %8, %2 ], [ %12, %18 ]
  %13 = phi i64 [ %7, %2 ], [ 0, %18 ]
  %14 = phi i64 [ %6, %2 ], [ 0, %18 ]
  switch i32 0, label %15 [
    i32 0, label %18
  ]

15:                                               ; preds = %10
  %16 = tail call i64 @llvm.umin.i64(i64 0, i64 0)
  %17 = tail call i64 @llvm.umax.i64(i64 0, i64 0)
  br label %18

18:                                               ; preds = %15, %10
  %19 = phi i64 [ %17, %15 ], [ 0, %10 ]
  %20 = phi i64 [ %16, %15 ], [ 0, %10 ]
  %21 = phi i64 [ %11, %15 ], [ 0, %10 ]
  %22 = phi i64 [ %12, %15 ], [ 0, %10 ]
  %23 = phi i64 [ %13, %15 ], [ %1, %10 ]
  %24 = phi i64 [ %14, %15 ], [ 0, %10 ]
  br i1 false, label %.loopexit206, label %10

.loopexit206:                                     ; preds = %18
  switch i32 0, label %26 [
    i32 0, label %.cont174
    i32 1, label %25
  ]

25:                                               ; preds = %.loopexit206
  br label %.cont174

26:                                               ; preds = %.loopexit206
  %27 = tail call i64 @llvm.umin.i64(i64 0, i64 0)
  %28 = tail call i64 @llvm.umax.i64(i64 0, i64 0)
  br label %.cont174

.cont174:                                         ; preds = %26, %25, %.loopexit206
  %.sroa.139.1 = phi i64 [ %28, %26 ], [ %19, %25 ], [ %19, %.loopexit206 ]
  %.sroa.133.1 = phi i64 [ %27, %26 ], [ 0, %25 ], [ %20, %.loopexit206 ]
  %.sroa.81.1 = phi i64 [ %23, %26 ], [ 0, %25 ], [ %23, %.loopexit206 ]
  %.sroa.75.1 = phi i64 [ %24, %26 ], [ 0, %25 ], [ %24, %.loopexit206 ]
  %.sroa.21.1 = phi i64 [ %21, %26 ], [ 0, %25 ], [ %21, %.loopexit206 ]
  %.sroa.15.1 = phi i64 [ %22, %26 ], [ 0, %25 ], [ %22, %.loopexit206 ]
  %29 = phi i64 [ %28, %26 ], [ 0, %25 ], [ %19, %.loopexit206 ]
  %30 = phi i64 [ %27, %26 ], [ 0, %25 ], [ %20, %.loopexit206 ]
  ret i64 0
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umax.i64(i64, i64) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #0

; uselistorder directives
uselistorder ptr @llvm.umax.i64, { 1, 0 }
uselistorder ptr @llvm.umin.i64, { 1, 0 }

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
augustine:~/repro $ cat reduced.ll | ~/llvm/build/bin/opt -p slp-vectorizer -o /tmp/junk -mtriple=x86_64-unknown-linux-gnu -mattr=+aes -mattr=+cx16 -mattr=+sse4.2 -mattr=+pclmul -mattr=+prfchw -mattr=+avx
opt: /usr/local/google/home/saugustine/llvm/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:3861: ArrayRef<Value *> llvm::slpvectorizer::BoUpSLP::TreeEntry::getOperand(unsigned int) const: Assertion `OpIdx < Operands.size() && "Off bounds"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /usr/local/google/home/saugustine/llvm/build/bin/opt -p slp-vectorizer -o /tmp/junk -mtriple=x86_64-unknown-linux-gnu -mattr=+aes -mattr=+cx16 -mattr=+sse4.2 -mattr=+pclmul -mattr=+prfchw -mattr=+avx
1.	Running pass "function(slp-vectorizer)" on module "<stdin>"
2.	Running pass "slp-vectorizer" on function "Foo"
 #0 0x000056001761dda8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4a21da8)
 #1 0x000056001761b85e llvm::sys::RunSignalHandlers() (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4a1f85e)
 #2 0x000056001761e431 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007fac29849e20 (/lib/x86_64-linux-gnu/libc.so.6+0x3fe20)
 #4 0x00007fac2989de5c __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007fac29849d82 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007fac298324f0 abort ./stdlib/abort.c:81:7
 #7 0x00007fac29832418 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007fac29842692 (/lib/x86_64-linux-gnu/libc.so.6+0x38692)
 #9 0x00005600167de601 llvm::slpvectorizer::BoUpSLP::getOperandEntry(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, unsigned int) const (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3be2601)
#10 0x00005600168761ea bool __gnu_cxx::__ops::_Iter_negate<llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(bool)::$_8>::operator()<llvm::slpvectorizer::BoUpSLP::TreeEntry**>(llvm::slpvectorizer::BoUpSLP::TreeEntry**) SLPVectorizer.cpp:0:0
#11 0x00005600167b8572 llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3bbc572)
#12 0x0000560016823fb9 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c27fb9)
#13 0x0000560016829dfb bool tryToVectorizeSequence<llvm::Value>(llvm::SmallVectorImpl<llvm::Value*>&, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::ArrayRef<llvm::Value*>, bool)>, bool, llvm::slpvectorizer::BoUpSLP&) SLPVectorizer.cpp:0:0
#14 0x000056001681cb83 llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c20b83)
#15 0x000056001681a5fd llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c1e5fd)
#16 0x0000560016819ba6 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c1dba6)
#17 0x000056001574145d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x2b4545d)
#18 0x000056001745ebca llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4862bca)
#19 0x0000560013c8835d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x108c35d)
#20 0x0000560017462ab7 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4866ab7)
#21 0x0000560013c88b1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x108cb1d)
#22 0x000056001745dc7a llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4861c7a)
#23 0x00005600137e3737 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbe7737)
#24 0x00005600137d7a9e optMain (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbdba9e)
#25 0x00007fac29833d68 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#26 0x00007fac29833e25 call_init ./csu/../csu/libc-start.c:128:20
#27 0x00007fac29833e25 __libc_start_main ./csu/../csu/libc-start.c:347:5
#28 0x00005600137d1221 _start (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbd5221)

alexey-bataev · 2025-04-15T20:34:12Z

This pr makes the following test case crash. Stack trace below.

$ cat reduced.ll

define i64 @Foo(ptr align 8 dereferenceable(344) %0, i64 %1) {
  %3 = getelementptr i8, ptr %0, i64 104
  %4 = getelementptr i8, ptr %0, i64 112
  %5 = getelementptr i8, ptr %0, i64 24
  %6 = load i64, ptr %3, align 8
  %7 = load i64, ptr %4, align 8
  %8 = load i64, ptr %5, align 8
  %9 = load i64, ptr %0, align 8
  br label %10

10:                                               ; preds = %18, %2
  %11 = phi i64 [ %9, %2 ], [ 0, %18 ]
  %12 = phi i64 [ %8, %2 ], [ %12, %18 ]
  %13 = phi i64 [ %7, %2 ], [ 0, %18 ]
  %14 = phi i64 [ %6, %2 ], [ 0, %18 ]
  switch i32 0, label %15 [
    i32 0, label %18
  ]

15:                                               ; preds = %10
  %16 = tail call i64 @llvm.umin.i64(i64 0, i64 0)
  %17 = tail call i64 @llvm.umax.i64(i64 0, i64 0)
  br label %18

18:                                               ; preds = %15, %10
  %19 = phi i64 [ %17, %15 ], [ 0, %10 ]
  %20 = phi i64 [ %16, %15 ], [ 0, %10 ]
  %21 = phi i64 [ %11, %15 ], [ 0, %10 ]
  %22 = phi i64 [ %12, %15 ], [ 0, %10 ]
  %23 = phi i64 [ %13, %15 ], [ %1, %10 ]
  %24 = phi i64 [ %14, %15 ], [ 0, %10 ]
  br i1 false, label %.loopexit206, label %10

.loopexit206:                                     ; preds = %18
  switch i32 0, label %26 [
    i32 0, label %.cont174
    i32 1, label %25
  ]

25:                                               ; preds = %.loopexit206
  br label %.cont174

26:                                               ; preds = %.loopexit206
  %27 = tail call i64 @llvm.umin.i64(i64 0, i64 0)
  %28 = tail call i64 @llvm.umax.i64(i64 0, i64 0)
  br label %.cont174

.cont174:                                         ; preds = %26, %25, %.loopexit206
  %.sroa.139.1 = phi i64 [ %28, %26 ], [ %19, %25 ], [ %19, %.loopexit206 ]
  %.sroa.133.1 = phi i64 [ %27, %26 ], [ 0, %25 ], [ %20, %.loopexit206 ]
  %.sroa.81.1 = phi i64 [ %23, %26 ], [ 0, %25 ], [ %23, %.loopexit206 ]
  %.sroa.75.1 = phi i64 [ %24, %26 ], [ 0, %25 ], [ %24, %.loopexit206 ]
  %.sroa.21.1 = phi i64 [ %21, %26 ], [ 0, %25 ], [ %21, %.loopexit206 ]
  %.sroa.15.1 = phi i64 [ %22, %26 ], [ 0, %25 ], [ %22, %.loopexit206 ]
  %29 = phi i64 [ %28, %26 ], [ 0, %25 ], [ %19, %.loopexit206 ]
  %30 = phi i64 [ %27, %26 ], [ 0, %25 ], [ %20, %.loopexit206 ]
  ret i64 0
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umax.i64(i64, i64) #0

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.umin.i64(i64, i64) #0

; uselistorder directives
uselistorder ptr @llvm.umax.i64, { 1, 0 }
uselistorder ptr @llvm.umin.i64, { 1, 0 }

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
augustine:~/repro $ cat reduced.ll | ~/llvm/build/bin/opt -p slp-vectorizer -o /tmp/junk -mtriple=x86_64-unknown-linux-gnu -mattr=+aes -mattr=+cx16 -mattr=+sse4.2 -mattr=+pclmul -mattr=+prfchw -mattr=+avx
opt: /usr/local/google/home/saugustine/llvm/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:3861: ArrayRef<Value *> llvm::slpvectorizer::BoUpSLP::TreeEntry::getOperand(unsigned int) const: Assertion `OpIdx < Operands.size() && "Off bounds"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /usr/local/google/home/saugustine/llvm/build/bin/opt -p slp-vectorizer -o /tmp/junk -mtriple=x86_64-unknown-linux-gnu -mattr=+aes -mattr=+cx16 -mattr=+sse4.2 -mattr=+pclmul -mattr=+prfchw -mattr=+avx
1.	Running pass "function(slp-vectorizer)" on module "<stdin>"
2.	Running pass "slp-vectorizer" on function "Foo"
 #0 0x000056001761dda8 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4a21da8)
 #1 0x000056001761b85e llvm::sys::RunSignalHandlers() (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4a1f85e)
 #2 0x000056001761e431 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007fac29849e20 (/lib/x86_64-linux-gnu/libc.so.6+0x3fe20)
 #4 0x00007fac2989de5c __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007fac29849d82 raise ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x00007fac298324f0 abort ./stdlib/abort.c:81:7
 #7 0x00007fac29832418 _nl_load_domain ./intl/loadmsgcat.c:1177:9
 #8 0x00007fac29842692 (/lib/x86_64-linux-gnu/libc.so.6+0x38692)
 #9 0x00005600167de601 llvm::slpvectorizer::BoUpSLP::getOperandEntry(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, unsigned int) const (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3be2601)
#10 0x00005600168761ea bool __gnu_cxx::__ops::_Iter_negate<llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(bool)::$_8>::operator()<llvm::slpvectorizer::BoUpSLP::TreeEntry**>(llvm::slpvectorizer::BoUpSLP::TreeEntry**) SLPVectorizer.cpp:0:0
#11 0x00005600167b8572 llvm::slpvectorizer::BoUpSLP::reorderBottomToTop(bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3bbc572)
#12 0x0000560016823fb9 llvm::SLPVectorizerPass::tryToVectorizeList(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c27fb9)
#13 0x0000560016829dfb bool tryToVectorizeSequence<llvm::Value>(llvm::SmallVectorImpl<llvm::Value*>&, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::Value*, llvm::Value*)>, llvm::function_ref<bool (llvm::ArrayRef<llvm::Value*>, bool)>, bool, llvm::slpvectorizer::BoUpSLP&) SLPVectorizer.cpp:0:0
#14 0x000056001681cb83 llvm::SLPVectorizerPass::vectorizeChainsInBlock(llvm::BasicBlock*, llvm::slpvectorizer::BoUpSLP&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c20b83)
#15 0x000056001681a5fd llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c1e5fd)
#16 0x0000560016819ba6 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x3c1dba6)
#17 0x000056001574145d llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x2b4545d)
#18 0x000056001745ebca llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4862bca)
#19 0x0000560013c8835d llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x108c35d)
#20 0x0000560017462ab7 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4866ab7)
#21 0x0000560013c88b1d llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x108cb1d)
#22 0x000056001745dc7a llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0x4861c7a)
#23 0x00005600137e3737 llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool) (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbe7737)
#24 0x00005600137d7a9e optMain (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbdba9e)
#25 0x00007fac29833d68 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#26 0x00007fac29833e25 call_init ./csu/../csu/libc-start.c:128:20
#27 0x00007fac29833e25 __libc_start_main ./csu/../csu/libc-start.c:347:5
#28 0x00005600137d1221 _start (/usr/local/google/home/saugustine/llvm/build/bin/opt+0xbd5221)

Fixed in 85eb44e

[SLP]Support vectorization of previously vectorized scalars in split nodes

Patch removes the restriction for the revectorization of the previously vectorized scalars in split nodes, and moves the cost profitability check to avoid regressions.

Reviewers: hiraditya, RKSimon

Reviewed By: RKSimon

Pull Request: llvm#134286

[𝘀𝗽𝗿] initial version

3787680

Created using spr 1.3.5

llvmbot added vectorizers llvm:transforms labels Apr 3, 2025

alexey-bataev requested review from hiraditya and RKSimon April 3, 2025 17:52

Rebase

f64d965

Created using spr 1.3.5

RKSimon approved these changes Apr 10, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (review comment: outdated, resolved)

Rebase, address comment

ee0bcbe

Created using spr 1.3.5

alexey-bataev merged commit aaaa2a3 into main Apr 10, 2025
6 of 10 checks passed

alexey-bataev deleted the users/alexey-bataev/spr/slpsupport-vectorization-of-previously-vectorized-scalars-in-split-nodes branch April 10, 2025 16:06

dianqk mentioned this pull request Apr 26, 2025

Abnormally large compilation time with -O2 for a small file on x86_64-linux-gnu #135965

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SLP]Support vectorization of previously vectorized scalars in split nodes #134286

[SLP]Support vectorization of previously vectorized scalars in split nodes #134286

alexey-bataev commented Apr 3, 2025

llvmbot commented Apr 3, 2025

llvmbot commented Apr 3, 2025

alexey-bataev commented Apr 9, 2025

RKSimon left a comment

Sterling-Augustine commented Apr 15, 2025

alexey-bataev commented Apr 15, 2025

[SLP]Support vectorization of previously vectorized scalars in split nodes #134286

[SLP]Support vectorization of previously vectorized scalars in split nodes #134286

Conversation

alexey-bataev commented Apr 3, 2025

llvmbot commented Apr 3, 2025

llvmbot commented Apr 3, 2025

alexey-bataev commented Apr 9, 2025

RKSimon left a comment

Choose a reason for hiding this comment

Sterling-Augustine commented Apr 15, 2025

alexey-bataev commented Apr 15, 2025