Skip to content

[LV] Extend FindLastIV to unsigned case #141752

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

artagnon
Copy link
Contributor

In an effort to not have two different RecurKinds, one for the signed case, and another for the unsigned case, introduce RecurrenceDescriptor::isReduxSigned() to indicate whether the the RecurKind is of the signed or unsigned variant. Demonstrate its use by extending FindLastIV to the unsigned case.

In an effort to not have two different RecurKinds, one for the signed
case, and another for the unsigned case, introduce
RecurrenceDescriptor::isReduxSigned() to indicate whether the the
RecurKind is of the signed or unsigned variant. Demonstrate its use by
extending FindLastIV to the unsigned case.
@llvmbot
Copy link
Member

llvmbot commented May 28, 2025

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Ramkumar Ramachandra (artagnon)

Changes

In an effort to not have two different RecurKinds, one for the signed case, and another for the unsigned case, introduce RecurrenceDescriptor::isReduxSigned() to indicate whether the the RecurKind is of the signed or unsigned variant. Demonstrate its use by extending FindLastIV to the unsigned case.


Patch is 31.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141752.diff

5 Files Affected:

  • (modified) llvm/include/llvm/Analysis/IVDescriptors.h (+24-13)
  • (modified) llvm/lib/Analysis/IVDescriptors.cpp (+35-21)
  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1-1)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+4-3)
  • (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+146-56)
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index d94ffa7287db3..db2731869ed20 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -79,12 +79,13 @@ class RecurrenceDescriptor {
 
   RecurrenceDescriptor(Value *Start, Instruction *Exit, StoreInst *Store,
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
-                       Type *RT, bool Signed, bool Ordered,
-                       SmallPtrSetImpl<Instruction *> &CI,
+                       Type *RT, bool IsResultSigned, bool IsReduxSigned,
+                       bool Ordered, SmallPtrSetImpl<Instruction *> &CI,
                        unsigned MinWidthCastToRecurTy)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
-        IsSigned(Signed), IsOrdered(Ordered),
+        IsResultSigned(IsResultSigned), IsReduxSigned(IsReduxSigned),
+        IsOrdered(Ordered),
         MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
     CastInsts.insert_range(CI);
   }
@@ -96,12 +97,14 @@ class RecurrenceDescriptor {
         : IsRecurrence(IsRecur), PatternLastInst(I),
           RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
-        : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+    InstDesc(Instruction *I, RecurKind K, bool IsSigned = false)
+        : IsRecurrence(true), IsSigned(IsSigned), PatternLastInst(I),
+          RecKind(K) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
+    bool isSigned() const { return IsSigned; }
+
     bool needsExactFPMath() const { return ExactFPMathInst != nullptr; }
 
     Instruction *getExactFPMathInst() const { return ExactFPMathInst; }
@@ -113,13 +116,15 @@ class RecurrenceDescriptor {
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
+    // Is this recurrence a signed variant.
+    bool IsSigned = false;
     // The last instruction in a min/max pattern (select of the select(icmp())
     // pattern), or the current recurrence instruction otherwise.
     Instruction *PatternLastInst;
     // If this is a min/max pattern.
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
-    Instruction *ExactFPMathInst;
+    Instruction *ExactFPMathInst = nullptr;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -268,8 +273,9 @@ class RecurrenceDescriptor {
   Value *getSentinelValue() const {
     assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
     Type *Ty = StartValue->getType();
-    return ConstantInt::get(Ty,
-                            APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+    unsigned BW = Ty->getIntegerBitWidth();
+    return ConstantInt::get(Ty, isReduxSigned() ? APInt::getSignedMinValue(BW)
+                                                : APInt::getMinValue(BW));
   }
 
   /// Returns a reference to the instructions used for type-promoting the
@@ -281,8 +287,11 @@ class RecurrenceDescriptor {
     return MinWidthCastToRecurrenceType;
   }
 
-  /// Returns true if all source operands of the recurrence are SExtInsts.
-  bool isSigned() const { return IsSigned; }
+  /// Returns true if the reduction result is signed.
+  bool isResultSigned() const { return IsResultSigned; }
+
+  /// Returns true if the reduction redux is signed.
+  bool isReduxSigned() const { return IsReduxSigned; }
 
   /// Expose an ordered FP reduction to the instance users.
   bool isOrdered() const { return IsOrdered; }
@@ -318,8 +327,10 @@ class RecurrenceDescriptor {
   Instruction *ExactFPMathInst = nullptr;
   // The type of the recurrence.
   Type *RecurrenceType = nullptr;
-  // True if all source operands of the recurrence are SExtInsts.
-  bool IsSigned = false;
+  // True if reduction result is signed.
+  bool IsResultSigned = false;
+  // True if reduction redux is signed.
+  bool IsReduxSigned = false;
   // True if this recurrence can be treated as an in-order reduction.
   // Currently only a non-reassociative FAdd can be considered in-order,
   // if it is also the only FAdd in the PHI's use chain.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b7c7bcab168cc..8d568236f0053 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -88,7 +88,7 @@ static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
 }
 
 /// Compute the minimal bit width needed to represent a reduction whose exit
-/// instruction is given by Exit.
+/// instruction is given by Exit, along with its signedness.
 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                      DemandedBits *DB,
                                                      AssumptionCache *AC,
@@ -255,7 +255,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
-  bool IsSigned = false;
+  bool IsResultSigned = false, IsReduxSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
   SmallVector<Instruction *, 8> Worklist;
@@ -396,6 +396,7 @@ bool RecurrenceDescriptor::AddReductionVar(
       //       state accurate while processing the worklist?
       if (ReduxDesc.getRecKind() != RecurKind::None)
         Kind = ReduxDesc.getRecKind();
+      IsReduxSigned = ReduxDesc.isSigned();
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
@@ -565,7 +566,7 @@ bool RecurrenceDescriptor::AddReductionVar(
     //       smaller type. We should just generate a correctly typed expression
     //       to begin with.
     Type *ComputedType;
-    std::tie(ComputedType, IsSigned) =
+    std::tie(ComputedType, IsResultSigned) =
         computeRecurrenceType(ExitInstruction, DB, AC, DT);
     if (ComputedType != RecurrenceType)
       return false;
@@ -595,8 +596,9 @@ bool RecurrenceDescriptor::AddReductionVar(
 
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
-                          FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          FMF, ExactFPMathInst, RecurrenceType, IsResultSigned,
+                          IsReduxSigned, IsOrdered, CastInsts,
+                          MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
@@ -700,47 +702,59 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
                                      m_Value(NonRdxPhi)))))
     return InstDesc(false, I);
 
-  auto IsIncreasingLoopInduction = [&](Value *V) {
+  // Returns a non-nullopt boolean indicating the signedness of the recurrence
+  // when a valid FindLastIV pattern is found.
+  auto GetInductionSignedness = [&](Value *V) -> std::optional<bool> {
     Type *Ty = V->getType();
     if (!SE.isSCEVable(Ty))
-      return false;
+      return std::nullopt;
 
     auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
     if (!AR || AR->getLoop() != TheLoop)
-      return false;
+      return std::nullopt;
 
     const SCEV *Step = AR->getStepRecurrence(SE);
     if (!SE.isKnownPositive(Step))
-      return false;
+      return std::nullopt;
 
-    const ConstantRange IVRange = SE.getSignedRange(AR);
+    const ConstantRange SignedIVRange = SE.getSignedRange(AR);
+    const ConstantRange UnsignedIVRange = SE.getUnsignedRange(AR);
     unsigned NumBits = Ty->getIntegerBitWidth();
     // Keep the minimum value of the recurrence type as the sentinel value.
     // The maximum acceptable range for the increasing induction variable,
     // called the valid range, will be defined as
     //   [<sentinel value> + 1, <sentinel value>)
-    // where <sentinel value> is SignedMin(<recurrence type>)
+    // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>)
     // TODO: This range restriction can be lifted by adding an additional
     // virtual OR reduction.
-    const APInt Sentinel = APInt::getSignedMinValue(NumBits);
-    const ConstantRange ValidRange =
-        ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
-    LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
-                      << ", and the signed range of " << *AR << " is "
-                      << IVRange << "\n");
+    const APInt SignedSentinel = APInt::getSignedMinValue(NumBits);
+    const APInt UnsignedSentinel = APInt::getMinValue(NumBits);
+    const ConstantRange ValidSignedRange =
+        ConstantRange::getNonEmpty(SignedSentinel + 1, SignedSentinel);
+    const ConstantRange ValidUnsignedRange =
+        ConstantRange::getNonEmpty(UnsignedSentinel + 1, UnsignedSentinel);
+    LLVM_DEBUG(dbgs() << "LV: FindLastIV valid signed range is "
+                      << ValidSignedRange << ", valid unsigned range is "
+                      << ValidUnsignedRange << ", " << *AR
+                      << " signed range is " << SignedIVRange
+                      << ", and unsigned range is " << UnsignedIVRange << "\n");
     // Ensure the induction variable does not wrap around by verifying that its
     // range is fully contained within the valid range.
-    return ValidRange.contains(IVRange);
+    if (ValidSignedRange.contains(SignedIVRange))
+      return true;
+    if (ValidUnsignedRange.contains(UnsignedIVRange))
+      return false;
+    return std::nullopt;
   };
 
   // We are looking for selects of the form:
   //   select(cmp(), phi, increasing_loop_induction) or
   //   select(cmp(), increasing_loop_induction, phi)
   // TODO: Support for monotonically decreasing induction variable
-  if (!IsIncreasingLoopInduction(NonRdxPhi))
-    return InstDesc(false, I);
+  if (auto IsSigned = GetInductionSignedness(NonRdxPhi))
+    return InstDesc(I, RecurKind::FindLastIV, *IsSigned);
 
-  return InstDesc(I, RecurKind::FindLastIV);
+  return InstDesc(false, I);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 05b5764ffcafc..e5c13f7ddfb68 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9535,7 +9535,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       auto *Trunc =
           new VPWidenCastRecipe(Instruction::Trunc, NewExitingVPV, RdxTy);
       auto *Extnd =
-          RdxDesc.isSigned()
+          RdxDesc.isResultSigned()
               ? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
               : new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 18229780bc4a5..9404423c2afcd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -621,8 +621,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
     unsigned UF = getNumOperands() - 2;
     Value *ReducedPartRdx = State.get(getOperand(2));
     for (unsigned Part = 1; Part < UF; ++Part) {
-      ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
-                                      State.get(getOperand(2 + Part)));
+      ReducedPartRdx = createMinMaxOp(
+          Builder, RdxDesc.isReduxSigned() ? RecurKind::SMax : RecurKind::UMax,
+          ReducedPartRdx, State.get(getOperand(2 + Part)));
     }
 
     return createFindLastIVReduction(Builder, ReducedPartRdx,
@@ -696,7 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
       // If the reduction can be performed in a smaller type, we need to extend
       // the reduction to the wider type before we branch to the original loop.
       if (PhiTy != RdxDesc.getRecurrenceType())
-        ReducedPartRdx = RdxDesc.isSigned()
+        ReducedPartRdx = RdxDesc.isResultSigned()
                              ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                              : Builder.CreateZExt(ReducedPartRdx, PhiTy);
     }
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index e4597ebfe7dc8..a497ffcb9923a 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
@@ -843,66 +843,184 @@ exit:                                             ; preds = %for.body, %entry
 ; This test cannot be vectorized, even with a runtime check.
 ; The construct that are introduced by IndVarSimplify is:
 ;   %1 = trunc i64 %iv to i32
-; However, the loop exit condition is a constant that overflows i32:
+; The loop exit condition is a constant that overflows signed i32,
+; but not unsigned i32:
 ;   %exitcond.not = icmp eq i64 %inc, 4294967294
-; Hence, the i32 will most certainly wrap and hit the sentinel value, and we
-; cannot vectorize this case.
-define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
-; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; Hence, we can vectorize with the unsigned variant of FindLastIV.
+define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) {
+; CHECK-VF4IC1-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
 ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
 ; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
+; CHECK-VF4IC1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC1:       [[VECTOR_PH]]:
+; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC1:       [[VECTOR_BODY]]:
+; CHECK-VF4IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 0
+; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-VF4IC1-NEXT:    [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC1-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
+; CHECK-VF4IC1-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
+; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331
+; CHECK-VF4IC1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-VF4IC1:       [[SCALAR_PH]]:
+; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 4294967294, %[[MIDDLE_BLOCK]] ], [ 2147483646, %[[ENTRY]] ]
+; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
 ; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[FOR_BODY]]:
-; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
-; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4IC1-NEXT:    [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]]
+; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4
 ; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
-; CHECK-VF4IC1-NEXT:    [[CONV:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-VF4IC1-NEXT:    [[CONV:%.*]] = trunc i64 [[IV1]] to i32
 ; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
-; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
+; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV1]], 1
 ; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
-; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-VF4IC1:       [[EXIT]]:
-; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
 ;
-; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
+; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range(
 ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
 ; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
+; CHECK-VF4IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-VF4IC4:       [[VECTOR_PH]]:
+; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK-VF4IC4:       [[VECTOR_BODY]]:
+; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 2147483646, i32 2147483647, i32 -2147483648, i32 -2147483647>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
+; CHECK-VF4IC4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]]
+; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]]
+; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0
+; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
+; CHECK-VF4IC4-NEXT:    [[TMP4:%.*]] = getelemen...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants