[SLP] Make getSameOpcode support different instructions if they have same semantics. #112181

HanKuanChen · 2024-10-14T10:48:45Z

No description provided.

llvmbot · 2024-10-14T10:49:21Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-vectorizers

Author: Han-Kuan Chen (HanKuanChen)

Changes

Patch is 41.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112181.diff

14 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+207-29)
(modified) llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll (+4-4)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll (+3-4)
(modified) llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll (+4-4)
(modified) llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll (+1-3)
(modified) llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll (+11-16)
(modified) llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll (+14-13)
(modified) llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll (+1-3)
(modified) llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll (+1-3)
(modified) llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll (+1-3)
(modified) llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll (+12-7)
(modified) llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll (+23-13)
(modified) llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll (+30-32)
(modified) llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll (+1-3)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 401597af35bdac..fdda87e541ca74 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -818,6 +818,105 @@ struct InstructionsState {
 
 } // end anonymous namespace
 
+struct InterchangeableInstruction {
+  unsigned Opcode;
+  SmallVector<Value *> Ops;
+  template <class... ArgTypes>
+  InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
+      : Opcode(Opcode), Ops{std::forward<decltype(Args)>(Args)...} {}
+};
+
+bool operator<(const InterchangeableInstruction &LHS,
+               const InterchangeableInstruction &RHS) {
+  return LHS.Opcode < RHS.Opcode;
+}
+
+/// \returns a list of interchangeable instructions which \p I can be converted
+/// to.
+/// e.g.,
+/// x << y -> x * (2^y)
+/// x << 1 -> x *   2
+/// x << 0 -> x *   1   -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
+///           x *   0                     -> x & 0
+///           x *  -1   -> 0 - x
+/// TODO: support more patterns
+static SmallVector<InterchangeableInstruction, 6>
+getInterchangeableInstruction(Instruction *I) {
+  // PII = Possible Interchangeable Instruction
+  SmallVector<InterchangeableInstruction, 6> PII;
+  unsigned Opcode = I->getOpcode();
+  PII.emplace_back(Opcode, I->operands());
+  if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
+                     Instruction::Add},
+                    Opcode))
+    return PII;
+  Constant *C;
+  if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
+    ConstantInt *V = nullptr;
+    if (auto *CI = dyn_cast<ConstantInt>(C)) {
+      V = CI;
+    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
+        V = CI;
+    }
+    if (!V)
+      return PII;
+    Value *Op0 = I->getOperand(0);
+    Type *Op1Ty = I->getOperand(1)->getType();
+    const APInt &Op1Int = V->getValue();
+    Constant *Zero =
+        ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
+    Constant *UnsignedMax =
+        ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
+    switch (Opcode) {
+    case Instruction::Shl: {
+      PII.emplace_back(Instruction::Mul, Op0,
+                       ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::Sub, Op0, Zero);
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    }
+    case Instruction::Mul: {
+      switch (Op1Int.getSExtValue()) {
+      case 1:
+        PII.emplace_back(Instruction::Sub, Op0, Zero);
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+        break;
+      case 0:
+        PII.emplace_back(Instruction::And, Op0, Zero);
+        break;
+      case -1:
+        PII.emplace_back(Instruction::Sub, Zero, Op0);
+        break;
+      }
+      break;
+    }
+    case Instruction::Sub:
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::Add, Op0, Zero);
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    case Instruction::Add:
+      if (Op1Int.isZero()) {
+        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
+        PII.emplace_back(Instruction::Or, Op0, Zero);
+      }
+      break;
+    }
+  }
+  // std::set_intersection requires a sorted range.
+  sort(PII);
+  return PII;
+}
+
 /// \returns true if \p Opcode is allowed as part of the main/alternate
 /// instruction for SLP vectorization.
 ///
@@ -922,18 +1021,54 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
       return InstructionsState(VL[BaseIndex], nullptr, nullptr);
   }
+  // Currently, this is only used for binary ops.
+  // TODO: support all instructions
+  SmallVector<InterchangeableInstruction> InterchangeableOpcode =
+      getInterchangeableInstruction(cast<Instruction>(VL[BaseIndex]));
+  SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
+  auto UpdateInterchangeableOpcode =
+      [](SmallVector<InterchangeableInstruction> &LHS,
+         ArrayRef<InterchangeableInstruction> RHS) {
+        SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
+        std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+                              std::back_inserter(NewInterchangeableOpcode));
+        if (NewInterchangeableOpcode.empty())
+          return false;
+        LHS = std::move(NewInterchangeableOpcode);
+        return true;
+      };
   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
     auto *I = cast<Instruction>(VL[Cnt]);
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      if (InstOpcode == Opcode || InstOpcode == AltOpcode)
+      SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
+          getInterchangeableInstruction(I));
+      if (UpdateInterchangeableOpcode(InterchangeableOpcode,
+                                      ThisInterchangeableOpcode))
         continue;
-      if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
-          isValidForAlternation(Opcode)) {
-        AltOpcode = InstOpcode;
-        AltIndex = Cnt;
+      if (AlternateInterchangeableOpcode.empty()) {
+        InterchangeableOpcode.erase(
+            std::remove_if(InterchangeableOpcode.begin(),
+                           InterchangeableOpcode.end(),
+                           [](const InterchangeableInstruction &I) {
+                             return !isValidForAlternation(I.Opcode);
+                           }),
+            InterchangeableOpcode.end());
+        ThisInterchangeableOpcode.erase(
+            std::remove_if(ThisInterchangeableOpcode.begin(),
+                           ThisInterchangeableOpcode.end(),
+                           [](const InterchangeableInstruction &I) {
+                             return !isValidForAlternation(I.Opcode);
+                           }),
+            ThisInterchangeableOpcode.end());
+        if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+        AlternateInterchangeableOpcode = std::move(ThisInterchangeableOpcode);
         continue;
       }
+      if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
+                                      ThisInterchangeableOpcode))
+        continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = IBase->getOperand(0);
       Type *Ty0 = Op0->getType();
@@ -1027,6 +1162,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
   }
 
+  if (IsBinOp) {
+    auto FindOp =
+        [&](const SmallVector<InterchangeableInstruction> &CandidateOp) {
+          for (Value *V : VL)
+            for (const InterchangeableInstruction &I : CandidateOp)
+              if (cast<Instruction>(V)->getOpcode() == I.Opcode)
+                return cast<Instruction>(V);
+          llvm_unreachable(
+              "Cannot find the candidate instruction for InstructionsState.");
+        };
+    Instruction *MainOp = FindOp(InterchangeableOpcode);
+    Instruction *AltOp = AlternateInterchangeableOpcode.empty()
+                             ? MainOp
+                             : FindOp(AlternateInterchangeableOpcode);
+    return InstructionsState(VL[BaseIndex], MainOp, AltOp);
+  }
   return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
                            cast<Instruction>(VL[AltIndex]));
 }
@@ -2318,24 +2469,41 @@ class BoUpSLP {
                                  : cast<Instruction>(VL[0])->getNumOperands();
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
-      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+      InstructionsState S = getSameOpcode(VL, TLI);
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
         OpsVec[OpIdx].resize(NumLanes);
-        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-          assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
-          // Our tree has just 3 nodes: the root and two operands.
-          // It is therefore trivial to get the APO. We only need to check the
-          // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
-          // RHS operand. The LHS operand of both add and sub is never attached
-          // to an inversese operation in the linearized form, therefore its APO
-          // is false. The RHS is true only if VL[Lane] is an inverse operation.
-
-          // Since operand reordering is performed on groups of commutative
-          // operations or alternating sequences (e.g., +, -), we can safely
-          // tell the inverse operations by checking commutativity.
-          bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+      for (auto [I, V] : enumerate(VL)) {
+        assert(isa<Instruction>(V) && "Expected instruction");
+        SmallVector<InterchangeableInstruction> IIList =
+            getInterchangeableInstruction(cast<Instruction>(V));
+        Value *SelectedOp;
+        auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+          return II.Opcode == S.MainOp->getOpcode();
+        });
+        if (Iter == IIList.end()) {
+          Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+            return II.Opcode == S.AltOp->getOpcode();
+          });
+          SelectedOp = S.AltOp;
+        } else {
+          SelectedOp = S.MainOp;
+        }
+        assert(Iter != IIList.end() &&
+               "Cannot find an interchangeable instruction.");
+        // Our tree has just 3 nodes: the root and two operands.
+        // It is therefore trivial to get the APO. We only need to check the
+        // opcode of V and whether the operand at OpIdx is the LHS or RHS
+        // operand. The LHS operand of both add and sub is never attached to an
+        // inversese operation in the linearized form, therefore its APO is
+        // false. The RHS is true only if V is an inverse operation.
+
+        // Since operand reordering is performed on groups of commutative
+        // operations or alternating sequences (e.g., +, -), we can safely
+        // tell the inverse operations by checking commutativity.
+        bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
+        for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;
-          OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
-                                 APO, false};
+          OpsVec[OpIdx][I] = {Iter->Ops[OpIdx], APO, false};
         }
       }
     }
@@ -3227,15 +3395,25 @@ class BoUpSLP {
       auto *I0 = cast<Instruction>(Scalars[0]);
       Operands.resize(I0->getNumOperands());
       unsigned NumLanes = Scalars.size();
-      for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
-           OpIdx != NumOperands; ++OpIdx) {
+      unsigned NumOperands = I0->getNumOperands();
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
         Operands[OpIdx].resize(NumLanes);
-        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-          auto *I = cast<Instruction>(Scalars[Lane]);
-          assert(I->getNumOperands() == NumOperands &&
-                 "Expected same number of operands");
-          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
-        }
+      for (auto [I, V] : enumerate(Scalars)) {
+        SmallVector<InterchangeableInstruction> IIList =
+            getInterchangeableInstruction(cast<Instruction>(V));
+        auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+          return II.Opcode == MainOp->getOpcode();
+        });
+        if (Iter == IIList.end())
+          Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+            return II.Opcode == AltOp->getOpcode();
+          });
+        assert(Iter != IIList.end() &&
+               "Cannot find an interchangeable instruction.");
+        assert(Iter->Ops.size() == NumOperands &&
+               "Expected same number of operands");
+        for (auto [J, Op] : enumerate(Iter->Ops))
+          Operands[J][I] = Op;
       }
     }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index c18811a35c1eeb..c7c999bb572851 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index 3fa42047162e45..7bc03e7c7755b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    br label %[[BB:.*]]
 ; CHECK:       [[BB]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
+; CHECK-NEXT:    [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
 ; CHECK-NEXT:    [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 308d0e27f1ea89..e158c2a3ed87ea 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
+; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index d388fd17925a16..d2e70f05204d79 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @bar()
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
index 889f5a95c81d69..7af0c64f187480 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll
@@ -4,22 +4,17 @@
 define void @test(ptr %0, ptr %1, ptr %2) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
-; CHECK-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x ...
[truncated]

alexey-bataev · 2024-10-14T20:39:30Z

llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll

-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>


Shall we drop nsw here? Please double check

See def0fc1.
But I wonder why we pass VL0 instead of nullptr to propagateIRFlags. Even if we don't enable interchangeable instructions, passing nullptr should be enough.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

github-actions · 2024-10-17T04:07:23Z

✅ With the latest revision this PR passed the C/C++ code formatter.

same semantics.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

alexey-bataev · 2024-10-21T10:32:30Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -14935,7 +15108,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      Value *V = Builder.CreateBinOp(
          static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
          RHS);
-      propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+      propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());


Should it always be nullptr or are there cases where we can keep it?

Actually, I don't know why we pass VL0 here. Only an alternate operation should pass a non-nullptr value.

It should pass VL0 here, but we need to check whether all opcodes originally matched and, if not, pass the fourth argument /*IncludeWrapFlags=*/false.

Before the PR, there was no difference between VL0 and nullptr, because the opcode had to be the same for all of VL.
After the PR, VL0 cannot be used because the opcodes may differ.
I don't know why VL0 was used in the first place, since passing VL0 or nullptr gives the same result from propagateIRFlags.

I think this part still requires extra work. If the opcode of the instruction does not match the opcode of the intersection, its flags are ignored. This is not correct and must be fixed.

This is used to fix

- ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
- ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>

VL0 is mul here. If we pass VL0, then eventually nsw will be passed.
However, shl does not contain nsw. We should pass nullptr here to get the correct result.

Say you have something like {mul nsw, shl, mul nsw, mul nsw}. In this case you will still emit mul nsw <4 x >, because shl will be ignored.

Yes. But actually shl does not have nsw. We should emit mul <4 x >.

Yes, that is what is expected. But I assume that currently it will instead emit mul nsw <4 x >.

alexey-bataev · 2024-10-21T20:10:27Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+        if (NewInterchangeableOpcode.empty())
+          return false;
+        LHS.swap(NewInterchangeableOpcode);
+        return true;


Suggested change

Replace:

if (NewInterchangeableOpcode.empty())
  return false;
LHS.swap(NewInterchangeableOpcode);
return true;

with:

LHS.swap(NewInterchangeableOpcode);
return !LHS.empty();

We do not want LHS to be empty if NewInterchangeableOpcode is empty.

alexey-bataev · 2024-10-21T20:19:30Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -2335,24 +2479,41 @@ class BoUpSLP {
                                 : cast<Instruction>(VL[0])->getNumOperands();
      OpsVec.resize(NumOperands);
      unsigned NumLanes = VL.size();
-      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+      InstructionsState S = getSameOpcode(VL, TLI);


I suggest passing it to the VLOperands constructor and using it there instead of regenerating it.

Should we only use VLOperands? Right now the code is like this

// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
  ValueList Left, Right;
  reorderInputsAccordingToOpcode(VL, Left, Right, *this);
  TE->setOperand(0, Left);
  TE->setOperand(1, Right);
  buildTree_rec(Left, Depth + 1, {TE, 0});
  buildTree_rec(Right, Depth + 1, {TE, 1});
  return;
}
TE->setOperandsInOrder();
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
  buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});

We need to implement the logic in VLOperands and Tree::setOperandsInOrder. It will be simpler if we use VLOperands only.

You can try. But keep in mind, these elements should not be reordered at all. Otherwise, it may affect compile time.

I will do this in another PR.

see ee74f11

alexey-bataev · 2024-10-21T20:20:40Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+        auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+          return II.Opcode == S.MainOp->getOpcode();
+        });
+        if (Iter == IIList.end()) {
+          Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+            return II.Opcode == S.AltOp->getOpcode();
+          });
+          SelectedOp = S.AltOp;
+        } else {
+          SelectedOp = S.MainOp;
+        }


Better to allow specifying a particular opcode as a parameter of getInterchangeableInstruction, to allow early filtering and simplify the code.

Any examples? I cannot imagine how to modify getInterchangeableInstruction.

alexey-bataev · 2024-10-21T20:21:18Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+            getInterchangeableInstruction(cast<Instruction>(V));
+        auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+          return II.Opcode == MainOp->getOpcode();
+        });
+        if (Iter == IIList.end())
+          Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
+            return II.Opcode == AltOp->getOpcode();
+          });
+        assert(Iter != IIList.end() &&
+               "Cannot find an interchangeable instruction.");
+        assert(Iter->Ops.size() == NumOperands &&
+               "Expected same number of operands");


Same, try to add extra operand to getInterchangeableInstruction to allow filtering inside

alexey-bataev · 2024-10-21T20:23:05Z

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -14935,7 +15108,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
      Value *V = Builder.CreateBinOp(
          static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
          RHS);
-      propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+      propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());


It should pass VL0 here, but we need to check whether all opcodes originally matched and, if not, pass the fourth argument /*IncludeWrapFlags=*/false.

…-opcode

MainOp and AltOp.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

github-actions · 2024-12-12T05:12:11Z

✅ With the latest revision this PR passed the undef deprecator.

llvm-ci · 2024-12-13T04:11:41Z

LLVM Buildbot has detected a new failure on builder clang-hip-vega20 running on hip-vega20-0 while building llvm at step 3 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/11175

Here is the relevant piece of the build log for the reference

Step 3 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/hip-build.sh --jobs=' (failure)
...
[28/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang -O3 -DNDEBUG  tools/CMakeFiles/fpcmp-target.dir/fpcmp.c.o -o tools/fpcmp-target   && :
[29/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -MF External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o.d -o External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -c /buildbot/llvm-test-suite/External/HIP/empty.hip
[30/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -o External/HIP/empty-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/empty.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/empty.reference_output-hip-6.0.2
[31/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -fopenmp -MD -MT External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -MF External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o.d -o External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -c /buildbot/llvm-test-suite/External/HIP/with-fopenmp.hip
[32/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -MF External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o.d -o External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -c /buildbot/llvm-test-suite/External/HIP/saxpy.hip
[33/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -o External/HIP/with-fopenmp-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/with-fopenmp.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/with-fopenmp.reference_output-hip-6.0.2
[34/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -o External/HIP/saxpy-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/saxpy.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/saxpy.reference_output-hip-6.0.2
[35/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -MF External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o.d -o External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -c /buildbot/llvm-test-suite/External/HIP/memmove.hip
[36/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -o External/HIP/memmove-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/memmove.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/memmove.reference_output-hip-6.0.2
[37/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -xhip -mfma -MD -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -MF External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -c /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
FAILED: External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o 
/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -xhip -mfma -MD -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -MF External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -c /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
clang-19: /buildbot/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:11868: llvm::slpvectorizer::BoUpSLP::getEntryCost(const llvm::slpvectorizer::BoUpSLP::TreeEntry*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::<lambda(unsigned int)>: Assertion `E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19 -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -disable-free -clear-ast-before-backend -main-file-name main.cc -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +fma -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build -fcoverage-compilation-dir=/buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build -resource-dir /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20 -dependency-file External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -sys-header-deps -internal-isystem /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include/cuda_wrappers -idirafter /buildbot/Externals/hip/rocm-6.0.2/include -include __clang_hip_runtime_wrapper.h -D NDEBUG -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/backward -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/backward -internal-isystem /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem 
/buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Werror=date-time -w -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -fcuda-include-gpubinary /tmp/main-499139.hipfb -cuid=9c43066b7056eb66 -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -x hip /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
1.	<eof> parser at end of file
2.	Optimizer
3.	Running pass "function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "/buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc"
4.	Running pass "slp-vectorizer" on function "_ZL16stbi__idct_blockPhiPs"
 #0 0x000055bf6c9be8d0 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x29498d0)
 #1 0x000055bf6c9bbcef llvm::sys::RunSignalHandlers() (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x2946cef)
 #2 0x000055bf6c9bbe45 SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f59121f5520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007f59122499fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007f59122499fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007f59122499fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007f59121f5476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007f59121db7f3 abort ./stdlib/abort.c:81:7
 #9 0x00007f59121db71b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x00007f59121ece96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#11 0x000055bf6e406dec llvm::InstructionCost llvm::function_ref<llvm::InstructionCost (unsigned int)>::callback_fn<llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::'lambda21'(unsigned int)>(long, unsigned int) SLPVectorizer.cpp:0:0
#12 0x000055bf6e461749 llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::'lambda0'(llvm::function_ref<llvm::InstructionCost (unsigned int)>, llvm::function_ref<llvm::InstructionCost (llvm::InstructionCost)>)::operator()(llvm::function_ref<llvm::InstructionCost (unsigned int)>, llvm::function_ref<llvm::InstructionCost (llvm::InstructionCost)>) const SLPVectorizer.cpp:0:0
#13 0x000055bf6e4b34b4 llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x443e4b4)
#14 0x000055bf6e4cabe2 llvm::slpvectorizer::BoUpSLP::getTreeCost(llvm::ArrayRef<llvm::Value*>) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x4455be2)
#15 0x000055bf6e4dd0bc llvm::SLPVectorizerPass::vectorizeStoreChain(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, unsigned int, unsigned int, unsigned int&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x44680bc)
#16 0x000055bf6e4de984 llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::'lambda'(std::set<std::pair<unsigned int, int>, llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::StoreDistCompare, std::allocator<std::pair<unsigned int, int>>> const&)::operator()(std::set<std::pair<unsigned int, int>, llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::StoreDistCompare, std::allocator<std::pair<unsigned int, int>>> const&) const SLPVectorizer.cpp:0:0
#17 0x000055bf6e4e0158 llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446b158)
#18 0x000055bf6e4e11c9 llvm::SLPVectorizerPass::vectorizeStoreChains(llvm::slpvectorizer::BoUpSLP&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446c1c9)
#19 0x000055bf6e4e2725 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (.part.0) SLPVectorizer.cpp:0:0
#20 0x000055bf6e4e32f1 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446e2f1)
#21 0x000055bf6deffbb6 llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x3e8abb6)
#22 0x000055bf6c34210f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cd10f)
#23 0x000055bf6ad16286 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0xca1286)
#24 0x000055bf6c34046b llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cb46b)
#25 0x000055bf6ad14b96 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0xc9fb96)
#26 0x000055bf6c34106d llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cc06d)
#27 0x000055bf6cc55023 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) BackendUtil.cpp:0:0
#28 0x000055bf6cc5899c clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x2be399c)
Step 11 (Building HIP test-suite) failure: Building HIP test-suite (failure)
...
[28/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang -O3 -DNDEBUG  tools/CMakeFiles/fpcmp-target.dir/fpcmp.c.o -o tools/fpcmp-target   && :
[29/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -MF External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o.d -o External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -c /buildbot/llvm-test-suite/External/HIP/empty.hip
[30/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/empty-hip-6.0.2.dir/empty.hip.o -o External/HIP/empty-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/empty.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/empty.reference_output-hip-6.0.2
[31/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -fopenmp -MD -MT External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -MF External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o.d -o External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -c /buildbot/llvm-test-suite/External/HIP/with-fopenmp.hip
[32/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -MF External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o.d -o External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -c /buildbot/llvm-test-suite/External/HIP/saxpy.hip
[33/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/with-fopenmp-hip-6.0.2.dir/with-fopenmp.hip.o -o External/HIP/with-fopenmp-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/with-fopenmp.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/with-fopenmp.reference_output-hip-6.0.2
[34/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/saxpy-hip-6.0.2.dir/saxpy.hip.o -o External/HIP/saxpy-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/saxpy.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/saxpy.reference_output-hip-6.0.2
[35/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -MD -MT External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -MF External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o.d -o External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -c /buildbot/llvm-test-suite/External/HIP/memmove.hip
[36/38] : && /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -O3 -DNDEBUG  External/HIP/CMakeFiles/memmove-hip-6.0.2.dir/memmove.hip.o -o External/HIP/memmove-hip-6.0.2  --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --hip-link -rtlib=compiler-rt -unwindlib=libgcc -frtlib-add-rpath && cd /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP && /usr/local/bin/cmake -E create_symlink /buildbot/llvm-test-suite/External/HIP/memmove.reference_output /buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build/External/HIP/memmove.reference_output-hip-6.0.2
[37/38] /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -xhip -mfma -MD -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -MF External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -c /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
FAILED: External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o 
/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang++ -DNDEBUG  -O3 -DNDEBUG   -w -Werror=date-time --rocm-path=/buildbot/Externals/hip/rocm-6.0.2 --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx1030 --offload-arch=gfx1100 -xhip -mfma -MD -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -MF External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -c /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
clang-19: /buildbot/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp:11868: llvm::slpvectorizer::BoUpSLP::getEntryCost(const llvm::slpvectorizer::BoUpSLP::TreeEntry*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::<lambda(unsigned int)>: Assertion `E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19 -cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-obj -disable-free -clear-ast-before-backend -main-file-name main.cc -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +fma -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build -fcoverage-compilation-dir=/buildbot/hip-vega20-0/clang-hip-vega20/test-suite-build -resource-dir /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20 -dependency-file External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o.d -MT External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -sys-header-deps -internal-isystem /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include/cuda_wrappers -idirafter /buildbot/Externals/hip/rocm-6.0.2/include -include __clang_hip_runtime_wrapper.h -D NDEBUG -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/backward -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/backward -internal-isystem /buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -internal-isystem 
/buildbot/hip-vega20-0/clang-hip-vega20/llvm/lib/clang/20/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Werror=date-time -w -fdeprecated-macro -ferror-limit 19 -fhip-new-launch-api -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -fcuda-include-gpubinary /tmp/main-499139.hipfb -cuid=9c43066b7056eb66 -fcuda-allow-variadic-functions -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o External/HIP/CMakeFiles/TheNextWeek-hip-6.0.2.dir/workload/ray-tracing/TheNextWeek/main.cc.o -x hip /buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc
1.	<eof> parser at end of file
2.	Optimizer
3.	Running pass "function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;no-prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,slp-vectorizer,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>)" on module "/buildbot/llvm-test-suite/External/HIP/workload/ray-tracing/TheNextWeek/main.cc"
4.	Running pass "slp-vectorizer" on function "_ZL16stbi__idct_blockPhiPs"
 #0 0x000055bf6c9be8d0 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x29498d0)
 #1 0x000055bf6c9bbcef llvm::sys::RunSignalHandlers() (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x2946cef)
 #2 0x000055bf6c9bbe45 SignalHandler(int) Signals.cpp:0:0
 #3 0x00007f59121f5520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x00007f59122499fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007f59122499fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007f59122499fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007f59121f5476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007f59121db7f3 abort ./stdlib/abort.c:81:7
 #9 0x00007f59121db71b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x00007f59121ece96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#11 0x000055bf6e406dec llvm::InstructionCost llvm::function_ref<llvm::InstructionCost (unsigned int)>::callback_fn<llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::'lambda21'(unsigned int)>(long, unsigned int) SLPVectorizer.cpp:0:0
#12 0x000055bf6e461749 llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&)::'lambda0'(llvm::function_ref<llvm::InstructionCost (unsigned int)>, llvm::function_ref<llvm::InstructionCost (llvm::InstructionCost)>)::operator()(llvm::function_ref<llvm::InstructionCost (unsigned int)>, llvm::function_ref<llvm::InstructionCost (llvm::InstructionCost)>) const SLPVectorizer.cpp:0:0
#13 0x000055bf6e4b34b4 llvm::slpvectorizer::BoUpSLP::getEntryCost(llvm::slpvectorizer::BoUpSLP::TreeEntry const*, llvm::ArrayRef<llvm::Value*>, llvm::SmallPtrSetImpl<llvm::Value*>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x443e4b4)
#14 0x000055bf6e4cabe2 llvm::slpvectorizer::BoUpSLP::getTreeCost(llvm::ArrayRef<llvm::Value*>) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x4455be2)
#15 0x000055bf6e4dd0bc llvm::SLPVectorizerPass::vectorizeStoreChain(llvm::ArrayRef<llvm::Value*>, llvm::slpvectorizer::BoUpSLP&, unsigned int, unsigned int, unsigned int&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x44680bc)
#16 0x000055bf6e4de984 llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::'lambda'(std::set<std::pair<unsigned int, int>, llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::StoreDistCompare, std::allocator<std::pair<unsigned int, int>>> const&)::operator()(std::set<std::pair<unsigned int, int>, llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&)::StoreDistCompare, std::allocator<std::pair<unsigned int, int>>> const&) const SLPVectorizer.cpp:0:0
#17 0x000055bf6e4e0158 llvm::SLPVectorizerPass::vectorizeStores(llvm::ArrayRef<llvm::StoreInst*>, llvm::slpvectorizer::BoUpSLP&, llvm::DenseSet<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, llvm::DenseMapInfo<std::tuple<llvm::Value*, llvm::Value*, llvm::Value*, llvm::Value*, unsigned int>, void>>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446b158)
#18 0x000055bf6e4e11c9 llvm::SLPVectorizerPass::vectorizeStoreChains(llvm::slpvectorizer::BoUpSLP&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446c1c9)
#19 0x000055bf6e4e2725 llvm::SLPVectorizerPass::runImpl(llvm::Function&, llvm::ScalarEvolution*, llvm::TargetTransformInfo*, llvm::TargetLibraryInfo*, llvm::AAResults*, llvm::LoopInfo*, llvm::DominatorTree*, llvm::AssumptionCache*, llvm::DemandedBits*, llvm::OptimizationRemarkEmitter*) (.part.0) SLPVectorizer.cpp:0:0
#20 0x000055bf6e4e32f1 llvm::SLPVectorizerPass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x446e2f1)
#21 0x000055bf6deffbb6 llvm::detail::PassModel<llvm::Function, llvm::SLPVectorizerPass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x3e8abb6)
#22 0x000055bf6c34210f llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cd10f)
#23 0x000055bf6ad16286 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0xca1286)
#24 0x000055bf6c34046b llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cb46b)
#25 0x000055bf6ad14b96 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0xc9fb96)
#26 0x000055bf6c34106d llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x22cc06d)
#27 0x000055bf6cc55023 (anonymous namespace)::EmitAssemblyHelper::RunOptimizationPipeline(clang::BackendAction, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>&, std::unique_ptr<llvm::ToolOutputFile, std::default_delete<llvm::ToolOutputFile>>&, clang::BackendConsumer*) BackendUtil.cpp:0:0
#28 0x000055bf6cc5899c clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/buildbot/hip-vega20-0/clang-hip-vega20/llvm/bin/clang-19+0x2be399c)

llvm-ci · 2024-12-13T04:21:05Z

LLVM Buildbot has detected a new failure on builder clang-cmake-x86_64-avx512-linux running on avx512-intel64 while building llvm at step 13 "test-suite".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/133/builds/8338

Here is the relevant piece of the build log for the reference

Step 13 (test-suite) failure: test (failure)
...
size..interp: 28 
size..note.ABI-tag: 32 
size..note.gnu.property: 32 
size..plt: 48 
size..rela.dyn: 192 
size..rela.plt: 48 
size..rodata: 91 
size..text: 2496 
**********
NOEXE: test-suite :: SingleSource/UnitTests/Vectorizer/gcc-loops.test (2455 of 2519)
******************** TEST 'test-suite :: SingleSource/UnitTests/Vectorizer/gcc-loops.test' FAILED ********************
Executable '/localdisk2/buildbot/llvm-worker/clang-cmake-x86_64-avx512-linux/test/sandbox/build/SingleSource/UnitTests/Vectorizer/gcc-loops' is missing
********************
PASS: test-suite :: SingleSource/UnitTests/Vector/AVX512VL/Vector-AVX512VL-i64gather_64.test (2456 of 2519)
********** TEST 'test-suite :: SingleSource/UnitTests/Vector/AVX512VL/Vector-AVX512VL-i64gather_64.test' RESULTS **********
compile_time: 0.0000 
exec_time: 0.0049 
hash: "9ef5ed4a2785bd740b64ef45c222af1e" 
link_time: 0.0242 
size: 23424 
size..bss: 5111824 
size..comment: 154 
size..data: 4 
size..data.rel.ro: 8 
size..dynamic: 496 
size..dynstr: 153 
size..dynsym: 168 
size..eh_frame: 316 
size..eh_frame_hdr: 84 
size..fini: 13 
size..fini_array: 8 
size..gnu.build.attributes: 5004 
size..gnu.hash: 28 
size..gnu.version: 14 
size..gnu.version_r: 48 
size..got: 40 
size..got.plt: 40 
size..init: 27 
size..init_array: 8 
size..interp: 28 
size..note.ABI-tag: 32 
size..note.gnu.property: 32 
size..plt: 48 
size..rela.dyn: 192 
size..rela.plt: 48 
size..rodata: 95 
size..text: 2222 
**********
PASS: test-suite :: SingleSource/UnitTests/Vector/Vector-constpool.test (2457 of 2519)

llvm-ci · 2024-12-13T04:31:19Z

LLVM Buildbot has detected a new failure on builder llvm-clang-aarch64-darwin running on doug-worker-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/11281

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'lit :: shtest-define.py' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 3
echo '-- Available Tests --' > /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: echo '-- Available Tests --'
# RUN: at line 22
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/before-name.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/before-name.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/before-name.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/before-name.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/before-name.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/before-name.txt'
# RUN: at line 24
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/between-name-equals.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/between-name-equals.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/between-name-equals.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/between-name-equals.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/between-name-equals.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/between-name-equals.txt'
# RUN: at line 26
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-empty.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-empty.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-empty.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-empty.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-empty.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-empty.txt'
# RUN: at line 28
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-with-dot.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-with-dot.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-with-dot.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-with-dot.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-with-dot.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-with-dot.txt'
# RUN: at line 30
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-with-equals.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-with-equals.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-with-equals.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-with-equals.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-with-equals.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-with-equals.txt'
# RUN: at line 32
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-with-newline.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-with-newline.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-with-newline.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-with-newline.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-with-newline.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-with-newline.txt'
# RUN: at line 34
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-with-number.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-with-number.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-with-number.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-with-number.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-with-number.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-with-number.txt'
# RUN: at line 36
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/braces-with-ws.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/braces-with-ws.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/braces-with-ws.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
# executed command: not env -u FILECHECK_OPTS /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va Inputs/shtest-define/errors/assignment/braces-with-ws.txt
# executed command: FileCheck -match-full-lines Inputs/shtest-define/errors/assignment/braces-with-ws.txt -dump-input-filter=all -vv -color
# executed command: echo '  shtest-define :: errors/assignment/braces-with-ws.txt'
# RUN: at line 38
not env -u FILECHECK_OPTS "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/bin/python3.9" /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical -va   Inputs/shtest-define/errors/assignment/empty.txt 2>&1 | FileCheck -match-full-lines  Inputs/shtest-define/errors/assignment/empty.txt -dump-input-filter=all -vv -color && echo '  shtest-define :: errors/assignment/empty.txt' >> /Users/buildbot/buildbot-root/aarch64-darwin/build/utils/lit/tests/Output/shtest-define.py.tmp.tests.actual.txt
...

Revert "[SLP] Make getSameOpcode support different instructions if they have same semantics. (#112181)" This reverts commit 8220415.

llvm-ci · 2024-12-13T04:45:05Z

LLVM Buildbot has detected a new failure on builder flang-runtime-cuda-clang running on as-builder-7 while building llvm at step 10 "build-flang-runtime-FortranRuntime".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/7/builds/8567

Here is the relevant piece of the build log for the reference

Step 10 (build-flang-runtime-FortranRuntime) failure: cmake (failure)
...
  115 | static RT_API_ATTRS void *MemmoveWrapper(
      |                           ^~~~~~~~~~~~~~
1 warning generated.
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/type-info.cpp:11:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/tools.h:17:
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Runtime/freestanding-tools.h:115:27: warning: unused function 'MemmoveWrapper' [-Wunused-function]
  115 | static RT_API_ATTRS void *MemmoveWrapper(
      |                           ^~~~~~~~~~~~~~
1 warning generated.
12.965 [1/24/41] Building CXX object CMakeFiles/FortranRuntime.dir/edit-input.cpp.o
FAILED: CMakeFiles/FortranRuntime.dir/edit-input.cpp.o 
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/install-clang/bin/clang++ -DFLANG_LITTLE_ENDIAN=1 -DGTEST_HAS_RTTI=0 -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DOMP_OFFLOAD_BUILD -I/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include -I/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/build/flang-runtime -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -fno-lto -O3 -DNDEBUG   -U_GLIBCXX_ASSERTIONS -U_LIBCPP_ENABLE_ASSERTIONS -std=c++17  -fno-exceptions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -fopenmp -fvisibility=hidden -fopenmp-cuda-mode --offload-arch=sm_50,sm_60,sm_70,sm_80 -foffload-lto -MD -MT CMakeFiles/FortranRuntime.dir/edit-input.cpp.o -MF CMakeFiles/FortranRuntime.dir/edit-input.cpp.o.d -o CMakeFiles/FortranRuntime.dir/edit-input.cpp.o -c /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.cpp
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.cpp:9:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.h:12:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/format.h:14:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/environment.h:13:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/decimal.h:29:
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/binary-floating-point.h:39:24: warning: declaration is not declared in any declare target region [-Wopenmp-target]
   39 |   static constexpr int significandBits{realChars.significandBits};
      |                        ^
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/binary-floating-point.h:51:59: note: used here
   51 |   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
      |                                                           ^~~~~~~~~~~~~~~
1 warning generated.
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.cpp:9:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.h:12:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/format.h:14:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/environment.h:13:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/decimal.h:29:
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/binary-floating-point.h:39:24: warning: declaration is not declared in any declare target region [-Wopenmp-target]
   39 |   static constexpr int significandBits{realChars.significandBits};
      |                        ^
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/binary-floating-point.h:51:59: note: used here
   51 |   static constexpr RawType significandMask{(RawType{1} << significandBits) - 1};
      |                                                           ^~~~~~~~~~~~~~~
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.cpp:9:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.h:12:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/format.h:19:
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Runtime/freestanding-tools.h:115:27: warning: unused function 'MemmoveWrapper' [-Wunused-function]
  115 | static RT_API_ATTRS void *MemmoveWrapper(
      |                           ^~~~~~~~~~~~~~
2 warnings generated.
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.cpp:9:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/edit-input.h:12:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/format.h:14:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/environment.h:13:
In file included from /home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/decimal.h:29:
/home/buildbot/worker/as-builder-7/ramdisk/flang-runtime-cuda-clang/llvm-project/flang/runtime/../include/flang/Decimal/binary-floating-point.h:39:24: warning: declaration is not declared in any declare target region [-Wopenmp-target]
   39 |   static constexpr int significandBits{realChars.significandBits};

nikic · 2024-12-13T10:28:48Z

Looks like this was already reverted for other reasons, so just as an FYI, this also causes a significant compile-time regression: https://llvm-compile-time-tracker.com/compare.php?from=02bcaca5995de283c85acfcca61a39baac315794&to=82204154b7bd1f8c487c94c7ef00399d776b29f0&stat=instructions:u

[SLP] Make getSameOpcode support different instructions if they have same semantics. (llvm#112181)

HanKuanChen requested review from preames and alexey-bataev October 14, 2024 10:48

llvmbot added vectorizers llvm:transforms labels Oct 14, 2024

HanKuanChen force-pushed the slp-interchangeable-opcode branch from 2d81590 to d9d8f3c Compare October 14, 2024 14:25

alexey-bataev reviewed Oct 14, 2024

View reviewed changes

alexey-bataev reviewed Oct 15, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Outdated Show resolved Hide resolved

HanKuanChen requested a review from alexey-bataev October 17, 2024 04:03

HanKuanChen force-pushed the slp-interchangeable-opcode branch 2 times, most recently from 7a6cbcb to 48bae64 Compare October 17, 2024 08:03

[SLP] Make getSameOpcode support different instructions if they have

ad591ac

same semantics.

HanKuanChen force-pushed the slp-interchangeable-opcode branch from 48bae64 to ad591ac Compare October 18, 2024 15:08

getSExtValue may use too many bits

f393590

alexey-bataev reviewed Oct 21, 2024

View reviewed changes

apply comment

83ed351

HanKuanChen requested a review from alexey-bataev October 21, 2024 10:41

alexey-bataev reviewed Oct 21, 2024

View reviewed changes

reduce repeated code

9672f6d

HanKuanChen force-pushed the slp-interchangeable-opcode branch from a56bd1c to 9672f6d Compare October 23, 2024 06:50

HanKuanChen requested a review from alexey-bataev October 23, 2024 08:24

HanKuanChen added 5 commits December 10, 2024 23:10

Merge remote-tracking branch 'upstream/main' into slp-interchangeable…

b1c2047

…-opcode

fix conflict

1097781

fix VL may contain PoisonValue

999f45a

appendOperandsOfVL does not have to call getSameOpcode. Instead, we pass

ee74f11

MainOp and AltOp.

refactor getInterchangeableInstruction

ddf7ab4

alexey-bataev reviewed Dec 11, 2024

View reviewed changes

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Outdated Show resolved Hide resolved

apply comment

7c40025

replace undef with poison

4752ab6

alexey-bataev approved these changes Dec 12, 2024

View reviewed changes

HanKuanChen merged commit 8220415 into llvm:main Dec 13, 2024
8 checks passed

HanKuanChen deleted the slp-interchangeable-opcode branch December 13, 2024 04:06

HanKuanChen added a commit that referenced this pull request Dec 13, 2024

Revert "[SLP] Make getSameOpcode support different instructions if th…

3133acf

…ey have same semantics. (#112181)" This reverts commit 8220415.

HanKuanChen mentioned this pull request Dec 17, 2024

[SLP] NFC. Replace MainOp and AltOp in TreeEntry with InstructionsState. #120198

Merged

HanKuanChen added a commit to HanKuanChen/llvm-project that referenced this pull request Jan 6, 2025

[SLP] Make getSameOpcode support different instructions if they have …

ffe640e

…same semantics. (llvm#112181)

HanKuanChen added a commit to HanKuanChen/llvm-project that referenced this pull request Jan 7, 2025

[SLP] Make getSameOpcode support different instructions if they have …

8ab8156

…same semantics. (llvm#112181)

alexey-bataev mentioned this pull request Jan 22, 2025

[SLP]Reduce number of alternate instruction, where possible #123360

Merged

[SLP] Make getSameOpcode support different instructions if they have same semantics. #112181

[SLP] Make getSameOpcode support different instructions if they have same semantics. #112181

Uh oh!

Conversation

HanKuanChen commented Oct 14, 2024

Uh oh!

llvmbot commented Oct 14, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Oct 17, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

alexey-bataev Oct 22, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Dec 12, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

llvm-ci commented Dec 13, 2024

Uh oh!

llvm-ci commented Dec 13, 2024

Uh oh!

llvm-ci commented Dec 13, 2024

llvmbot commented Oct 14, 2024 •

edited

Loading

github-actions bot commented Oct 17, 2024 •

edited

Loading

alexey-bataev Oct 22, 2024 •

edited

Loading

github-actions bot commented Dec 12, 2024 •

edited

Loading