-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[SLP] Make getSameOpcode support interchangeable instructions. #127450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP] Make getSameOpcode support interchangeable instructions. #127450
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Han-Kuan Chen (HanKuanChen) ChangesWe use the term "interchangeable instructions" to refer to different operators that have the same meaning (e.g., Patch is 55.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127450.diff 23 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e946620406c2e..eb1a6fb55c9d1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -810,6 +810,205 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
namespace {
+/// Base class for representing instructions that can be interchanged with other
+/// equivalent forms. For example, multiplication by a power of 2 can be
+/// interchanged with a left shift.
+///
+/// Derived classes implement specific interchange patterns by overriding the
+/// virtual methods to define their interchange logic.
+///
+/// The class maintains a reference to the main instruction (MainOp) and provides
+/// methods to:
+/// - Check if another instruction is interchangeable (isSame)
+/// - Get the opcode for the interchangeable form (getInterchangeableInstructionOpcode)
+/// - Get the operands for the interchangeable form (getInterchangeableInstructionOps)
+class InterchangeableInstruction {
+protected:
+ Instruction *const MainOp;
+
+ /// Return non nullptr if the right operand of I is ConstantInt.
+ static ConstantInt *isBinOpWithConstantInt(Instruction *I) {
+ Constant *C;
+ if (!match(I, m_BinOp(m_Value(), m_Constant(C))))
+ return nullptr;
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
+ return CI;
+ } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
+ if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
+ return CI;
+ }
+ return nullptr;
+ }
+
+public:
+ InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {}
+ virtual bool isSame(Instruction *I) {
+ return MainOp->getOpcode() == I->getOpcode();
+ }
+ virtual unsigned getInterchangeableInstructionOpcode() {
+ return MainOp->getOpcode();
+ }
+ virtual SmallVector<Value *>
+ getInterchangeableInstructionOps(Instruction *I) {
+ assert(MainOp->getOpcode() == I->getOpcode());
+ return SmallVector<Value *>(MainOp->operands());
+ }
+ virtual ~InterchangeableInstruction() = default;
+};
+
+class BinOpIsNoOp final : public InterchangeableInstruction {
+ constexpr static std::initializer_list<unsigned> SupportedOp = {
+ Instruction::Add, Instruction::Sub, Instruction::Mul, Instruction::Shl,
+ Instruction::AShr, Instruction::And, Instruction::Or, Instruction::Xor};
+ SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+ using InterchangeableInstruction::InterchangeableInstruction;
+ bool isSame(Instruction *I) override {
+ unsigned Opcode = I->getOpcode();
+ if (!is_contained(SupportedOp, Opcode))
+ return false;
+ ConstantInt *CI = isBinOpWithConstantInt(I);
+ if (CI) {
+ switch (Opcode) {
+ case Instruction::Mul:
+ if (CI->getValue().isOne())
+ return true;
+ break;
+ case Instruction::And:
+ if (CI->getValue().isAllOnes())
+ return true;
+ break;
+ default:
+ if (CI->getValue().isZero())
+ return true;
+ }
+ }
+ if (is_contained(CandidateOp, Opcode)) {
+ CandidateOp = {Opcode};
+ return true;
+ }
+ return false;
+ }
+ unsigned getInterchangeableInstructionOpcode() override {
+ assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+ if (is_contained(CandidateOp, MainOp->getOpcode()))
+ return MainOp->getOpcode();
+ return CandidateOp[0];
+ }
+ SmallVector<Value *>
+ getInterchangeableInstructionOps(Instruction *I) override {
+ assert(is_contained(SupportedOp, I->getOpcode()));
+ return {MainOp->getOperand(0),
+ ConstantInt::get(MainOp->getOperand(1)->getType(),
+ I->getOpcode() == Instruction::Mul)};
+ }
+};
+
+class MulAndShlWithConstantInt final : public InterchangeableInstruction {
+ constexpr static std::initializer_list<unsigned> SupportedOp = {
+ Instruction::Mul, Instruction::Shl};
+ SmallVector<unsigned> CandidateOp = SupportedOp;
+
+public:
+ using InterchangeableInstruction::InterchangeableInstruction;
+ bool isSame(Instruction *I) override {
+ unsigned Opcode = I->getOpcode();
+ if (!is_contained(SupportedOp, Opcode))
+ return false;
+ ConstantInt *CI = isBinOpWithConstantInt(I);
+ if (CI && (Opcode != Instruction::Mul || CI->getValue().isPowerOf2()))
+ return true;
+ if (is_contained(CandidateOp, Opcode)) {
+ CandidateOp = {Opcode};
+ return true;
+ }
+ return false;
+ }
+ unsigned getInterchangeableInstructionOpcode() override {
+ assert(!CandidateOp.empty() && "Cannot find interchangeable instruction.");
+ if (is_contained(CandidateOp, MainOp->getOpcode()))
+ return MainOp->getOpcode();
+ return CandidateOp[0];
+ }
+ SmallVector<Value *>
+ getInterchangeableInstructionOps(Instruction *I) override {
+ assert(is_contained(SupportedOp, I->getOpcode()));
+ if (MainOp->getOpcode() == I->getOpcode())
+ return SmallVector<Value *>(MainOp->operands());
+ const APInt &Op1Int = isBinOpWithConstantInt(MainOp)->getValue();
+ return {MainOp->getOperand(0),
+ ConstantInt::get(MainOp->getOperand(1)->getType(),
+ I->getOpcode() == Instruction::Mul
+ ? (1 << Op1Int.getZExtValue())
+ : Op1Int.logBase2())};
+ }
+};
+
+static SmallVector<std::unique_ptr<InterchangeableInstruction>>
+getInterchangeableInstruction(Instruction *MainOp) {
+ SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate;
+ Candidate.push_back(std::make_unique<InterchangeableInstruction>(MainOp));
+ if (MainOp->isBinaryOp()) {
+ Candidate.push_back(std::make_unique<BinOpIsNoOp>(MainOp));
+ Candidate.push_back(std::make_unique<MulAndShlWithConstantInt>(MainOp));
+ }
+ return Candidate;
+}
+
+static bool getInterchangeableInstruction(
+ SmallVector<std::unique_ptr<InterchangeableInstruction>> &Candidate,
+ Instruction *I) {
+ auto Iter = std::stable_partition(
+ Candidate.begin(), Candidate.end(),
+ [&](const std::unique_ptr<InterchangeableInstruction> &C) {
+ return C->isSame(I);
+ });
+ if (Iter == Candidate.begin())
+ return false;
+ Candidate.erase(Iter, Candidate.end());
+ return true;
+}
+
+static bool isConvertible(Instruction *I, Instruction *MainOp,
+ Instruction *AltOp) {
+ if (!I->isBinaryOp())
+ return I->getOpcode() == MainOp->getOpcode() ||
+ I->getOpcode() == AltOp->getOpcode();
+ assert(MainOp && "MainOp cannot be nullptr.");
+ SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
+ getInterchangeableInstruction(I));
+ for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+ if (C->isSame(I) && C->isSame(MainOp))
+ return true;
+ Candidate = getInterchangeableInstruction(I);
+ assert(AltOp && "AltOp cannot be nullptr.");
+ for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+ if (C->isSame(I) && C->isSame(AltOp))
+ return true;
+ return false;
+}
+
+static std::pair<Instruction *, SmallVector<Value *>>
+convertTo(Instruction *I, Instruction *MainOp, Instruction *AltOp) {
+ assert(isConvertible(I, MainOp, AltOp) && "Cannot convert the instruction.");
+ if (!I->isBinaryOp())
+ return std::make_pair(I->getOpcode() == MainOp->getOpcode() ? MainOp
+ : AltOp,
+ SmallVector<Value *>(I->operands()));
+ SmallVector<std::unique_ptr<InterchangeableInstruction>> Candidate(
+ getInterchangeableInstruction(I));
+ for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+ if (C->isSame(I) && C->isSame(MainOp))
+ return std::make_pair(MainOp,
+ C->getInterchangeableInstructionOps(MainOp));
+ Candidate = getInterchangeableInstruction(I);
+ for (std::unique_ptr<InterchangeableInstruction> &C : Candidate)
+ if (C->isSame(I) && C->isSame(AltOp))
+ return std::make_pair(AltOp, C->getInterchangeableInstructionOps(AltOp));
+ llvm_unreachable("Cannot convert the instruction.");
+}
+
/// Main data required for vectorization of instructions.
class InstructionsState {
/// The main/alternate instruction. MainOp is also VL0.
@@ -836,8 +1035,7 @@ class InstructionsState {
bool isAltShuffle() const { return getMainOp() != getAltOp(); }
bool isOpcodeOrAlt(Instruction *I) const {
- unsigned CheckedOpcode = I->getOpcode();
- return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
+ return isConvertible(I, MainOp, AltOp);
}
/// Checks if the current state is valid, i.e. has non-null MainOp
@@ -931,6 +1129,11 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned Opcode = MainOp->getOpcode();
unsigned AltOpcode = Opcode;
+ SmallVector<std::unique_ptr<InterchangeableInstruction>>
+ InterchangeableInstructionCandidate(
+ getInterchangeableInstruction(MainOp));
+ SmallVector<std::unique_ptr<InterchangeableInstruction>>
+ AlternateInterchangeableInstructionCandidate;
bool SwappedPredsCompatible = IsCmpOp && [&]() {
SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
UniquePreds.insert(BasePred);
@@ -977,14 +1180,18 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
- if (InstOpcode == Opcode || InstOpcode == AltOpcode)
- continue;
- if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
- isValidForAlternation(Opcode)) {
- AltOpcode = InstOpcode;
- AltOp = I;
+ if (getInterchangeableInstruction(InterchangeableInstructionCandidate, I))
continue;
+ if (AlternateInterchangeableInstructionCandidate.empty()) {
+ if (!isValidForAlternation(Opcode) ||
+ !isValidForAlternation(InstOpcode))
+ return InstructionsState::invalid();
+ AlternateInterchangeableInstructionCandidate =
+ getInterchangeableInstruction(I);
}
+ if (getInterchangeableInstruction(
+ AlternateInterchangeableInstructionCandidate, I))
+ continue;
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = MainOp->getOperand(0);
Type *Ty0 = Op0->getType();
@@ -1085,6 +1292,29 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}
+ if (IsBinOp) {
+ auto FindOp =
+ [&](ArrayRef<std::unique_ptr<InterchangeableInstruction>> Candidate) {
+ for (const std::unique_ptr<InterchangeableInstruction> &I :
+ Candidate) {
+ unsigned InterchangeableInstructionOpcode =
+ I->getInterchangeableInstructionOpcode();
+ for (Value *V : VL) {
+ if (isa<PoisonValue>(V))
+ continue;
+ if (cast<Instruction>(V)->getOpcode() ==
+ InterchangeableInstructionOpcode)
+ return cast<Instruction>(V);
+ }
+ }
+ llvm_unreachable(
+ "Cannot find the candidate instruction for InstructionsState.");
+ };
+ MainOp = FindOp(InterchangeableInstructionCandidate);
+ AltOp = AlternateInterchangeableInstructionCandidate.empty()
+ ? MainOp
+ : FindOp(AlternateInterchangeableInstructionCandidate);
+ }
return InstructionsState(MainOp, AltOp);
}
@@ -2447,29 +2677,28 @@ class BoUpSLP {
ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
- for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+ for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx)
OpsVec[OpIdx].resize(NumLanes);
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
- assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
- "Expected instruction or poison value");
- // Our tree has just 3 nodes: the root and two operands.
- // It is therefore trivial to get the APO. We only need to check the
- // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
- // RHS operand. The LHS operand of both add and sub is never attached
- // to an inversese operation in the linearized form, therefore its APO
- // is false. The RHS is true only if VL[Lane] is an inverse operation.
-
- // Since operand reordering is performed on groups of commutative
- // operations or alternating sequences (e.g., +, -), we can safely
- // tell the inverse operations by checking commutativity.
- if (isa<PoisonValue>(VL[Lane])) {
- if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
- if (OpIdx == 0) {
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+ "Expected instruction or poison value");
+ // Our tree has just 3 nodes: the root and two operands.
+ // It is therefore trivial to get the APO. We only need to check the
+ // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or RHS
+ // operand. The LHS operand of both add and sub is never attached to an
+ // inversese operation in the linearized form, therefore its APO is
+ // false. The RHS is true only if VL[Lane] is an inverse operation.
+
+ // Since operand reordering is performed on groups of commutative
+ // operations or alternating sequences (e.g., +, -), we can safely tell
+ // the inverse operations by checking commutativity.
+ if (isa<PoisonValue>(VL[Lane])) {
+ for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
+ if (OpIdx == 0) {
+ if (auto *EI = dyn_cast<ExtractElementInst>(MainOp)) {
OpsVec[OpIdx][Lane] = {EI->getVectorOperand(), true, false};
continue;
- }
- } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
- if (OpIdx == 0) {
+ } else if (auto *EV = dyn_cast<ExtractValueInst>(MainOp)) {
OpsVec[OpIdx][Lane] = {EV->getAggregateOperand(), true, false};
continue;
}
@@ -2477,12 +2706,15 @@ class BoUpSLP {
OpsVec[OpIdx][Lane] = {
PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
false};
- continue;
}
- bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
+ continue;
+ }
+ auto [SelectedOp, Ops] =
+ convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
+ bool IsInverseOperation = !isCommutative(SelectedOp);
+ for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
- OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
- APO, false};
+ OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
}
}
}
@@ -8501,8 +8733,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
BlockScheduling &BS = *BSRef;
+ SmallVector<Value *> MainOpIsTheFirst(UniqueValues);
+ auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp());
+ std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter));
+
std::optional<ScheduleData *> Bundle =
- BS.tryScheduleBundle(UniqueValues, this, S);
+ BS.tryScheduleBundle(MainOpIsTheFirst, this, S);
#ifdef EXPENSIVE_CHECKS
// Make sure we didn't break any internal invariants
BS.verify();
@@ -15889,7 +16125,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
- propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
+ propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
if (auto *I = dyn_cast<Instruction>(V)) {
V = ::propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true).
@@ -17169,6 +17405,7 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
std::optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
+ assert(VL[0] == S.getMainOp() && "MainOp must be the first element of VL.");
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
if (isa<PHINode>(S.getMainOp()) ||
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index 3ebe920d17343..781954cbec2f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,7 +5,9 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i8, ptr null, i64 24
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[ARRAYIDX18]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index feb4ad865f314..d527d38adbee3 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
-; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
-; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
-; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
+; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
+; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
+; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
+; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index fd3d4ab80b29c..ff897180cc9b7 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
-; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 tr...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' b4061a5a80a37ed5c500b2f0ef25e0328e5b60a8 760d852a549503bd192a23acbd8b2fdf01833607 llvm/test/Transforms/SLPVectorizer/isOpcodeOrAlt.ll llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll llvm/test/Transforms/SLPVectorizer/X86/reduced-val-vectorized-in-transform.ll llvm/test/Transforms/SLPVectorizer/X86/reorder_diamond_match.ll llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
|
||
public: | ||
InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {} | ||
virtual bool isSame(Instruction *I) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'd better to avoid virtual where possible and replace by static dispatching, if possible
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about this one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably cannot. I expected to add other class here. For example,
define void @test(<16 x i8> %81, ptr %output) {
entry:
%shuffle.i = shufflevector <16 x i8> %81, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%shuffle.i109 = shufflevector <16 x i8> %81, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vmovl.i108 = zext <8 x i8> %shuffle.i to <8 x i16>
%vmovl.i = zext <8 x i8> %shuffle.i109 to <8 x i16>
%vext = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%vext20 = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
%vext29 = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
%gep0 = getelementptr inbounds i16, ptr %output, i64 0
%gep1 = getelementptr inbounds i16, ptr %output, i64 8
%gep2 = getelementptr inbounds i16, ptr %output, i64 16
%gep3 = getelementptr inbounds i16, ptr %output, i64 24
store <8 x i16> %vext, ptr %gep0, align 4
store <8 x i16> %vmovl.i108, ptr %gep1, align 4
store <8 x i16> %vext20, ptr %gep2, align 4
store <8 x i16> %vext29, ptr %gep3, align 4
ret void
}
zext and shufflevector are interchangeable. %vmovl.i108
can be shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
.
%vmovl.i108 = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%vext = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
%vext20 = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
%vext29 = shufflevector <8 x i16> %vmovl.i108, <8 x i16> %vmovl.i, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
virtual can help me do that.
auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp()); | ||
std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter)); | ||
|
||
std::optional<ScheduleData *> Bundle = | ||
BS.tryScheduleBundle(UniqueValues, this, S); | ||
BS.tryScheduleBundle(MainOpIsTheFirst, this, S); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A Bundle is built in the order of VL.
BoUpSLP::ScheduleData *
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
ScheduleData *Bundle = nullptr;
ScheduleData *PrevInBundle = nullptr;
for (Value *V : VL) {
if (doesNotNeedToBeScheduled(V))
continue;
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member "
"(maybe not in same basic block)");
assert(BundleMember->isSchedulingEntity() &&
"bundle member already part of other bundle");
if (PrevInBundle) {
PrevInBundle->NextInBundle = BundleMember;
} else {
Bundle = BundleMember;
}
That means the first one Value which is NOT doesNotNeedToBeScheduled
will be the FirstInBundle
.
When SLP calls cancelScheduling
, InstructionsState.MainOp
will be passed into cancelScheduling
.
auto *Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle->isReady()) {
cancelScheduling(VL, S.getMainOp());
The Bundle
is from getScheduleData(OpValue)
. But the latter code assume it is the first one in a bundle (by Bundle->isSchedulingEntity()
).
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue) ||
doesNotNeedToSchedule(VL))
return;
if (doesNotNeedToBeScheduled(OpValue))
OpValue = *find_if_not(VL, doesNotNeedToBeScheduled);
ScheduleData *Bundle = getScheduleData(OpValue);
LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
assert(!Bundle->IsScheduled &&
"Can't cancel bundle which is already scheduled");
assert(Bundle->isSchedulingEntity() &&
(Bundle->isPartOfBundle() || needToScheduleSingleInstruction(VL)) &&
"tried to unbundle something which is not a bundle");
This is not true for interchangeable instructions. For example,
define i32 @test() {
entry:
%mul = mul i32 1, 0
%shl = shl i32 %mul, 0
%xor1 = xor i32 %shl, %mul
ret i32 %xor1
}
VL is
%shl = shl i32 %mul, 0
%mul = mul i32 1, 0
The MainOp is %mul
here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
assert(VL[0] == S.getMainOp() && "MainOp must be the first element of VL.");
is added in the beginning of tryScheduleBundle
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to adjust cancelScheduling, the first item in the bundle is just a Bundle->FirstInBundle
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see 968f346
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i8, ptr null, i64 24 | ||
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2 | ||
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[ARRAYIDX18]], align 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
see 29c8cff
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> zeroinitializer, i64 0) | ||
; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP0]], zeroinitializer | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> zeroinitializer, <4 x i64> [[RDX_OP]], i64 0) | ||
; CHECK-NEXT: [[OP_RDX:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. We can get this without 4a08497 if we make %or54.i.i.6 = or i32 %xor148.2.i.6, 0
to %or54.i.i.6 = xor i32 %xor148.2.i.6, 0
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good somehow to estimate here, which one is better, and select best solution
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
before this PR, SLP are trying to vectorize VL with size 12.
%xor148.2.i.1 = xor i32 0, 0
%xor148.2.i.2 = xor i32 0, 0
%xor148.2.i.3 = xor i32 0, 0
%xor148.2.i.4 = xor i32 0, 0
%xor148.2.i.5 = xor i32 0, 0
%xor148.2.i.6 = xor i32 0, 0
%xor148.2.i.7 = xor i32 0, 0
%or54.i.i.6 = or i32 %xor148.2.i.6, 0
i32 poison
i32 poison
i32 poison
the code has a check like this
if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) ||
!(!E.hasState() || E.getOpcode() == Instruction::Load ||
E.isAltShuffle() || !allSameBlock(VL)) ||
allConstant(VL) || isSplat(VL))
continue;
VL is alternate shuffle (xor and or) and SLP will try to combine from VL[0] to VL[7]. (CombinedEntriesWithIndices
)
VL is NOT alternate shuffle when this PR is applied.
CombinedEntriesWithIndices
is always considered in processBuildVector
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need to fix this check, if it causes the regression
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But it should be in another PR. Even without this PR, SLP can still get the same result if %or54.i.i.6
uses xor. It is not related to interchangeable instruction but SLP issue.
…hangeableInstructionOps to getOperand
|
||
public: | ||
InterchangeableInstruction(Instruction *MainOp) : MainOp(MainOp) {} | ||
virtual bool isSame(Instruction *I) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about this one?
for (std::unique_ptr<InterchangeableInstruction> &C : Candidate) | ||
if (C->isSame(I) && C->isSame(AltOp)) | ||
return std::make_pair(AltOp, C->getOperand(AltOp)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need this lookup and cannot return it as a result of getInterchangeableInstruction
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
follow #127450 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still do not understand why you can do selection immediately during getInterchangeableInstruction and need an extra filtering. Do the filtering during lookup to reduce number of candidates
auto MainOpIter = find(MainOpIsTheFirst, S.getMainOp()); | ||
std::rotate(MainOpIsTheFirst.begin(), MainOpIter, std::next(MainOpIter)); | ||
|
||
std::optional<ScheduleData *> Bundle = | ||
BS.tryScheduleBundle(UniqueValues, this, S); | ||
BS.tryScheduleBundle(MainOpIsTheFirst, this, S); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to adjust cancelScheduling, the first item in the bundle is just a Bundle->FirstInBundle
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> zeroinitializer, i64 0) | ||
; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP0]], zeroinitializer | ||
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> zeroinitializer, <4 x i64> [[RDX_OP]], i64 0) | ||
; CHECK-NEXT: [[OP_RDX:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP1]]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good somehow to estimate here, which one is better, and select best solution
// For example, VL [x + 0, y * 1] can be converted to [x << 0, y << 0], but | ||
// 'shl' does not exist in VL. In the end, we convert VL to [x * 1, y * 1]. | ||
// SeenBefore is used to know what operations have been seen before. | ||
mutable MaskType SeenBefore = 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid using mutable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then isSame cannot be const.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then isSame is wrong nname and it should not be const
if (Candidate & SHL_BIT) | ||
return Instruction::Shl; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You always have a priority here, which one is better to choose. No need to combine all possible alternatives using bitmasks, you can safely return single candidate
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand. How do I know whether VL can use Shl without checking SHL_BIT is set?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IF the mask contains shl, it is always first candidate. If the mask contains ashr, it is always the second candidate. Why do you need to keep all candidates (in mask), if you have strict order, which one should be chosen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't get it. VL [x << 0, y * 3]
cannot use Shl but can use mul to be [x * 1, y * 3]
.
for (std::unique_ptr<InterchangeableInstruction> &C : Candidate) | ||
if (C->isSame(I) && C->isSame(AltOp)) | ||
return std::make_pair(AltOp, C->getOperand(AltOp)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still do not understand why you can do selection immediately during getInterchangeableInstruction and need an extra filtering. Do the filtering during lookup to reduce number of candidates
return Instruction::Xor; | ||
llvm_unreachable("Cannot find interchangeable instruction."); | ||
} | ||
SmallVector<Value *> getOperand(Instruction *To) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you really need to return SmallVector here or just a pair is enough?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pair is inconvenient. See the following code
auto [SelectedOp, Ops] =
convertTo(cast<Instruction>(VL[Lane]), MainOp, S.getAltOp());
bool IsInverseOperation = !isCommutative(SelectedOp);
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
}
Ops can be accessed by index. pair cannot.
Rebase? |
…eable-instruction-2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
This commit causes a crash when building highway tests, e.g. https://github.com/google/highway/blob/master/hwy/tests/arithmetic_test.cc Some tests (e.g. unroller_test.cc) fail with an assertion:
Other tests (e.g. arithmetic_test.cc) also crash with an identical stack, but with no assertion. |
Any build command? |
Hopefully better: a direct IR repro (this one is reduced from zstd_opt.c): target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
%struct.ZSTD_optimal_t = type { i32, i32, i32, i32, [3 x i32] }
define i64 @ZSTD_compressBlock_opt_generic(ptr %ms, i32 %0, i32 %lastStretch.sroa.3.0.copyload) {
entry:
%1 = load ptr, ptr %ms, align 8
%sub506 = sub i32 %0, %lastStretch.sroa.3.0.copyload
%add537 = add i32 %sub506, 1
%idxprom556 = zext i32 %add537 to i64
%arrayidx5573 = getelementptr %struct.ZSTD_optimal_t, ptr %1, i64 %idxprom556
store i32 0, ptr %arrayidx5573, align 4
%idxprom563.phi.trans.insert = zext i32 %sub506 to i64
%arrayidx564.phi.trans.insert = getelementptr %struct.ZSTD_optimal_t, ptr %1, i64 %idxprom563.phi.trans.insert
%.pre = load i8, ptr %arrayidx564.phi.trans.insert, align 1
br label %while.body562
while.body562: ; preds = %while.body562, %entry
store i8 %.pre, ptr %ms, align 1
br label %while.body562
} Repros w/ |
Cannot reproduce the error. My SHA-1 is 71a0cfd. Do you miss anything? |
#127450)" This reverts commit 71a0cfd. This commit triggers failed asserts when compiling ffmpeg. The issue is reproducible with a small standalone reproducer like this: void make_filters_from_proto(int *filter[][2], int bands) { int c, q, n; for (;; q++) { n = 0; for (; n < 7; n++) { int theta = (q * (n - 6) + (n >> 1) - 3) % bands; if (theta) c = theta; filter[q][n][0] = c; } } } $ clang -target x86_64-linux-gnu -c repro.c -O3 clang: ../lib/Transforms/Vectorize/SLPVectorizer.cpp:989: llvm::SmallVector<llvm ::Value*> {anonymous}::BinOpSameOpcodeHelper::InterchangeableInfo::getOperand(ll vm::Instruction*) const: Assertion `FromCIValue.isZero() && "Cannot convert the instruction."' failed. The same issue also reproduces for a large number of other target triples, aarch64-linux-gnu and others.
I also ran into failed asserts since this change, so I pushed a revert. The issue is reproducible with the following reduced input: void make_filters_from_proto(int *filter[][2], int bands) {
int c, q, n;
for (;; q++) {
n = 0;
for (; n < 7; n++) {
int theta = (q * (n - 6) + (n >> 1) - 3) % bands;
if (theta)
c = theta;
filter[q][n][0] = c;
}
}
} Compiled e.g. like this:
The issue also reproduces by compiling ffmpeg with e.g. llvm-test-suite, according to the instructions at https://github.com/llvm/llvm-test-suite/blob/main/External/ffmpeg/README.md. |
I'm also getting crashes when building libjpeg-turbo or libavcodec:
|
…127450) We use the term "interchangeable instructions" to refer to different operators that have the same meaning (e.g., `add x, 0` is equivalent to `mul x, 1`). Non-constant values are not supported, as they may incur high costs with little benefit. --------- Co-authored-by: Alexey Bataev <[email protected]>
Please revert this patch and put both patches for review. |
The error you encountered is different from the others. I would appreciate it if you could provide the steps to reproduce it. Here is my cmake command
|
Fun fact: the revert didn't fix the libavcodec crash for me, but fixed the libjpeg-turbo one. |
Actually, the revert fixed it, but this relanded in #132887 with a version that still crashes on libavcodec but not libjpeg-turbo. |
Can you give me the test? |
As reduced by cvise:
Compile with these flags: |
I'm also still hitting crashes after the reland, also in libavcodec (but I hit it in another source file first, so I've reduced that one): short *transform_4x4_luma_12_coeffs;
int transform_4x4_luma_12_i;
void transform_4x4_luma_12() {
transform_4x4_luma_12_i = 0;
for (; transform_4x4_luma_12_i < 4; transform_4x4_luma_12_i++) {
int c2 = -transform_4x4_luma_12_coeffs[3];
transform_4x4_luma_12_coeffs[2] = transform_4x4_luma_12_coeffs[1] + 7 >> 7;
transform_4x4_luma_12_coeffs[0] = transform_4x4_luma_12_coeffs[1] =
transform_4x4_luma_12_coeffs[3] + transform_4x4_luma_12_coeffs[1] >> 7;
transform_4x4_luma_12_coeffs[3] = c2 >> 7;
transform_4x4_luma_12_coeffs += 4;
}
} Compiled with:
I can push a revert in a little while. And before relanding, at this point it would probably be good to test building all of ffmpeg with the suggested patch. |
#132887)" This reverts commit 6e66cfe. This change causes crashes on compiling some inputs, see #127450 (comment) and #127450 (comment) for details.
…nstructions. (#132887)" This reverts commit 6e66cfe. This change causes crashes on compiling some inputs, see llvm/llvm-project#127450 (comment) and llvm/llvm-project#127450 (comment) for details.
We use the term "interchangeable instructions" to refer to different operators that have the same meaning (e.g.,
add x, 0
is equivalent tomul x, 1
).Non-constant values are not supported, as they may incur high costs with little benefit.