Skip to content

Commit c2c1e6e

Browse files
authored
[VPlan] Replace disjoint or with add instead of dropping disjoint. (#83821)
Dropping disjoint from an OR may yield incorrect results, as some analyses may have converted it to an Add implicitly (e.g. SCEV used for dependence analysis). Instead, replace it with an equivalent Add. This is possible as all users of the disjoint OR only access lanes where the operands are disjoint or poison otherwise. Note that replacing all disjoint ORs with ADDs instead of dropping the flags is not strictly necessary. It is only needed for disjoint ORs that SCEV treated as ADDs, but those are not tracked. There are other places that may drop poison-generating flags; those likely need similar treatment. Fixes #81872. PR: #83821
1 parent a6e231b commit c2c1e6e

File tree

5 files changed

+32
-1
lines changed

5 files changed

+32
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ class VPBuilder {
6868
public:
6969
VPBuilder() = default;
7070
VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71+
/// Construct a builder whose insertion point is set from \p InsertPt: the
/// block is InsertPt's parent and the position is InsertPt's own iterator.
VPBuilder(VPRecipeBase *InsertPt) {
  auto *ParentBlock = InsertPt->getParent();
  setInsertPoint(ParentBlock, InsertPt->getIterator());
}
7174

7275
/// Clear the insertion point: created instructions will not be inserted into
7376
/// a block.

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
11271127
return WrapFlags.HasNSW;
11281128
}
11291129

1130+
bool isDisjoint() const {
1131+
assert(OpType == OperationType::DisjointOp &&
1132+
"recipe cannot have a disjoing flag");
1133+
return DisjointFlags.IsDisjoint;
1134+
}
1135+
11301136
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11311137
void printFlags(raw_ostream &O) const;
11321138
#endif

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
261261
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
262262
}
263263

264+
template <typename Op0_t, typename Op1_t>
265+
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or>
266+
m_Or(const Op0_t &Op0, const Op1_t &Op1) {
267+
return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
268+
}
264269
} // namespace VPlanPatternMatch
265270
} // namespace llvm
266271

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,23 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
12161216
// load/store. If the underlying instruction has poison-generating flags,
12171217
// drop them directly.
12181218
if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
1219+
VPValue *A, *B;
1220+
using namespace llvm::VPlanPatternMatch;
1221+
// Dropping disjoint from an OR may yield incorrect results, as some
1222+
// analysis may have converted it to an Add implicitly (e.g. SCEV used
1223+
// for dependence analysis). Instead, replace it with an equivalent Add.
1224+
// This is possible as all users of the disjoint OR only access lanes
1225+
// where the operands are disjoint or poison otherwise.
1226+
if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) &&
1227+
RecWithFlags->isDisjoint()) {
1228+
VPBuilder Builder(RecWithFlags);
1229+
VPInstruction *New = Builder.createOverflowingOp(
1230+
Instruction::Add, {A, B}, {false, false},
1231+
RecWithFlags->getDebugLoc());
1232+
RecWithFlags->replaceAllUsesWith(New);
1233+
RecWithFlags->eraseFromParent();
1234+
CurRec = New;
1235+
}
12191236
RecWithFlags->dropPoisonGeneratingFlags();
12201237
} else {
12211238
Instruction *Instr = dyn_cast_or_null<Instruction>(

llvm/test/Transforms/LoopVectorize/X86/pr81872.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
2929
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
3030
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
3131
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
32-
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1
32+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1
3333
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
3434
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
3535
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3

0 commit comments

Comments
 (0)