Skip to content

Commit a686c60

Browse files
committed
[DivRemPairs] Recommit: Handling for expanded-form rem - recomposition (PR42673)
Summary: While `-div-rem-pairs` pass can decompose rem in div+rem pair when div-rem pair is unsupported by target, nothing performs the opposite fold. We can't do that in InstCombine or DAGCombine since neither of those has access to TTI. So it makes most sense to teach `-div-rem-pairs` about it. If we matched rem in expanded form, we know we will be able to place div-rem pair next to each other so we won't regress the situation. Also, we shouldn't decompose rem if we matched already-decomposed form. This is surprisingly straight-forward otherwise. The original patch was committed in rL367288 but was reverted in rL367289 because it exposed pre-existing RAUW issues in internal data structures of the pass; those now have been addressed in a previous patch. https://bugs.llvm.org/show_bug.cgi?id=42673 Reviewers: spatel, RKSimon, efriedma, ZaMaZaN4iK, bogner Reviewed By: bogner Subscribers: bogner, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65298 llvm-svn: 367419
1 parent a9d5843 commit a686c60

File tree

4 files changed

+118
-22
lines changed

4 files changed

+118
-22
lines changed

llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ struct DivRemMapKey {
3232
AssertingVH<Value> Dividend;
3333
AssertingVH<Value> Divisor;
3434

35+
DivRemMapKey() = default;
36+
3537
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
3638
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
3739
};

llvm/lib/Transforms/Scalar/DivRemPairs.cpp

Lines changed: 100 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
//===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===//
1+
//===- DivRemPairs.cpp - Hoist/[dr]ecompose division and remainder --------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This pass hoists and/or decomposes integer division and remainder
9+
// This pass hoists and/or decomposes/recomposes integer division and remainder
1010
// instructions to enable CFG improvements and better codegen.
1111
//
1212
//===----------------------------------------------------------------------===//
@@ -19,20 +19,57 @@
1919
#include "llvm/Analysis/TargetTransformInfo.h"
2020
#include "llvm/IR/Dominators.h"
2121
#include "llvm/IR/Function.h"
22+
#include "llvm/IR/PatternMatch.h"
2223
#include "llvm/Pass.h"
2324
#include "llvm/Support/DebugCounter.h"
2425
#include "llvm/Transforms/Scalar.h"
2526
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
2627

2728
using namespace llvm;
29+
using namespace llvm::PatternMatch;
2830

2931
#define DEBUG_TYPE "div-rem-pairs"
3032
STATISTIC(NumPairs, "Number of div/rem pairs");
33+
STATISTIC(NumRecomposed, "Number of instructions recomposed");
3134
STATISTIC(NumHoisted, "Number of instructions hoisted");
3235
STATISTIC(NumDecomposed, "Number of instructions decomposed");
3336
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
3437
"Controls transformations in div-rem-pairs pass");
3538

39+
namespace {
40+
struct ExpandedMatch {
41+
DivRemMapKey Key;
42+
Instruction *Value;
43+
};
44+
} // namespace
45+
46+
/// See if we can match: (which is the form we expand into)
47+
/// X - ((X ?/ Y) * Y)
48+
/// which is equivalent to:
49+
/// X ?% Y
50+
static llvm::Optional<ExpandedMatch> matchExpandedRem(Instruction &I) {
51+
Value *Dividend, *XroundedDownToMultipleOfY;
52+
if (!match(&I, m_Sub(m_Value(Dividend), m_Value(XroundedDownToMultipleOfY))))
53+
return llvm::None;
54+
55+
Value *Divisor;
56+
Instruction *Div;
57+
// Look for ((X / Y) * Y)
58+
if (!match(
59+
XroundedDownToMultipleOfY,
60+
m_c_Mul(m_CombineAnd(m_IDiv(m_Specific(Dividend), m_Value(Divisor)),
61+
m_Instruction(Div)),
62+
m_Deferred(Divisor))))
63+
return llvm::None;
64+
65+
ExpandedMatch M;
66+
M.Key.SignedOp = Div->getOpcode() == Instruction::SDiv;
67+
M.Key.Dividend = Dividend;
68+
M.Key.Divisor = Divisor;
69+
M.Value = &I;
70+
return M;
71+
}
72+
3673
/// A thin wrapper to store two values that we matched as div-rem pair.
3774
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
3875
struct DivRemPairWorklistEntry {
@@ -62,6 +99,16 @@ struct DivRemPairWorklistEntry {
6299
/// In this pair, what are the divident and divisor?
63100
Value *getDividend() const { return DivInst->getOperand(0); }
64101
Value *getDivisor() const { return DivInst->getOperand(1); }
102+
103+
bool isRemExpanded() const {
104+
switch (RemInst->getOpcode()) {
105+
case Instruction::SRem:
106+
case Instruction::URem:
107+
return false; // single 'rem' instruction - unexpanded form.
108+
default:
109+
return true; // anything else means we have remainder in expanded form.
110+
}
111+
}
65112
};
66113
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
67114

@@ -87,6 +134,8 @@ static DivRemWorklistTy getWorklist(Function &F) {
87134
RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I;
88135
else if (I.getOpcode() == Instruction::URem)
89136
RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I;
137+
else if (auto Match = matchExpandedRem(I))
138+
RemMap[Match->Key] = Match->Value;
90139
}
91140
}
92141

@@ -137,22 +186,61 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
137186

138187
// Process each entry in the worklist.
139188
for (DivRemPairWorklistEntry &E : Worklist) {
189+
if (!DebugCounter::shouldExecute(DRPCounter))
190+
continue;
191+
140192
bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
141193

142194
auto &DivInst = E.DivInst;
143195
auto &RemInst = E.RemInst;
144196

197+
const bool RemOriginallyWasInExpandedForm = E.isRemExpanded();
198+
199+
if (HasDivRemOp && E.isRemExpanded()) {
200+
// The target supports div+rem but the rem is expanded.
201+
// We should recompose it first.
202+
Value *X = E.getDividend();
203+
Value *Y = E.getDivisor();
204+
Instruction *RealRem = E.isSigned() ? BinaryOperator::CreateSRem(X, Y)
205+
: BinaryOperator::CreateURem(X, Y);
206+
// Note that we place it right next to the original expanded instruction,
207+
// and letting further handling to move it if needed.
208+
RealRem->setName(RemInst->getName() + ".recomposed");
209+
RealRem->insertAfter(RemInst);
210+
Instruction *OrigRemInst = RemInst;
211+
// Update AssertingVH<> with new instruction so it doesn't assert.
212+
RemInst = RealRem;
213+
// And replace the original instruction with the new one.
214+
OrigRemInst->replaceAllUsesWith(RealRem);
215+
OrigRemInst->eraseFromParent();
216+
NumRecomposed++;
217+
// Note that we have left ((X / Y) * Y) around.
218+
// If it had other uses we could rewrite it as X - X % Y
219+
}
220+
221+
assert((!E.isRemExpanded() || !HasDivRemOp) &&
222+
"*If* the target supports div-rem, then by now the RemInst *is* "
223+
"Instruction::[US]Rem.");
224+
145225
// If the target supports div+rem and the instructions are in the same block
146226
// already, there's nothing to do. The backend should handle this. If the
147227
// target does not support div+rem, then we will decompose the rem.
148228
if (HasDivRemOp && RemInst->getParent() == DivInst->getParent())
149229
continue;
150230

151231
bool DivDominates = DT.dominates(DivInst, RemInst);
152-
if (!DivDominates && !DT.dominates(RemInst, DivInst))
232+
if (!DivDominates && !DT.dominates(RemInst, DivInst)) {
233+
// We have matching div-rem pair, but they are in two different blocks,
234+
// neither of which dominates one another.
235+
assert(!RemOriginallyWasInExpandedForm &&
236+
"Won't happen for expanded-form rem.");
237+
// FIXME: We could hoist both ops to the common predecessor block?
153238
continue;
239+
}
154240

155-
if (!DebugCounter::shouldExecute(DRPCounter))
241+
// The target does not have a single div/rem operation,
242+
// and the rem is already in expanded form. Nothing to do.
243+
if (!HasDivRemOp && E.isRemExpanded())
156244
continue;
157245

158246
if (HasDivRemOp) {
@@ -164,9 +252,15 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
164252
DivInst->moveAfter(RemInst);
165253
NumHoisted++;
166254
} else {
167-
// The target does not have a single div/rem operation. Decompose the
168-
// remainder calculation as:
255+
// The target does not have a single div/rem operation,
256+
// and the rem is *not* in a already-expanded form.
257+
// Decompose the remainder calculation as:
169258
// X % Y --> X - ((X / Y) * Y).
259+
260+
assert(!RemOriginallyWasInExpandedForm &&
261+
"We should not be expanding if the rem was in expanded form to "
262+
"begin with.");
263+
170264
Value *X = E.getDividend();
171265
Value *Y = E.getDivisor();
172266
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);

llvm/test/Transforms/DivRemPairs/X86/div-expanded-rem-pair.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ define void @decompose_illegal_srem_same_block(i32 %a, i32 %b) {
77
; CHECK-LABEL: @decompose_illegal_srem_same_block(
88
; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]]
99
; CHECK-NEXT: [[T0:%.*]] = mul i32 [[DIV]], [[B]]
10-
; CHECK-NEXT: [[REM:%.*]] = sub i32 [[A]], [[T0]]
11-
; CHECK-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
10+
; CHECK-NEXT: [[REM_RECOMPOSED:%.*]] = srem i32 [[A]], [[B]]
11+
; CHECK-NEXT: call void @foo(i32 [[REM_RECOMPOSED]], i32 [[DIV]])
1212
; CHECK-NEXT: ret void
1313
;
1414
%div = sdiv i32 %a, %b
@@ -22,8 +22,8 @@ define void @decompose_illegal_urem_same_block(i32 %a, i32 %b) {
2222
; CHECK-LABEL: @decompose_illegal_urem_same_block(
2323
; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[A:%.*]], [[B:%.*]]
2424
; CHECK-NEXT: [[T0:%.*]] = mul i32 [[DIV]], [[B]]
25-
; CHECK-NEXT: [[REM:%.*]] = sub i32 [[A]], [[T0]]
26-
; CHECK-NEXT: call void @foo(i32 [[REM]], i32 [[DIV]])
25+
; CHECK-NEXT: [[REM_RECOMPOSED:%.*]] = urem i32 [[A]], [[B]]
26+
; CHECK-NEXT: call void @foo(i32 [[REM_RECOMPOSED]], i32 [[DIV]])
2727
; CHECK-NEXT: ret void
2828
;
2929
%div = udiv i32 %a, %b
@@ -39,14 +39,14 @@ define i16 @hoist_srem(i16 %a, i16 %b) {
3939
; CHECK-LABEL: @hoist_srem(
4040
; CHECK-NEXT: entry:
4141
; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 [[A:%.*]], [[B:%.*]]
42+
; CHECK-NEXT: [[REM_RECOMPOSED:%.*]] = srem i16 [[A]], [[B]]
4243
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[DIV]], 42
4344
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
4445
; CHECK: if:
4546
; CHECK-NEXT: [[T0:%.*]] = mul i16 [[DIV]], [[B]]
46-
; CHECK-NEXT: [[REM:%.*]] = sub i16 [[A]], [[T0]]
4747
; CHECK-NEXT: br label [[END]]
4848
; CHECK: end:
49-
; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[REM]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
49+
; CHECK-NEXT: [[RET:%.*]] = phi i16 [ [[REM_RECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
5050
; CHECK-NEXT: ret i16 [[RET]]
5151
;
5252
entry:
@@ -70,14 +70,14 @@ define i8 @hoist_urem(i8 %a, i8 %b) {
7070
; CHECK-LABEL: @hoist_urem(
7171
; CHECK-NEXT: entry:
7272
; CHECK-NEXT: [[DIV:%.*]] = udiv i8 [[A:%.*]], [[B:%.*]]
73+
; CHECK-NEXT: [[REM_RECOMPOSED:%.*]] = urem i8 [[A]], [[B]]
7374
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[DIV]], 42
7475
; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[END:%.*]]
7576
; CHECK: if:
7677
; CHECK-NEXT: [[T0:%.*]] = mul i8 [[DIV]], [[B]]
77-
; CHECK-NEXT: [[REM:%.*]] = sub i8 [[A]], [[T0]]
7878
; CHECK-NEXT: br label [[END]]
7979
; CHECK: end:
80-
; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[REM]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
80+
; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[REM_RECOMPOSED]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
8181
; CHECK-NEXT: ret i8 [[RET]]
8282
;
8383
entry:
@@ -122,11 +122,11 @@ define i32 @srem_of_srem_expanded(i32 %X, i32 %Y, i32 %Z) {
122122
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
123123
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
124124
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
125-
; CHECK-NEXT: [[T3:%.*]] = sub nsw i32 [[X]], [[T2]]
126-
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3]], [[Y]]
125+
; CHECK-NEXT: [[T3_RECOMPOSED:%.*]] = srem i32 [[X]], [[T0]]
126+
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_RECOMPOSED]], [[Y]]
127127
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
128-
; CHECK-NEXT: [[T6:%.*]] = sub nsw i32 [[T3]], [[T5]]
129-
; CHECK-NEXT: ret i32 [[T6]]
128+
; CHECK-NEXT: [[T6_RECOMPOSED:%.*]] = srem i32 [[T3_RECOMPOSED]], [[Y]]
129+
; CHECK-NEXT: ret i32 [[T6_RECOMPOSED]]
130130
;
131131
%t0 = mul nsw i32 %Z, %Y
132132
%t1 = sdiv i32 %X, %t0

llvm/test/Transforms/DivRemPairs/X86/div-rem-pairs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -173,11 +173,11 @@ define i32 @srem_of_srem_expanded(i32 %X, i32 %Y, i32 %Z) {
173173
; CHECK-NEXT: [[T0:%.*]] = mul nsw i32 [[Z:%.*]], [[Y:%.*]]
174174
; CHECK-NEXT: [[T1:%.*]] = sdiv i32 [[X:%.*]], [[T0]]
175175
; CHECK-NEXT: [[T2:%.*]] = mul nsw i32 [[T0]], [[T1]]
176-
; CHECK-NEXT: [[T3:%.*]] = sub nsw i32 [[X]], [[T2]]
177-
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3]], [[Y]]
176+
; CHECK-NEXT: [[T3_RECOMPOSED:%.*]] = srem i32 [[X]], [[T0]]
177+
; CHECK-NEXT: [[T4:%.*]] = sdiv i32 [[T3_RECOMPOSED]], [[Y]]
178178
; CHECK-NEXT: [[T5:%.*]] = mul nsw i32 [[T4]], [[Y]]
179-
; CHECK-NEXT: [[T6:%.*]] = sub nsw i32 [[T3]], [[T5]]
180-
; CHECK-NEXT: ret i32 [[T6]]
179+
; CHECK-NEXT: [[T6_RECOMPOSED:%.*]] = srem i32 [[T3_RECOMPOSED]], [[Y]]
180+
; CHECK-NEXT: ret i32 [[T6_RECOMPOSED]]
181181
;
182182
%t0 = mul nsw i32 %Z, %Y
183183
%t1 = sdiv i32 %X, %t0

0 commit comments

Comments
 (0)