Skip to content

Commit fc1184a

Browse files
committed
Move to AggressiveInstCombine
1 parent e7d14ce commit fc1184a

File tree

5 files changed

+246
-166
lines changed

5 files changed

+246
-166
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,62 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
827827
return true;
828828
}
829829

830+
/// Combine away instructions providing they are still equivalent when compared
831+
/// against 0. i.e do they have any bits set.
832+
static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) {
833+
auto *I = dyn_cast<Instruction>(V);
834+
if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
835+
return nullptr;
836+
837+
Value *A;
838+
839+
// Look deeper into the chain of or's, combining away shl (so long as they are
840+
// nuw or nsw).
841+
Value *Op0 = I->getOperand(0);
842+
if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
843+
m_NUWShl(m_Value(A), m_Value()))))
844+
Op0 = A;
845+
else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
846+
Op0 = NOp;
847+
848+
Value *Op1 = I->getOperand(1);
849+
if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
850+
m_NUWShl(m_Value(A), m_Value()))))
851+
Op1 = A;
852+
else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
853+
Op1 = NOp;
854+
855+
if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
856+
return Builder.CreateOr(Op0, Op1);
857+
return nullptr;
858+
}
859+
860+
/// Simplify `icmp eq/ne X, 0` where X is a chain of `or` instructions.
///
/// Two rewrites are attempted, in this order:
///  1. If X is an `or`, hand it to foldConsecutiveLoads so that a chain that
///     assembles a wider value out of narrow loads becomes one load.
///  2. Otherwise drop `shl nuw` / `shl nsw` operands from the chain:
///       icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
///
/// \p I is not erased by this function; in case 2 its uses are redirected to
/// a newly created compare inserted before \p I.
///
/// \returns true if the IR was changed.
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
                            TargetTransformInfo &TTI, AliasAnalysis &AA,
                            const DominatorTree &DT) {
  CmpPredicate Pred;
  Value *Chain;
  const bool IsCmpWithZero = match(&I, m_ICmp(Pred, m_Value(Chain), m_Zero()));
  if (!IsCmpWithZero || !ICmpInst::isEquality(Pred))
    return false;

  // If the chain of or's matches a load, combine to that before attempting to
  // remove shifts.
  auto *ChainI = dyn_cast<Instruction>(Chain);
  if (ChainI && ChainI->getOpcode() == Instruction::Or &&
      foldConsecutiveLoads(*ChainI, DL, TTI, AA, DT))
    return true;

  IRBuilder<> Builder(&I);
  Value *Stripped = optimizeShiftInOrChain(Chain, Builder);
  if (!Stripped)
    return false;

  // Compare the shift-free chain against the same zero operand as before.
  Value *NewCmp = Builder.CreateICmp(Pred, Stripped, I.getOperand(1));
  I.replaceAllUsesWith(NewCmp);
  return true;
}
885+
830886
// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
831887
// ModOffset
832888
static std::pair<APInt, APInt>
@@ -1253,6 +1309,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
12531309
MadeChange |= tryToRecognizeTableBasedCttz(I);
12541310
MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
12551311
MadeChange |= foldPatternedLoads(I, DL);
1312+
MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
12561313
// NOTE: This function introduces erasing of the instruction `I`, so it
12571314
// needs to be called at the end of this sequence, otherwise we may make
12581315
// bugs.

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,33 +1223,6 @@ Instruction *InstCombinerImpl::foldSignBitTest(ICmpInst &I) {
12231223
X, ConstantInt::getNullValue(XTy));
12241224
}
12251225

1226-
/// Combine away instructions providing they are still equivalent when compared
1227-
/// against 0. i.e do they have any bits set.
1228-
static Value *combineAwayHasAnyBitsSetChain(Value *V,
1229-
InstCombiner::BuilderTy &Builder) {
1230-
auto *I = dyn_cast<Instruction>(V);
1231-
if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
1232-
return nullptr;
1233-
1234-
// Remove the shl in or(shl(x, y), z) so long as the shl is nuw or nsw.
1235-
Value *A, *B;
1236-
if (match(I, m_c_Or(m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1237-
m_NUWShl(m_Value(A), m_Value())),
1238-
m_Value(B))))
1239-
return Builder.CreateOr(A, B);
1240-
1241-
// Look deeper into the chain of or's.
1242-
Value *Op0 = I->getOperand(0);
1243-
if (auto *NOp = combineAwayHasAnyBitsSetChain(Op0, Builder))
1244-
Op0 = NOp;
1245-
Value *Op1 = I->getOperand(1);
1246-
if (auto *NOp = combineAwayHasAnyBitsSetChain(Op1, Builder))
1247-
Op1 = NOp;
1248-
if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1249-
return Builder.CreateOr(Op0, Op1);
1250-
return nullptr;
1251-
}
1252-
12531226
// Handle icmp pred X, 0
12541227
Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
12551228
CmpInst::Predicate Pred = Cmp.getPredicate();
@@ -1325,12 +1298,6 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
13251298
// eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
13261299
// will fold to a constant elsewhere.
13271300
}
1328-
1329-
// icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1330-
if (ICmpInst::isEquality(Pred))
1331-
if (auto *Res = combineAwayHasAnyBitsSetChain(Cmp.getOperand(0), Builder))
1332-
return new ICmpInst(Pred, Res, Cmp.getOperand(1));
1333-
13341301
return nullptr;
13351302
}
13361303

llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2480,3 +2480,55 @@ define void @bitcast_gep(ptr %p, ptr %dest) {
24802480
store i32 %trunc, ptr %dest, align 4
24812481
ret void
24822482
}
2483+
2484+
; Four byte loads at offsets 0..3 from %p are zero-extended, shifted by
; 0/8/16/24 and or'ed together, and the result is compared for equality with
; zero. The LE checks expect a single i32 load feeding the icmp; the BE checks
; expect the loads, shifts and ors to remain.
define i1 @loadCombine_4consecutive_rev_icmp0(ptr %p) {
2485+
; LE-LABEL: @loadCombine_4consecutive_rev_icmp0(
2486+
; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1
2487+
; LE-NEXT: [[C:%.*]] = icmp eq i32 [[L1]], 0
2488+
; LE-NEXT: ret i1 [[C]]
2489+
;
2490+
; BE-LABEL: @loadCombine_4consecutive_rev_icmp0(
2491+
; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
2492+
; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
2493+
; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
2494+
; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1
2495+
; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1
2496+
; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1
2497+
; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1
2498+
; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32
2499+
; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32
2500+
; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32
2501+
; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32
2502+
; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8
2503+
; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16
2504+
; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24
2505+
; BE-NEXT: [[O1:%.*]] = or i32 [[S4]], [[S3]]
2506+
; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S2]]
2507+
; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E1]]
2508+
; BE-NEXT: [[C:%.*]] = icmp eq i32 [[O3]], 0
2509+
; BE-NEXT: ret i1 [[C]]
2510+
;
2511+
%p1 = getelementptr i8, ptr %p, i32 1
2512+
%p2 = getelementptr i8, ptr %p, i32 2
2513+
%p3 = getelementptr i8, ptr %p, i32 3
2514+
%l1 = load i8, ptr %p
2515+
%l2 = load i8, ptr %p1
2516+
%l3 = load i8, ptr %p2
2517+
%l4 = load i8, ptr %p3
2518+
2519+
%e1 = zext i8 %l1 to i32
2520+
%e2 = zext i8 %l2 to i32
2521+
%e3 = zext i8 %l3 to i32
2522+
%e4 = zext i8 %l4 to i32
2523+
2524+
%s2 = shl i32 %e2, 8
2525+
%s3 = shl i32 %e3, 16
2526+
%s4 = shl i32 %e4, 24
2527+
2528+
%o1 = or i32 %s4, %s3
2529+
%o2 = or i32 %o1, %s2
2530+
%o3 = or i32 %o2, %e1
2531+
2532+
%c = icmp eq i32 %o3, 0
2533+
ret i1 %c
2534+
}
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
3+
4+
; `shl nuw` as the first operand of the `or`: the CHECK lines expect only an
; `or` and the `icmp eq` against 0 to remain, with no shift.
define i1 @remove_shift_nuw_ab(i8 %a, i8 %b, i8 %s) {
5+
; CHECK-LABEL: @remove_shift_nuw_ab(
6+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
7+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
8+
; CHECK-NEXT: ret i1 [[IC]]
9+
;
10+
%t = shl nuw i8 %a, %s
11+
%or = or i8 %t, %b
12+
%ic = icmp eq i8 %or, 0
13+
ret i1 %ic
14+
}
15+
16+
; Commuted form of @remove_shift_nuw_ab: the `shl nuw` is the second operand
; of the `or`. The CHECK lines again expect no shift in the output.
define i1 @remove_shift_nuw_ba(i8 %a, i8 %b, i8 %s) {
17+
; CHECK-LABEL: @remove_shift_nuw_ba(
18+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[B:%.*]], [[T:%.*]]
19+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
20+
; CHECK-NEXT: ret i1 [[IC]]
21+
;
22+
%t = shl nuw i8 %a, %s
23+
%or = or i8 %b, %t
24+
%ic = icmp eq i8 %or, 0
25+
ret i1 %ic
26+
}
27+
28+
; Same shape as @remove_shift_nuw_ab but the shift carries `nsw` instead of
; `nuw`; the CHECK lines expect the shift to be gone here as well.
define i1 @remove_shift_nsw(i8 %a, i8 %b, i8 %s) {
29+
; CHECK-LABEL: @remove_shift_nsw(
30+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
31+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
32+
; CHECK-NEXT: ret i1 [[IC]]
33+
;
34+
%t = shl nsw i8 %a, %s
35+
%or = or i8 %t, %b
36+
%ic = icmp eq i8 %or, 0
37+
ret i1 %ic
38+
}
39+
40+
; `ne` counterpart of @remove_shift_nuw_ab. The compare uses the `ne`
; predicate so that both equality predicates of the fold are exercised; the
; `shl nuw` is still expected to be dropped.
define i1 @remove_shift_nuw_ne(i8 %a, i8 %b, i8 %s) {
; CHECK-LABEL: @remove_shift_nuw_ne(
; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[IC:%.*]] = icmp ne i8 [[OR]], 0
; CHECK-NEXT:    ret i1 [[IC]]
;
  %t = shl nuw i8 %a, %s
  %or = or i8 %t, %b
  %ic = icmp ne i8 %or, 0
  ret i1 %ic
}
51+
52+
; `ne` counterpart of @remove_shift_nsw. The compare uses the `ne` predicate
; so that both equality predicates of the fold are exercised; the `shl nsw`
; is still expected to be dropped.
define i1 @remove_shift_nsw_ne(i8 %a, i8 %b, i8 %s) {
; CHECK-LABEL: @remove_shift_nsw_ne(
; CHECK-NEXT:    [[OR:%.*]] = or i8 [[T:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[IC:%.*]] = icmp ne i8 [[OR]], 0
; CHECK-NEXT:    ret i1 [[IC]]
;
  %t = shl nsw i8 %a, %s
  %or = or i8 %t, %b
  %ic = icmp ne i8 %or, 0
  ret i1 %ic
}
63+
64+
; Negative test: the `shl` has neither `nuw` nor `nsw`, and the CHECK lines
; expect the shift, the `or` and the `icmp` to be left as they are.
define i1 @remove_shift_wraps(i8 %a, i8 %b, i8 %s) {
65+
; CHECK-LABEL: @remove_shift_wraps(
66+
; CHECK-NEXT: [[T:%.*]] = shl i8 [[A:%.*]], [[S:%.*]]
67+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[T]], [[B:%.*]]
68+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
69+
; CHECK-NEXT: ret i1 [[IC]]
70+
;
71+
%t = shl i8 %a, %s
72+
%or = or i8 %t, %b
73+
%ic = icmp eq i8 %or, 0
74+
ret i1 %ic
75+
}
76+
77+
; Two-level `or` tree in which only one leaf (%dt) is a `shl nuw`. The CHECK
; lines expect three `or`s followed by the `icmp`, with no shift.
define i1 @remove_shift_chain_d(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
78+
; CHECK-LABEL: @remove_shift_chain_d(
79+
; CHECK-NEXT: [[OR1:%.*]] = or i8 [[A:%.*]], [[B:%.*]]
80+
; CHECK-NEXT: [[OR2:%.*]] = or i8 [[C:%.*]], [[DT:%.*]]
81+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
82+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
83+
; CHECK-NEXT: ret i1 [[IC]]
84+
;
85+
%dt = shl nuw i8 %d, %s
86+
%or1 = or i8 %a, %b
87+
%or2 = or i8 %c, %dt
88+
%or = or i8 %or1, %or2
89+
%ic = icmp eq i8 %or, 0
90+
ret i1 %ic
91+
}
92+
93+
; Two-level `or` tree in which all four leaves are `shl nuw` (two by a
; variable amount, two by constants). The CHECK lines expect three `or`s
; followed by the `icmp`, with none of the shifts remaining.
define i1 @remove_shift_chain_abcd(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
94+
; CHECK-LABEL: @remove_shift_chain_abcd(
95+
; CHECK-NEXT: [[OR1:%.*]] = or i8 [[AT:%.*]], [[BT:%.*]]
96+
; CHECK-NEXT: [[OR2:%.*]] = or i8 [[CT:%.*]], [[DT:%.*]]
97+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
98+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
99+
; CHECK-NEXT: ret i1 [[IC]]
100+
;
101+
%at = shl nuw i8 %a, %s
102+
%bt = shl nuw i8 %b, 2
103+
%ct = shl nuw i8 %c, 1
104+
%dt = shl nuw i8 %d, %s
105+
%or1 = or i8 %at, %bt
106+
%or2 = or i8 %ct, %dt
107+
%or = or i8 %or1, %or2
108+
%ic = icmp eq i8 %or, 0
109+
ret i1 %ic
110+
}
111+
112+
; Negative test: same tree as @remove_shift_chain_abcd, but the root `or` has
; a second user (the call to @use). The CHECK lines expect every shift and
; `or` to be kept unchanged.
define i1 @remove_shift_chain_abcd_multiuse(i8 %a, i8 %b, i8 %c, i8 %d, i8 %s) {
113+
; CHECK-LABEL: @remove_shift_chain_abcd_multiuse(
114+
; CHECK-NEXT: [[AT:%.*]] = shl nuw i8 [[A:%.*]], [[S:%.*]]
115+
; CHECK-NEXT: [[BT:%.*]] = shl nuw i8 [[B:%.*]], 2
116+
; CHECK-NEXT: [[CT:%.*]] = shl nuw i8 [[C:%.*]], 1
117+
; CHECK-NEXT: [[DT:%.*]] = shl nuw i8 [[D:%.*]], [[S]]
118+
; CHECK-NEXT: [[OR1:%.*]] = or i8 [[AT]], [[BT]]
119+
; CHECK-NEXT: [[OR2:%.*]] = or i8 [[CT]], [[DT]]
120+
; CHECK-NEXT: [[OR:%.*]] = or i8 [[OR1]], [[OR2]]
121+
; CHECK-NEXT: [[IC:%.*]] = icmp eq i8 [[OR]], 0
122+
; CHECK-NEXT: call void @use(i8 [[OR]])
123+
; CHECK-NEXT: ret i1 [[IC]]
124+
;
125+
%at = shl nuw i8 %a, %s
126+
%bt = shl nuw i8 %b, 2
127+
%ct = shl nuw i8 %c, 1
128+
%dt = shl nuw i8 %d, %s
129+
%or1 = or i8 %at, %bt
130+
%or2 = or i8 %ct, %dt
131+
%or = or i8 %or1, %or2
132+
%ic = icmp eq i8 %or, 0
133+
call void @use(i8 %or)
134+
ret i1 %ic
135+
}
136+
137+
declare void @use(i8)

0 commit comments

Comments
 (0)