Skip to content

Commit d666616

Browse files
authored
[AArch64] Fold swapped sub/SUBS conditions (#121412)
This fold already exists in a couple of places (DAG and CGP), where an icmp's operands are swapped to allow CSE with a sub. They do not handle constants, though. This patch adds an AArch64 version that can be more precise.
1 parent 82be3ad commit d666616

File tree

4 files changed

+76
-36
lines changed

4 files changed

+76
-36
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25026,6 +25026,30 @@ static SDValue performCSELCombine(SDNode *N,
2502625026
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
2502725027
return Folded;
2502825028

25029+
// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
25030+
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
25031+
SDValue Cond = N->getOperand(3);
25032+
if (DCI.isAfterLegalizeDAG() && Cond.getOpcode() == AArch64ISD::SUBS &&
25033+
Cond.hasOneUse() && Cond->hasNUsesOfValue(0, 0) &&
25034+
DAG.doesNodeExist(ISD::SUB, N->getVTList(),
25035+
{Cond.getOperand(1), Cond.getOperand(0)}) &&
25036+
!DAG.doesNodeExist(ISD::SUB, N->getVTList(),
25037+
{Cond.getOperand(0), Cond.getOperand(1)}) &&
25038+
!isNullConstant(Cond.getOperand(1))) {
25039+
AArch64CC::CondCode OldCond =
25040+
static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
25041+
AArch64CC::CondCode NewCond = getSwappedCondition(OldCond);
25042+
if (NewCond != AArch64CC::AL) {
25043+
SDLoc DL(N);
25044+
SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
25045+
Cond.getOperand(1), Cond.getOperand(0));
25046+
return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0),
25047+
N->getOperand(1),
25048+
DAG.getConstant(NewCond, DL, MVT::i32),
25049+
Sub.getValue(1));
25050+
}
25051+
}
25052+
2502925053
return performCONDCombine(N, DCI, DAG, 2, 3);
2503025054
}
2503125055

llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,36 @@ inline static CondCode getInvertedCondCode(CondCode Code) {
306306
return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
307307
}
308308

309+
/// getSwappedCondition - assume the flags are set by MI(a,b), return
310+
/// the condition code if we modify the instructions such that flags are
311+
/// set by MI(b,a).
312+
inline static CondCode getSwappedCondition(CondCode CC) {
313+
switch (CC) {
314+
default:
315+
return AL;
316+
case EQ:
317+
return EQ;
318+
case NE:
319+
return NE;
320+
case HS:
321+
return LS;
322+
case LO:
323+
return HI;
324+
case HI:
325+
return LO;
326+
case LS:
327+
return HS;
328+
case GE:
329+
return LE;
330+
case LT:
331+
return GT;
332+
case GT:
333+
return LT;
334+
case LE:
335+
return GE;
336+
}
337+
}
338+
309339
/// Given a condition code, return NZCV flags that would satisfy that condition.
310340
/// The flag bits are in the format expected by the ccmp instructions.
311341
/// Note that many different flag settings can satisfy a given condition code,

llvm/test/CodeGen/AArch64/adds_cmn.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,8 @@ entry:
6262
define { i32, i32 } @subs_cmp_c(i32 noundef %x, i32 noundef %y) {
6363
; CHECK-LABEL: subs_cmp_c:
6464
; CHECK: // %bb.0: // %entry
65-
; CHECK-NEXT: cmp w0, w1
66-
; CHECK-NEXT: sub w1, w1, w0
67-
; CHECK-NEXT: cset w8, hs
68-
; CHECK-NEXT: mov w0, w8
65+
; CHECK-NEXT: subs w1, w1, w0
66+
; CHECK-NEXT: cset w0, ls
6967
; CHECK-NEXT: ret
7068
entry:
7169
%0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)

llvm/test/CodeGen/AArch64/csel-subs-swapped.ll

Lines changed: 20 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ define i32 @eq_i32(i32 %x) {
55
; CHECK-LABEL: eq_i32:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
8-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
9-
; CHECK-NEXT: sub w8, w8, w0
8+
; CHECK-NEXT: subs w8, w8, w0
109
; CHECK-NEXT: csel w0, w0, w8, eq
1110
; CHECK-NEXT: ret
1211
%cmp = icmp eq i32 %x, -2097152
@@ -19,8 +18,7 @@ define i32 @ne_i32(i32 %x) {
1918
; CHECK-LABEL: ne_i32:
2019
; CHECK: // %bb.0:
2120
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
22-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
23-
; CHECK-NEXT: sub w8, w8, w0
21+
; CHECK-NEXT: subs w8, w8, w0
2422
; CHECK-NEXT: csel w0, w0, w8, ne
2523
; CHECK-NEXT: ret
2624
%cmp = icmp ne i32 %x, -2097152
@@ -33,9 +31,8 @@ define i32 @sgt_i32(i32 %x) {
3331
; CHECK-LABEL: sgt_i32:
3432
; CHECK: // %bb.0:
3533
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
36-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
37-
; CHECK-NEXT: sub w8, w8, w0
38-
; CHECK-NEXT: csel w0, w0, w8, gt
34+
; CHECK-NEXT: subs w8, w8, w0
35+
; CHECK-NEXT: csel w0, w0, w8, lt
3936
; CHECK-NEXT: ret
4037
%cmp = icmp sgt i32 %x, -2097152
4138
%sub = sub i32 -2097152, %x
@@ -62,9 +59,8 @@ define i32 @slt_i32(i32 %x) {
6259
; CHECK-LABEL: slt_i32:
6360
; CHECK: // %bb.0:
6461
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
65-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
66-
; CHECK-NEXT: sub w8, w8, w0
67-
; CHECK-NEXT: csel w0, w0, w8, lt
62+
; CHECK-NEXT: subs w8, w8, w0
63+
; CHECK-NEXT: csel w0, w0, w8, gt
6864
; CHECK-NEXT: ret
6965
%cmp = icmp slt i32 %x, -2097152
7066
%sub = sub i32 -2097152, %x
@@ -91,9 +87,8 @@ define i32 @ugt_i32(i32 %x) {
9187
; CHECK-LABEL: ugt_i32:
9288
; CHECK: // %bb.0:
9389
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
94-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
95-
; CHECK-NEXT: sub w8, w8, w0
96-
; CHECK-NEXT: csel w0, w0, w8, hi
90+
; CHECK-NEXT: subs w8, w8, w0
91+
; CHECK-NEXT: csel w0, w0, w8, lo
9792
; CHECK-NEXT: ret
9893
%cmp = icmp ugt i32 %x, -2097152
9994
%sub = sub i32 -2097152, %x
@@ -120,9 +115,8 @@ define i32 @ult_i32(i32 %x) {
120115
; CHECK-LABEL: ult_i32:
121116
; CHECK: // %bb.0:
122117
; CHECK-NEXT: mov w8, #-2097152 // =0xffe00000
123-
; CHECK-NEXT: cmn w0, #512, lsl #12 // =2097152
124-
; CHECK-NEXT: sub w8, w8, w0
125-
; CHECK-NEXT: csel w0, w0, w8, lo
118+
; CHECK-NEXT: subs w8, w8, w0
119+
; CHECK-NEXT: csel w0, w0, w8, hi
126120
; CHECK-NEXT: ret
127121
%cmp = icmp ult i32 %x, -2097152
128122
%sub = sub i32 -2097152, %x
@@ -150,8 +144,7 @@ define i64 @eq_i64(i64 %x) {
150144
; CHECK-LABEL: eq_i64:
151145
; CHECK: // %bb.0:
152146
; CHECK-NEXT: mov w8, #100 // =0x64
153-
; CHECK-NEXT: cmp x0, #100
154-
; CHECK-NEXT: sub x8, x8, x0
147+
; CHECK-NEXT: subs x8, x8, x0
155148
; CHECK-NEXT: csel x0, x0, x8, eq
156149
; CHECK-NEXT: ret
157150
%cmp = icmp eq i64 %x, 100
@@ -164,8 +157,7 @@ define i64 @ne_i64(i64 %x) {
164157
; CHECK-LABEL: ne_i64:
165158
; CHECK: // %bb.0:
166159
; CHECK-NEXT: mov w8, #100 // =0x64
167-
; CHECK-NEXT: cmp x0, #100
168-
; CHECK-NEXT: sub x8, x8, x0
160+
; CHECK-NEXT: subs x8, x8, x0
169161
; CHECK-NEXT: csel x0, x0, x8, ne
170162
; CHECK-NEXT: ret
171163
%cmp = icmp ne i64 %x, 100
@@ -178,9 +170,8 @@ define i64 @sgt_i64(i64 %x) {
178170
; CHECK-LABEL: sgt_i64:
179171
; CHECK: // %bb.0:
180172
; CHECK-NEXT: mov w8, #100 // =0x64
181-
; CHECK-NEXT: cmp x0, #100
182-
; CHECK-NEXT: sub x8, x8, x0
183-
; CHECK-NEXT: csel x0, x0, x8, gt
173+
; CHECK-NEXT: subs x8, x8, x0
174+
; CHECK-NEXT: csel x0, x0, x8, lt
184175
; CHECK-NEXT: ret
185176
%cmp = icmp sgt i64 %x, 100
186177
%sub = sub i64 100, %x
@@ -206,9 +197,8 @@ define i64 @slt_i64(i64 %x) {
206197
; CHECK-LABEL: slt_i64:
207198
; CHECK: // %bb.0:
208199
; CHECK-NEXT: mov w8, #100 // =0x64
209-
; CHECK-NEXT: cmp x0, #100
210-
; CHECK-NEXT: sub x8, x8, x0
211-
; CHECK-NEXT: csel x0, x0, x8, lt
200+
; CHECK-NEXT: subs x8, x8, x0
201+
; CHECK-NEXT: csel x0, x0, x8, gt
212202
; CHECK-NEXT: ret
213203
%cmp = icmp slt i64 %x, 100
214204
%sub = sub i64 100, %x
@@ -234,9 +224,8 @@ define i64 @ugt_i64(i64 %x) {
234224
; CHECK-LABEL: ugt_i64:
235225
; CHECK: // %bb.0:
236226
; CHECK-NEXT: mov w8, #100 // =0x64
237-
; CHECK-NEXT: cmp x0, #100
238-
; CHECK-NEXT: sub x8, x8, x0
239-
; CHECK-NEXT: csel x0, x0, x8, hi
227+
; CHECK-NEXT: subs x8, x8, x0
228+
; CHECK-NEXT: csel x0, x0, x8, lo
240229
; CHECK-NEXT: ret
241230
%cmp = icmp ugt i64 %x, 100
242231
%sub = sub i64 100, %x
@@ -262,9 +251,8 @@ define i64 @ult_i64(i64 %x) {
262251
; CHECK-LABEL: ult_i64:
263252
; CHECK: // %bb.0:
264253
; CHECK-NEXT: mov w8, #100 // =0x64
265-
; CHECK-NEXT: cmp x0, #100
266-
; CHECK-NEXT: sub x8, x8, x0
267-
; CHECK-NEXT: csel x0, x0, x8, lo
254+
; CHECK-NEXT: subs x8, x8, x0
255+
; CHECK-NEXT: csel x0, x0, x8, hi
268256
; CHECK-NEXT: ret
269257
%cmp = icmp ult i64 %x, 100
270258
%sub = sub i64 100, %x

0 commit comments

Comments
 (0)