Skip to content

Commit 2dda529

Browse files
authored
[AArch64] Fix Fold of Compare with Right-shifted Value (#127209)
This change enables the fold of (setcc ne (lshr x, c), 0) for 64-bit types with shift constants c >= 32. The fold already existed for other types and for smaller constants, but it was not applied to 64-bit types with c >= 32 because the constant c was compared against the bit size of the type of the setcc operation. That type is legalized to i32, which does not necessarily match the type of the lshr operation. The comparison now uses the bit size of the type of the lshr operation instead. Fixes #122380.
1 parent 15c2d1b commit 2dda529

File tree

2 files changed

+124
-2
lines changed

2 files changed

+124
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -25070,10 +25070,10 @@ static SDValue performSETCCCombine(SDNode *N,
2507025070
// setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
2507125071
if (Cond == ISD::SETNE && isNullConstant(RHS) &&
2507225072
LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
25073-
LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
2507425073
LHS->hasOneUse()) {
2507525074
EVT TstVT = LHS->getValueType(0);
25076-
if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
25075+
if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64 &&
25076+
LHS->getConstantOperandVal(1) < TstVT.getFixedSizeInBits()) {
2507725077
// this pattern will get better opt in emitComparison
2507825078
uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
2507925079
SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=aarch64-unknown-unknown < %s -o -| FileCheck %s
3+
4+
; i16 case: (x >> 1) != 0. The expected output has no separate shift; it tests
; w0 against the mask 0xfffe (TST) and materializes the result with CSET.
define i1 @lsr_1_ne_0_16(i16 %x) {
5+
; CHECK-LABEL: lsr_1_ne_0_16:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: tst w0, #0xfffe
8+
; CHECK-NEXT: cset w0, ne
9+
; CHECK-NEXT: ret
10+
%shr = lshr i16 %x, 1
11+
%cmp = icmp ne i16 %shr, 0
12+
ret i1 %cmp
13+
}
14+
15+
; i32 case: (x >> 1) != 0. The expected output is a TST of w0 against the mask
; 0xfffffffe followed by CSET, with no separate shift instruction.
define i1 @lsr_1_ne_0_32(i32 %x) {
16+
; CHECK-LABEL: lsr_1_ne_0_32:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: tst w0, #0xfffffffe
19+
; CHECK-NEXT: cset w0, ne
20+
; CHECK-NEXT: ret
21+
%shr = lshr i32 %x, 1
22+
%cmp = icmp ne i32 %shr, 0
23+
ret i1 %cmp
24+
}
25+
26+
; i32 case with a large shift: (x >> 30) != 0. The expected output tests only
; the top two bits of w0 (mask 0xc0000000) with TST, then CSET.
define i1 @lsr_30_ne_0_32(i32 %x) {
27+
; CHECK-LABEL: lsr_30_ne_0_32:
28+
; CHECK: // %bb.0:
29+
; CHECK-NEXT: tst w0, #0xc0000000
30+
; CHECK-NEXT: cset w0, ne
31+
; CHECK-NEXT: ret
32+
%shr = lshr i32 %x, 30
33+
%cmp = icmp ne i32 %shr, 0
34+
ret i1 %cmp
35+
}
36+
37+
; i32 boundary case: (x >> 31) != 0. Shifting an i32 right by 31 leaves only the
; former sign bit, so the shifted value is already 0 or 1; the expected output
; is a single LSR with no TST/CSET pair.
define i1 @lsr_31_ne_0_32(i32 %x) {
38+
; CHECK-LABEL: lsr_31_ne_0_32:
39+
; CHECK: // %bb.0:
40+
; CHECK-NEXT: lsr w0, w0, #31
41+
; CHECK-NEXT: ret
42+
%shr = lshr i32 %x, 31
43+
%cmp = icmp ne i32 %shr, 0
44+
ret i1 %cmp
45+
}
46+
47+
; i64 case: (x >> 1) != 0. The expected output is a TST of x0 against the mask
; 0xfffffffffffffffe followed by CSET, with no separate shift instruction.
define i1 @lsr_1_ne_0_64(i64 %x) {
48+
; CHECK-LABEL: lsr_1_ne_0_64:
49+
; CHECK: // %bb.0:
50+
; CHECK-NEXT: tst x0, #0xfffffffffffffffe
51+
; CHECK-NEXT: cset w0, ne
52+
; CHECK-NEXT: ret
53+
%shr = lshr i64 %x, 1
54+
%cmp = icmp ne i64 %shr, 0
55+
ret i1 %cmp
56+
}
57+
58+
; i64 case with the largest shift amount below 32: (x >> 31) != 0. The expected
; output is a TST of x0 against the mask 0xffffffff80000000 followed by CSET.
define i1 @lsr_31_ne_0_64(i64 %x) {
59+
; CHECK-LABEL: lsr_31_ne_0_64:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: tst x0, #0xffffffff80000000
62+
; CHECK-NEXT: cset w0, ne
63+
; CHECK-NEXT: ret
64+
%shr = lshr i64 %x, 31
65+
%cmp = icmp ne i64 %shr, 0
66+
ret i1 %cmp
67+
}
68+
69+
; i64 case with shift amount exactly 32: (x >> 32) != 0. This is the smallest
; shift constant >= 32 on a 64-bit type; the expected output is a TST of x0
; against the upper-half mask 0xffffffff00000000 followed by CSET.
define i1 @lsr_32_ne_0_64(i64 %x) {
70+
; CHECK-LABEL: lsr_32_ne_0_64:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: tst x0, #0xffffffff00000000
73+
; CHECK-NEXT: cset w0, ne
74+
; CHECK-NEXT: ret
75+
%shr = lshr i64 %x, 32
76+
%cmp = icmp ne i64 %shr, 0
77+
ret i1 %cmp
78+
}
79+
80+
; i64 case with a shift amount above 32: (x >> 33) != 0. The expected output is
; a TST of x0 against the mask 0xfffffffe00000000 followed by CSET.
define i1 @lsr_33_ne_0_64(i64 %x) {
81+
; CHECK-LABEL: lsr_33_ne_0_64:
82+
; CHECK: // %bb.0:
83+
; CHECK-NEXT: tst x0, #0xfffffffe00000000
84+
; CHECK-NEXT: cset w0, ne
85+
; CHECK-NEXT: ret
86+
%shr = lshr i64 %x, 33
87+
%cmp = icmp ne i64 %shr, 0
88+
ret i1 %cmp
89+
}
90+
91+
; i64 case with a large shift: (x >> 62) != 0. The expected output tests only
; the top two bits of x0 (mask 0xc000000000000000) with TST, then CSET.
define i1 @lsr_62_ne_0_64(i64 %x) {
92+
; CHECK-LABEL: lsr_62_ne_0_64:
93+
; CHECK: // %bb.0:
94+
; CHECK-NEXT: tst x0, #0xc000000000000000
95+
; CHECK-NEXT: cset w0, ne
96+
; CHECK-NEXT: ret
97+
%shr = lshr i64 %x, 62
98+
%cmp = icmp ne i64 %shr, 0
99+
ret i1 %cmp
100+
}
101+
102+
; i64 boundary case: (x >> 63) != 0. Shifting an i64 right by 63 leaves only the
; former sign bit, so the shifted value is already 0 or 1; the expected output
; is a single LSR on x0 with no TST/CSET pair.
define i1 @lsr_63_ne_0_64(i64 %x) {
103+
; CHECK-LABEL: lsr_63_ne_0_64:
104+
; CHECK: // %bb.0:
105+
; CHECK-NEXT: lsr x0, x0, #63
106+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
107+
; CHECK-NEXT: ret
108+
%shr = lshr i64 %x, 63
109+
%cmp = icmp ne i64 %shr, 0
110+
ret i1 %cmp
111+
}
112+
113+
; Vector case: lane-wise (x >> 1) != 0 on <4 x i16>. Unlike the scalar tests,
; the expected output keeps the shift (USHR) and compares with CMTST; no
; and-with-mask form is expected for vectors.
define <4 x i1> @lsr_1_ne_0_v4i16(<4 x i16> %x) {
114+
; CHECK-LABEL: lsr_1_ne_0_v4i16:
115+
; CHECK: // %bb.0:
116+
; CHECK-NEXT: ushr v0.4h, v0.4h, #1
117+
; CHECK-NEXT: cmtst v0.4h, v0.4h, v0.4h
118+
; CHECK-NEXT: ret
119+
%shr = lshr <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1>
120+
%cmp = icmp ne <4 x i16> %shr, <i16 0, i16 0, i16 0, i16 0>
121+
ret <4 x i1> %cmp
122+
}

0 commit comments

Comments
 (0)