Skip to content

Commit f699e12

Browse files
SC llvm teamSC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:2feffecb8853 into amd-gfx:ba1f05bbae53
Local branch amd-gfx ba1f05b Merged main:1d0f40ba05b7 into amd-gfx:edac555a5f87 Remote branch main 2feffec [ConstantRange] Estimate tighter lower (upper) bounds for masked binary and (or) (llvm#120352)
2 parents ba1f05b + 2feffec commit f699e12

File tree

5 files changed

+199
-16
lines changed

5 files changed

+199
-16
lines changed

clang/test/CodeGen/AArch64/fpm-helpers.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ extern "C" {
3535
//
3636
fpm_t test_init() { return __arm_fpm_init(); }
3737

38-
// CHECK-LABEL: define dso_local noundef i64 @test_src1_1(
38+
// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_1(
3939
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
4040
// CHECK-NEXT: [[ENTRY:.*:]]
4141
// CHECK-NEXT: ret i64 -8
@@ -44,7 +44,7 @@ fpm_t test_src1_1() {
4444
return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2);
4545
}
4646

47-
// CHECK-LABEL: define dso_local noundef i64 @test_src1_2(
47+
// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_2(
4848
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
4949
// CHECK-NEXT: [[ENTRY:.*:]]
5050
// CHECK-NEXT: ret i64 1
@@ -53,7 +53,7 @@ fpm_t test_src1_2() {
5353
return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3);
5454
}
5555

56-
// CHECK-LABEL: define dso_local noundef i64 @test_src2_1(
56+
// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_1(
5757
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
5858
// CHECK-NEXT: [[ENTRY:.*:]]
5959
// CHECK-NEXT: ret i64 -57
@@ -62,7 +62,7 @@ fpm_t test_src2_1() {
6262
return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2);
6363
}
6464

65-
// CHECK-LABEL: define dso_local noundef i64 @test_src2_2(
65+
// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_2(
6666
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
6767
// CHECK-NEXT: [[ENTRY:.*:]]
6868
// CHECK-NEXT: ret i64 8
@@ -71,7 +71,7 @@ fpm_t test_src2_2() {
7171
return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3);
7272
}
7373

74-
// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1(
74+
// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst1_1(
7575
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
7676
// CHECK-NEXT: [[ENTRY:.*:]]
7777
// CHECK-NEXT: ret i64 -449
@@ -80,7 +80,7 @@ fpm_t test_dst1_1() {
8080
return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2);
8181
}
8282

83-
// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2(
83+
// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst2_2(
8484
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
8585
// CHECK-NEXT: [[ENTRY:.*:]]
8686
// CHECK-NEXT: ret i64 64
@@ -139,21 +139,21 @@ fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); }
139139
//
140140
fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); }
141141

142-
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
142+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_1(
143143
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
144144
// CHECK-NEXT: [[ENTRY:.*:]]
145145
// CHECK-NEXT: ret i64 2147483648
146146
//
147147
fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); }
148148

149-
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
149+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_2(
150150
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
151151
// CHECK-NEXT: [[ENTRY:.*:]]
152152
// CHECK-NEXT: ret i64 2130706432
153153
//
154154
fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); }
155155

156-
// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
156+
// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_3(
157157
// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
158158
// CHECK-NEXT: [[ENTRY:.*:]]
159159
// CHECK-NEXT: ret i64 4278190080

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 522562
19+
#define LLVM_MAIN_REVISION 522563
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/IR/ConstantRange.cpp

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,15 +1520,72 @@ ConstantRange ConstantRange::binaryNot() const {
15201520
return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this);
15211521
}
15221522

1523+
/// Estimate the 'bit-masked AND' operation's lower bound.
1524+
///
1525+
/// E.g., given two ranges as follows (single quotes are separators and
1526+
/// have no meaning here),
1527+
///
1528+
/// LHS = [10'00101'1, ; LLo
1529+
/// 10'10000'0] ; LHi
1530+
/// RHS = [10'11111'0, ; RLo
1531+
/// 10'11111'1] ; RHi
1532+
///
1533+
/// we know that the higher 2 bits of the result are always 10; and we also
1534+
/// notice that RHS[1:6] are always 1, so the result[1:6] cannot be less than
1535+
/// LHS[1:6] (i.e., 00101). Thus, the lower bound is 10'00101'0.
1536+
///
1537+
/// The algorithm is as follows,
1538+
/// 1. we first calculate a mask to find the higher common bits by
1539+
/// Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo));
1540+
/// Mask = clear all non-leading-ones bits in Mask;
1541+
/// in the example, the Mask is set to 11'00000'0;
1542+
/// 2. calculate a new mask by setting all common leading bits to 1 in RHS, and
1543+
/// keeping the longest leading ones (i.e., 11'11111'0 in the example);
1544+
/// 3. return (LLo & new mask) as the lower bound;
1545+
/// 4. repeat steps 2 and 3 with LHS and RHS swapped, and update the lower
1546+
/// bound with the larger one.
1547+
static APInt estimateBitMaskedAndLowerBound(const ConstantRange &LHS,
1548+
const ConstantRange &RHS) {
1549+
auto BitWidth = LHS.getBitWidth();
1550+
// If either is full set or unsigned wrapped, then the range must contain '0'
1551+
// which forces the lower bound to 0.
1552+
if ((LHS.isFullSet() || RHS.isFullSet()) ||
1553+
(LHS.isWrappedSet() || RHS.isWrappedSet()))
1554+
return APInt::getZero(BitWidth);
1555+
1556+
auto LLo = LHS.getLower();
1557+
auto LHi = LHS.getUpper() - 1;
1558+
auto RLo = RHS.getLower();
1559+
auto RHi = RHS.getUpper() - 1;
1560+
1561+
// Calculate the mask for the higher common bits.
1562+
auto Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo));
1563+
unsigned LeadingOnes = Mask.countLeadingOnes();
1564+
Mask.clearLowBits(BitWidth - LeadingOnes);
1565+
1566+
auto estimateBound = [BitWidth, &Mask](APInt ALo, const APInt &BLo,
1567+
const APInt &BHi) {
1568+
unsigned LeadingOnes = ((BLo & BHi) | Mask).countLeadingOnes();
1569+
unsigned StartBit = BitWidth - LeadingOnes;
1570+
ALo.clearLowBits(StartBit);
1571+
return ALo;
1572+
};
1573+
1574+
auto LowerBoundByLHS = estimateBound(LLo, RLo, RHi);
1575+
auto LowerBoundByRHS = estimateBound(RLo, LLo, LHi);
1576+
1577+
return APIntOps::umax(LowerBoundByLHS, LowerBoundByRHS);
1578+
}
1579+
15231580
ConstantRange ConstantRange::binaryAnd(const ConstantRange &Other) const {
15241581
if (isEmptySet() || Other.isEmptySet())
15251582
return getEmpty();
15261583

15271584
ConstantRange KnownBitsRange =
15281585
fromKnownBits(toKnownBits() & Other.toKnownBits(), false);
1529-
ConstantRange UMinUMaxRange =
1530-
getNonEmpty(APInt::getZero(getBitWidth()),
1531-
APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1);
1586+
auto LowerBound = estimateBitMaskedAndLowerBound(*this, Other);
1587+
ConstantRange UMinUMaxRange = getNonEmpty(
1588+
LowerBound, APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1);
15321589
return KnownBitsRange.intersectWith(UMinUMaxRange);
15331590
}
15341591

@@ -1538,10 +1595,17 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const {
15381595

15391596
ConstantRange KnownBitsRange =
15401597
fromKnownBits(toKnownBits() | Other.toKnownBits(), false);
1598+
1599+
// ~a & ~b >= x
1600+
// <=> ~(~a & ~b) <= ~x
1601+
// <=> a | b <= ~x
1602+
// <=> a | b < ~x + 1 = -x
1603+
// thus, UpperBound(a | b) == -LowerBound(~a & ~b)
1604+
auto UpperBound =
1605+
-estimateBitMaskedAndLowerBound(binaryNot(), Other.binaryNot());
15411606
// Upper wrapped range.
1542-
ConstantRange UMaxUMinRange =
1543-
getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()),
1544-
APInt::getZero(getBitWidth()));
1607+
ConstantRange UMaxUMinRange = getNonEmpty(
1608+
APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), UpperBound);
15451609
return KnownBitsRange.intersectWith(UMaxUMinRange);
15461610
}
15471611

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -passes=ipsccp %s | FileCheck %s
3+
4+
declare void @use(i1)
5+
6+
define i1 @test1(i64 %x) {
7+
; CHECK-LABEL: @test1(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: [[COND:%.*]] = icmp ugt i64 [[X:%.*]], 65535
10+
; CHECK-NEXT: call void @llvm.assume(i1 [[COND]])
11+
; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X]], -65521
12+
; CHECK-NEXT: ret i1 false
13+
;
14+
entry:
15+
%cond = icmp ugt i64 %x, 65535
16+
call void @llvm.assume(i1 %cond)
17+
%mask = and i64 %x, -65521
18+
%cmp = icmp eq i64 %mask, 0
19+
ret i1 %cmp
20+
}
21+
22+
define void @test.and(i64 %x, i64 %y) {
23+
; CHECK-LABEL: @test.and(
24+
; CHECK-NEXT: entry:
25+
; CHECK-NEXT: [[C0:%.*]] = icmp uge i64 [[X:%.*]], 138
26+
; CHECK-NEXT: [[C1:%.*]] = icmp ule i64 [[X]], 161
27+
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
28+
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
29+
; CHECK-NEXT: [[C2:%.*]] = icmp uge i64 [[Y:%.*]], 186
30+
; CHECK-NEXT: [[C3:%.*]] = icmp ule i64 [[Y]], 188
31+
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
32+
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
33+
; CHECK-NEXT: [[AND:%.*]] = and i64 [[X]], [[Y]]
34+
; CHECK-NEXT: call void @use(i1 false)
35+
; CHECK-NEXT: [[R1:%.*]] = icmp ult i64 [[AND]], 137
36+
; CHECK-NEXT: call void @use(i1 [[R1]])
37+
; CHECK-NEXT: ret void
38+
;
39+
entry:
40+
%c0 = icmp uge i64 %x, 138 ; 0b10001010
41+
%c1 = icmp ule i64 %x, 161 ; 0b10100001
42+
call void @llvm.assume(i1 %c0)
43+
call void @llvm.assume(i1 %c1)
44+
%c2 = icmp uge i64 %y, 186 ; 0b10111010
45+
%c3 = icmp ule i64 %y, 188 ; 0b10111100
46+
call void @llvm.assume(i1 %c2)
47+
call void @llvm.assume(i1 %c3)
48+
%and = and i64 %x, %y
49+
%r0 = icmp ult i64 %and, 136 ; 0b10001000
50+
call void @use(i1 %r0) ; false
51+
%r1 = icmp ult i64 %and, 137
52+
call void @use(i1 %r1) ; unknown
53+
ret void
54+
}
55+
56+
define void @test.or(i64 %x, i64 %y) {
57+
; CHECK-LABEL: @test.or(
58+
; CHECK-NEXT: entry:
59+
; CHECK-NEXT: [[C0:%.*]] = icmp ule i64 [[X:%.*]], 117
60+
; CHECK-NEXT: [[C1:%.*]] = icmp uge i64 [[X]], 95
61+
; CHECK-NEXT: call void @llvm.assume(i1 [[C0]])
62+
; CHECK-NEXT: call void @llvm.assume(i1 [[C1]])
63+
; CHECK-NEXT: [[C2:%.*]] = icmp ule i64 [[Y:%.*]], 69
64+
; CHECK-NEXT: [[C3:%.*]] = icmp uge i64 [[Y]], 67
65+
; CHECK-NEXT: call void @llvm.assume(i1 [[C2]])
66+
; CHECK-NEXT: call void @llvm.assume(i1 [[C3]])
67+
; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[Y]]
68+
; CHECK-NEXT: call void @use(i1 false)
69+
; CHECK-NEXT: [[R1:%.*]] = icmp ugt i64 [[OR]], 118
70+
; CHECK-NEXT: call void @use(i1 [[R1]])
71+
; CHECK-NEXT: ret void
72+
;
73+
entry:
74+
%c0 = icmp ule i64 %x, 117 ; 0b01110101
75+
%c1 = icmp uge i64 %x, 95 ; 0b01011111
76+
call void @llvm.assume(i1 %c0)
77+
call void @llvm.assume(i1 %c1)
78+
%c2 = icmp ule i64 %y, 69 ; 0b01000101
79+
%c3 = icmp uge i64 %y, 67 ; 0b01000011
80+
call void @llvm.assume(i1 %c2)
81+
call void @llvm.assume(i1 %c3)
82+
%or = or i64 %x, %y
83+
%r0 = icmp ugt i64 %or, 119 ; 0b01110111
84+
call void @use(i1 %r0) ; false
85+
%r1 = icmp ugt i64 %or, 118
86+
call void @use(i1 %r1) ; unknown
87+
ret void
88+
}

llvm/unittests/IR/ConstantRangeTest.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,37 @@ TEST_F(ConstantRangeTest, binaryAnd) {
27202720
EXPECT_EQ(R16_32.binaryAnd(R0_99), R0_32);
27212721
EXPECT_EQ(R0_99.binaryAnd(R16_32), R0_32);
27222722

2723+
// 'And' where the leading bits are masked (with common leading bits stripped)
2724+
ConstantRange RMaskedL(APInt(8, 0b10'00101'1), APInt(8, 0b10'10000'0 + 1));
2725+
ConstantRange RMaskedR(APInt(8, 0b10'11111'0), APInt(8, 0b10'11111'1 + 1));
2726+
EXPECT_EQ(RMaskedL.binaryAnd(RMaskedR).getLower(), APInt(8, 0b10'00101'0));
2727+
EXPECT_EQ(RMaskedR.binaryAnd(RMaskedL).getLower(), APInt(8, 0b10'00101'0));
2728+
2729+
ConstantRange RMaskedL1(APInt(8, 0b00'011'010), APInt(8, 0b00'100'100 + 1));
2730+
ConstantRange RMaskedR1(APInt(8, 0b00'111'010), APInt(8, 0b00'111'110 + 1));
2731+
EXPECT_EQ(RMaskedL1.binaryAnd(RMaskedR1).getLower(), APInt(8, 0b00'011'000));
2732+
EXPECT_EQ(RMaskedR1.binaryAnd(RMaskedL1).getLower(), APInt(8, 0b00'011'000));
2733+
2734+
ConstantRange RMaskedL2(APInt(8, 0b0000'0111u), APInt(8, 0b0000'1101u + 1u));
2735+
ConstantRange RMaskedR2(APInt(8, 0xff), APInt(8, 0));
2736+
EXPECT_EQ(RMaskedL2.binaryAnd(RMaskedR2), RMaskedL2);
2737+
EXPECT_EQ(RMaskedR2.binaryAnd(RMaskedL2), RMaskedL2);
2738+
2739+
ConstantRange RMaskedL3(APInt(4, 0b0011u), APInt(4, 0));
2740+
ConstantRange RMaskedR3(APInt(4, 0b1011u), APInt(4, 0));
2741+
APInt Zero_4(4, 0);
2742+
EXPECT_EQ(RMaskedL3.binaryAnd(RMaskedR3).getLower().uge(Zero_4), true);
2743+
EXPECT_EQ(RMaskedR3.binaryAnd(RMaskedL3).getLower().uge(Zero_4), true);
2744+
2745+
// wrapped set
2746+
APInt NegSeven(4, 9); // Also -7
2747+
ConstantRange RMaskedL4(NegSeven, APInt(4, 1));
2748+
ConstantRange RMaskedR4(NegSeven, APInt(4, 0));
2749+
EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(Zero_4), true);
2750+
EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(Zero_4), true);
2751+
EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(NegSeven), true);
2752+
EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(NegSeven), true);
2753+
27232754
TestBinaryOpExhaustive(
27242755
[](const ConstantRange &CR1, const ConstantRange &CR2) {
27252756
return CR1.binaryAnd(CR2);

0 commit comments

Comments
 (0)