Skip to content

Commit 96af114

Browse files
authored
[InstCombine] Preserve the nsw/nuw flags for (X | Op01C) + Op1C --> X + (Op01C + Op1C) (#94586)
This patch preserves the `nsw` flag for the fold `(X | Op01C) + Op1C --> X + (Op01C + Op1C)` when the sum of `Op01C` and `Op1C` does not overflow in the signed sense, and preserves the `nuw` flag unconditionally. Keeping these flags on the new `add` allows InstCombine to subsequently simplify an `sdiv` of the result to a `udiv`. Alive2 proofs (provided by @nikic): https://alive2.llvm.org/ce/z/nrdCZT, https://alive2.llvm.org/ce/z/YnJHnH
1 parent dc3f8c2 commit 96af114

File tree

4 files changed

+58
-12
lines changed

4 files changed

+58
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -905,8 +905,14 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
905905

906906
// (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
907907
Constant *Op01C;
908-
if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C))))
909-
return BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
908+
if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) {
909+
BinaryOperator *NewAdd =
910+
BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
911+
NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap() &&
912+
willNotOverflowSignedAdd(Op01C, Op1C, Add));
913+
NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap());
914+
return NewAdd;
915+
}
910916

911917
// (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
912918
const APInt *C2;

llvm/test/Transforms/InstCombine/add.ll

+40
Original file line numberDiff line numberDiff line change
@@ -1510,6 +1510,46 @@ define i8 @add_like_or_t2_extrause(i8 %x) {
15101510
%r = add i8 %i1, 42
15111511
ret i8 %r
15121512
}
1513+
define i8 @fold_add_constant_preserve_nsw(i8 %x) {
1514+
; CHECK-LABEL: @fold_add_constant_preserve_nsw(
1515+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i8 [[X:%.*]], -120
1516+
; CHECK-NEXT: ret i8 [[ADD]]
1517+
;
1518+
%or = or disjoint i8 %x, -128
1519+
%add = add nsw i8 %or, 8
1520+
ret i8 %add
1521+
}
1522+
define i8 @fold_add_constant_no_nsw(i8 %x) {
1523+
; CHECK-LABEL: @fold_add_constant_no_nsw(
1524+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], 120
1525+
; CHECK-NEXT: ret i8 [[ADD]]
1526+
;
1527+
%or = or disjoint i8 %x, -128
1528+
%add = add nsw i8 %or, -8
1529+
ret i8 %add
1530+
}
1531+
define i8 @fold_add_constant_preserve_nuw(i8 %x) {
1532+
; CHECK-LABEL: @fold_add_constant_preserve_nuw(
1533+
; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[X:%.*]], -116
1534+
; CHECK-NEXT: ret i8 [[ADD]]
1535+
;
1536+
%or = or disjoint i8 %x, 128
1537+
%add = add nuw i8 %or, 12
1538+
ret i8 %add
1539+
}
1540+
define i32 @sdiv_to_udiv(i32 %arg0, i32 %arg1) {
1541+
; CHECK-LABEL: @sdiv_to_udiv(
1542+
; CHECK-NEXT: [[T0:%.*]] = shl nuw nsw i32 [[ARG0:%.*]], 8
1543+
; CHECK-NEXT: [[T2:%.*]] = add nuw nsw i32 [[T0]], 6242049
1544+
; CHECK-NEXT: [[T3:%.*]] = udiv i32 [[T2]], 192
1545+
; CHECK-NEXT: ret i32 [[T3]]
1546+
;
1547+
%t0 = shl nuw nsw i32 %arg0, 8
1548+
%t1 = or disjoint i32 %t0, 1
1549+
%t2 = add nuw nsw i32 %t1, 6242048
1550+
%t3 = sdiv i32 %t2, 192
1551+
ret i32 %t3
1552+
}
15131553

15141554
define i8 @add_like_or_disjoint(i8 %x) {
15151555
; CHECK-LABEL: @add_like_or_disjoint(

llvm/test/Transforms/InstCombine/sadd-with-overflow.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ define { i32, i1 } @fold_sub_simple(i32 %x) {
125125

126126
define { i32, i1 } @fold_with_distjoin_or(i32 %x) {
127127
; CHECK-LABEL: @fold_with_distjoin_or(
128-
; CHECK-NEXT: [[B:%.*]] = add i32 [[X:%.*]], 6
128+
; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[X:%.*]], 6
129129
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[B]], 0
130130
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
131131
;

llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

+9-9
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
182182
; CHECK: vector.body.1:
183183
; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ]
184184
; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15
185-
; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[INDEX_1]], 16
185+
; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16
186186
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0
187187
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1
188-
; CHECK-NEXT: [[TMP37:%.*]] = add i64 [[INDEX_1]], 17
189-
; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[INDEX_1]], 18
188+
; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17
189+
; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18
190190
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0
191191
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1
192192
; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], <i64 225, i64 225>
@@ -259,11 +259,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
259259
; CHECK: vector.body.2:
260260
; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ]
261261
; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30
262-
; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX_2]], 31
262+
; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31
263263
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0
264264
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1
265-
; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[INDEX_2]], 32
266-
; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[INDEX_2]], 33
265+
; CHECK-NEXT: [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32
266+
; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33
267267
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
268268
; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
269269
; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], <i64 225, i64 225>
@@ -336,11 +336,11 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
336336
; CHECK: vector.body.3:
337337
; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ]
338338
; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45
339-
; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[INDEX_3]], 46
339+
; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46
340340
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0
341341
; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1
342-
; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX_3]], 47
343-
; CHECK-NEXT: [[TMP100:%.*]] = add i64 [[INDEX_3]], 48
342+
; CHECK-NEXT: [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47
343+
; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48
344344
; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
345345
; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
346346
; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], <i64 225, i64 225>

0 commit comments

Comments
 (0)