
Commit 62cd07f

[InstCombine] Canonicalize sub mask, X -> ~X when high bits are ignored (#110635)
Alive2 proof: https://alive2.llvm.org/ce/z/NJgBPL

The motivating case for this patch is to emit `andn` on RISC-V with Zbb for expressions like `(sub 63, X) & 63`.
Parent: 1202c24
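The identity behind the fold (added here for illustration; not part of the commit message): when the left operand of the `sub` is a low-bit mask `M = 2^n - 1` and only bits covered by `M` are demanded, subtracting from the all-ones pattern never borrows within those bits, so `M - X` agrees with `~X` on every demanded bit. A minimal C++ check of the motivating case:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t x = 0; x < (1u << 20); ++x) {
    // Within the low 6 bits, 63 - x cannot borrow, so it equals ~x there.
    assert(((63 - x) & 63) == (~x & 63));
    // The demanded bits may also be a strict subset of the mask
    // (see @fold_sub_and_into_andn_subset in the tests below).
    assert(((63 - x) & 31) == (~x & 31));
  }
  return 0;
}
```

After the rewrite, `~X & 63` can be selected as a single `andn` on RISC-V targets with Zbb, instead of a subtract followed by an and.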

File tree: 3 files changed, +93 −16 lines


llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 9 additions & 0 deletions
```diff
@@ -592,6 +592,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
     if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
       return I->getOperand(1);
 
+    // Canonicalize sub mask, X -> ~X
+    const APInt *LHSC;
+    if (match(I->getOperand(0), m_LowBitMask(LHSC)) &&
+        DemandedFromOps.isSubsetOf(*LHSC)) {
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(I);
+      return Builder.CreateNot(I->getOperand(1));
+    }
+
     // Otherwise just compute the known bits of the result.
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
```
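Two notes on the hunk (illustrative, not from the patch itself): `m_LowBitMask` only matches constants whose set bits form a contiguous run starting at bit 0 (`0b0...01...1`), which is what makes the no-borrow argument valid, and the `InsertPointGuard` saves and restores the builder's position so the new `not` is created immediately before `I`. A rough scalar equivalent of the mask test, assuming a 64-bit constant:

```cpp
#include <cstdint>

// Hypothetical standalone helper mirroring APInt::isMask(): a low-bit mask
// is a nonzero M whose successor is a power of two, so (M + 1) & M == 0.
bool isLowBitMask(uint64_t M) { return M != 0 && ((M + 1) & M) == 0; }
```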

llvm/test/Transforms/InstCombine/sub.ll

Lines changed: 68 additions & 0 deletions
```diff
@@ -2797,3 +2797,71 @@ if.then:
 if.else:
   ret i32 0
 }
+
+define i32 @fold_sub_and_into_andn(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+define i1 @fold_sub_and_into_andn_icmp(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_icmp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 63
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 63
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 63
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+define i32 @fold_sub_and_into_andn_subset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_subset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 31
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP0]], 31
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 31
+  ret i32 %and
+}
+
+; Negative tests
+
+define i32 @fold_sub_and_into_andn_nonmask(i32 %x, i32 %y) {
+; CHECK-LABEL: @fold_sub_and_into_andn_nonmask(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 63
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 %y, %x
+  %and = and i32 %sub, 63
+  ret i32 %and
+}
+
+define i32 @fold_sub_and_into_andn_superset(i32 %x) {
+; CHECK-LABEL: @fold_sub_and_into_andn_superset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 63, [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 127
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+entry:
+  %sub = sub i32 63, %x
+  %and = and i32 %sub, 127
+  ret i32 %and
+}
```
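Why the superset case must not fold: with mask 127 the demanded bits include bit 6, and the borrow out of the low six bits of `63 - %x` can change that bit, so rewriting to `~%x` would be wrong. A concrete counterexample (illustrative C++, not part of the test file):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 1;
  // (63 - 1) & 127 == 62 (bit 6 clear), but ~1 & 127 == 126 (bit 6 set):
  // once a demanded bit lies above the mask, the fold is unsound.
  assert(((63 - x) & 127) == 62);
  assert((~x & 127) == 126);
  return 0;
}
```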

llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll

Lines changed: 16 additions & 16 deletions
```diff
@@ -17,13 +17,13 @@ define noundef i64 @foo(i64 noundef %0) {
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
 ; SSE-LABEL: @foo(
-; SSE-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; SSE-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; SSE-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; SSE-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; SSE-NEXT:    ret i64 [[TMP3]]
 ;
 ; AVX-LABEL: @foo(
-; AVX-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 44
-; AVX-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 -17592186044416, [[TMP2]]
+; AVX-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP0:%.*]], -1
+; AVX-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 44
 ; AVX-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = sub i64 1048575, %0
@@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) {
 define void @bar(ptr noundef %0) {
 ; SSE-LABEL: @bar(
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
-; SSE-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP4:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP8:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; SSE-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP10]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP12:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP11]]
+; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
-; SSE-NEXT:    [[TMP15:%.*]] = shl <2 x i64> [[TMP14]], <i64 44, i64 44>
-; SSE-NEXT:    [[TMP16:%.*]] = sub nuw nsw <2 x i64> <i64 -17592186044416, i64 -17592186044416>, [[TMP15]]
+; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], <i64 -1, i64 -1>
+; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], <i64 44, i64 44>
 ; SSE-NEXT:    store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bar(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
-; AVX-NEXT:    [[TMP3:%.*]] = shl <4 x i64> [[TMP2]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP4:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP3]]
+; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP6]], <i64 44, i64 44, i64 44, i64 44>
-; AVX-NEXT:    [[TMP8:%.*]] = sub nuw nsw <4 x i64> <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>, [[TMP7]]
+; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], <i64 44, i64 44, i64 44, i64 44>
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; AVX-NEXT:    ret void
 ;
```
