Skip to content

Commit f60f7b4

Browse files
committed
[InstCombine][X86] Add multiply-by-one handling for MULH/PMULHU/PMULHRS intrinsics
MULH/PMULHU simplify to ASHR/ZERO, as they just become a SEXT/ZEXT sign-splat instruction. PMULHRS doesn't simplify as much, so I've not attempted to fold it.
1 parent bf9e9e5 commit f60f7b4

File tree

3 files changed

+22
-18
lines changed

3 files changed

+22
-18
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,16 @@ static Value *simplifyX86pmulh(IntrinsicInst &II,
521521
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1))
522522
return ConstantAggregateZero::get(ResTy);
523523

524+
// Multiply by one.
525+
if (!IsRounding) {
526+
if (match(Arg0, PatternMatch::m_One()))
527+
return IsSigned ? Builder.CreateAShr(Arg1, 15)
528+
: ConstantAggregateZero::get(ResTy);
529+
if (match(Arg1, PatternMatch::m_One()))
530+
return IsSigned ? Builder.CreateAShr(Arg0, 15)
531+
: ConstantAggregateZero::get(ResTy);
532+
}
533+
524534
// Constant folding.
525535
if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
526536
return nullptr;

llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
111111

112112
define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
113113
; CHECK-LABEL: @one_pmulh_128(
114-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
114+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
115115
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
116116
;
117117
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
120120

121121
define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
122122
; CHECK-LABEL: @one_pmulh_128_commute(
123-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
123+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
124124
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
125125
;
126126
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
@@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
129129

130130
define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
131131
; CHECK-LABEL: @one_pmulh_256(
132-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
132+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
133133
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
134134
;
135135
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
138138

139139
define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
140140
; CHECK-LABEL: @one_pmulh_256_commute(
141-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
141+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
142142
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
143143
;
144144
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
@@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
147147

148148
define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
149149
; CHECK-LABEL: @one_pmulh_512(
150-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
150+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
151151
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
152152
;
153153
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
156156

157157
define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
158158
; CHECK-LABEL: @one_pmulh_512_commute(
159-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
159+
; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
160160
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
161161
;
162162
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)

llvm/test/Transforms/InstCombine/X86/x86-pmulhu.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -111,53 +111,47 @@ define <32 x i16> @zero_pmulhu_512_commute(<32 x i16> %a0) {
111111

112112
define <8 x i16> @one_pmulhu_128(<8 x i16> %a0) {
113113
; CHECK-LABEL: @one_pmulhu_128(
114-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
115-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
114+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
116115
;
117116
%1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
118117
ret <8 x i16> %1
119118
}
120119

121120
define <8 x i16> @one_pmulhu_128_commute(<8 x i16> %a0) {
122121
; CHECK-LABEL: @one_pmulhu_128_commute(
123-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
124-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
122+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
125123
;
126124
%1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
127125
ret <8 x i16> %1
128126
}
129127

130128
define <16 x i16> @one_pmulhu_256(<16 x i16> %a0) {
131129
; CHECK-LABEL: @one_pmulhu_256(
132-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
133-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
130+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
134131
;
135132
%1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
136133
ret <16 x i16> %1
137134
}
138135

139136
define <16 x i16> @one_pmulhu_256_commute(<16 x i16> %a0) {
140137
; CHECK-LABEL: @one_pmulhu_256_commute(
141-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
142-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
138+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
143139
;
144140
%1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
145141
ret <16 x i16> %1
146142
}
147143

148144
define <32 x i16> @one_pmulhu_512(<32 x i16> %a0) {
149145
; CHECK-LABEL: @one_pmulhu_512(
150-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
151-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
146+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
152147
;
153148
%1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
154149
ret <32 x i16> %1
155150
}
156151

157152
define <32 x i16> @one_pmulhu_512_commute(<32 x i16> %a0) {
158153
; CHECK-LABEL: @one_pmulhu_512_commute(
159-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
160-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
154+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
161155
;
162156
%1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
163157
ret <32 x i16> %1

0 commit comments

Comments
 (0)