@@ -88,43 +88,33 @@ define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
88
88
ret <4 x i32 > %4
89
89
}
90
90
91
- ; TODO: [2] = (-5*13)+(6*-15) = -155 = 4294967141
91
+ ; [2]: (-5*13)+(6*-15) = -155 = 4294967141
92
92
define <4 x i32 > @combine_pmaddwd_constant () {
93
93
; SSE-LABEL: combine_pmaddwd_constant:
94
94
; SSE: # %bb.0:
95
- ; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
96
- ; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
95
+ ; SSE-NEXT: movaps {{.*#+}} xmm0 = [19,17,4294967141,271]
97
96
; SSE-NEXT: retq
98
97
;
99
98
; AVX-LABEL: combine_pmaddwd_constant:
100
99
; AVX: # %bb.0:
101
- ; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
102
- ; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
100
+ ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [19,17,4294967141,271]
103
101
; AVX-NEXT: retq
104
102
%1 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > <i16 -1 , i16 2 , i16 3 , i16 -4 , i16 -5 , i16 6 , i16 7 , i16 -8 >, <8 x i16 > <i16 -5 , i16 7 , i16 -9 , i16 -11 , i16 13 , i16 -15 , i16 17 , i16 -19 >)
105
103
ret <4 x i32 > %1
106
104
}
107
105
108
106
; ensure we don't assume pmaddwd performs add nsw
109
- ; TODO : (-32768*-32768)+(-32768*-32768) = 0x80000000 = 2147483648
107
+ ; [0] : (-32768*-32768)+(-32768*-32768) = 0x80000000 = 2147483648
110
108
define <4 x i32 > @combine_pmaddwd_constant_nsw () {
111
109
; SSE-LABEL: combine_pmaddwd_constant_nsw:
112
110
; SSE: # %bb.0:
113
- ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
114
- ; SSE-NEXT: pmaddwd %xmm0, %xmm0
111
+ ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
115
112
; SSE-NEXT: retq
116
113
;
117
- ; AVX1-LABEL: combine_pmaddwd_constant_nsw:
118
- ; AVX1: # %bb.0:
119
- ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
120
- ; AVX1-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
121
- ; AVX1-NEXT: retq
122
- ;
123
- ; AVX2-LABEL: combine_pmaddwd_constant_nsw:
124
- ; AVX2: # %bb.0:
125
- ; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
126
- ; AVX2-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
127
- ; AVX2-NEXT: retq
114
+ ; AVX-LABEL: combine_pmaddwd_constant_nsw:
115
+ ; AVX: # %bb.0:
116
+ ; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
117
+ ; AVX-NEXT: retq
128
118
%1 = insertelement <8 x i16 > undef , i16 32768 , i32 0
129
119
%2 = shufflevector <8 x i16 > %1 , <8 x i16 > undef , <8 x i32 > zeroinitializer
130
120
%3 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %2 , <8 x i16 > %2 )
@@ -213,51 +203,26 @@ define <8 x i16> @combine_pmaddubsw_demandedelts(<16 x i8> %a0, <16 x i8> %a1) {
213
203
ret <8 x i16 > %4
214
204
}
215
205
216
- ; TODO
206
+ ; [3]: ((uint8_t)-6*7)+(7*-8) = (250*7)+(7*-8) = 1694
217
207
define i32 @combine_pmaddubsw_constant () {
218
- ; SSE-LABEL: combine_pmaddubsw_constant:
219
- ; SSE: # %bb.0:
220
- ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
221
- ; SSE-NEXT: pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
222
- ; SSE-NEXT: pextrw $3, %xmm0, %eax
223
- ; SSE-NEXT: cwtl
224
- ; SSE-NEXT: retq
225
- ;
226
- ; AVX-LABEL: combine_pmaddubsw_constant:
227
- ; AVX: # %bb.0:
228
- ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
229
- ; AVX-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
230
- ; AVX-NEXT: vpextrw $3, %xmm0, %eax
231
- ; AVX-NEXT: cwtl
232
- ; AVX-NEXT: retq
208
+ ; CHECK-LABEL: combine_pmaddubsw_constant:
209
+ ; CHECK: # %bb.0:
210
+ ; CHECK-NEXT: movl $1694, %eax # imm = 0x69E
211
+ ; CHECK-NEXT: retq
233
212
%1 = call <8 x i16 > @llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8 > <i8 0 , i8 1 , i8 2 , i8 3 , i8 4 , i8 5 , i8 -6 , i8 7 , i8 8 , i8 9 , i8 10 , i8 11 , i8 12 , i8 13 , i8 14 , i8 15 >, <16 x i8 > <i8 1 , i8 2 , i8 3 , i8 4 , i8 5 , i8 6 , i8 7 , i8 -8 , i8 9 , i8 10 , i8 11 , i8 12 , i8 13 , i8 14 , i8 15 , i8 16 >)
234
- %2 = extractelement <8 x i16 > %1 , i32 3 ; ((uint16_t)-6*7)+(7*-8) = (250*7)+(7*-8) = 1694
213
+ %2 = extractelement <8 x i16 > %1 , i32 3
235
214
%3 = sext i16 %2 to i32
236
215
ret i32 %3
237
216
}
238
217
239
- ; TODO
218
+ ; [0]: add_sat_i16(((uint8_t)-1*-128),((uint8_t)-1*-128)) = add_sat_i16((255*-128),(255*-128)) = sat_i16(-65280) = -32768
240
219
define i32 @combine_pmaddubsw_constant_sat () {
241
- ; SSE-LABEL: combine_pmaddubsw_constant_sat:
242
- ; SSE: # %bb.0:
243
- ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
244
- ; SSE-NEXT: pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
245
- ; SSE-NEXT: movd %xmm0, %eax
246
- ; SSE-NEXT: cwtl
247
- ; SSE-NEXT: retq
248
- ;
249
- ; AVX-LABEL: combine_pmaddubsw_constant_sat:
250
- ; AVX: # %bb.0:
251
- ; AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
252
- ; AVX-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
253
- ; AVX-NEXT: vmovd %xmm0, %eax
254
- ; AVX-NEXT: cwtl
255
- ; AVX-NEXT: retq
220
+ ; CHECK-LABEL: combine_pmaddubsw_constant_sat:
221
+ ; CHECK: # %bb.0:
222
+ ; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000
223
+ ; CHECK-NEXT: retq
256
224
%1 = call <8 x i16 > @llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8 > <i8 -1 , i8 -1 , i8 2 , i8 3 , i8 4 , i8 5 , i8 -6 , i8 7 , i8 8 , i8 9 , i8 10 , i8 11 , i8 12 , i8 13 , i8 14 , i8 15 >, <16 x i8 > <i8 -128 , i8 -128 , i8 3 , i8 4 , i8 5 , i8 6 , i8 7 , i8 -8 , i8 9 , i8 10 , i8 11 , i8 12 , i8 13 , i8 14 , i8 15 , i8 16 >)
257
- %2 = extractelement <8 x i16 > %1 , i32 0 ; add_sat_i16(((uint16_t)-1*-128),((uint16_t)-1*-128)_ = add_sat_i16(255*-128),(255*-128)) = sat_i16(-65280) = -32768
225
+ %2 = extractelement <8 x i16 > %1 , i32 0
258
226
%3 = sext i16 %2 to i32
259
227
ret i32 %3
260
228
}
261
-
262
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
263
- ; CHECK: {{.*}}
0 commit comments