@@ -88,53 +88,47 @@ define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
88
88
ret <4 x i32 > %4
89
89
}
90
90
91
- ; TODO
92
- define i32 @combine_pmaddwd_constant () {
91
+ ; TODO: not constant folded yet (pmaddwd is still emitted below) - expected lanes are <19, 17, -155, 271>, e.g. [2] = (-5*13)+(6*-15) = -155 = 4294967141 as unsigned i32
92
+ define < 4 x i32 > @combine_pmaddwd_constant () {
93
93
; SSE-LABEL: combine_pmaddwd_constant:
94
94
; SSE: # %bb.0:
95
95
; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
96
96
; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
97
- ; SSE-NEXT: pextrd $2, %xmm0, %eax
98
97
; SSE-NEXT: retq
99
98
;
100
99
; AVX-LABEL: combine_pmaddwd_constant:
101
100
; AVX: # %bb.0:
102
101
; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
103
102
; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
104
- ; AVX-NEXT: vpextrd $2, %xmm0, %eax
105
103
; AVX-NEXT: retq
106
104
%1 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > <i16 -1 , i16 2 , i16 3 , i16 -4 , i16 -5 , i16 6 , i16 7 , i16 -8 >, <8 x i16 > <i16 -5 , i16 7 , i16 -9 , i16 -11 , i16 13 , i16 -15 , i16 17 , i16 -19 >)
107
- %2 = extractelement <4 x i32 > %1 , i32 2 ; (-5*13)+(6*-15) = -155
108
- ret i32 %2
105
+ ret <4 x i32 > %1
109
106
}
110
107
111
108
; Ensure we don't assume that the pairwise add performed by pmaddwd is nsw (the sum can wrap on signed overflow).
112
- define i32 @combine_pmaddwd_constant_nsw () {
109
+ ; TODO: not constant folded yet - every lane is (-32768*-32768)+(-32768*-32768) = 0x80000000 = 2147483648 as unsigned i32, i.e. INT32_MIN when read as signed
110
+ define <4 x i32 > @combine_pmaddwd_constant_nsw () {
113
111
; SSE-LABEL: combine_pmaddwd_constant_nsw:
114
112
; SSE: # %bb.0:
115
113
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
116
114
; SSE-NEXT: pmaddwd %xmm0, %xmm0
117
- ; SSE-NEXT: movd %xmm0, %eax
118
115
; SSE-NEXT: retq
119
116
;
120
117
; AVX1-LABEL: combine_pmaddwd_constant_nsw:
121
118
; AVX1: # %bb.0:
122
119
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
123
120
; AVX1-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
124
- ; AVX1-NEXT: vmovd %xmm0, %eax
125
121
; AVX1-NEXT: retq
126
122
;
127
123
; AVX2-LABEL: combine_pmaddwd_constant_nsw:
128
124
; AVX2: # %bb.0:
129
125
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
130
126
; AVX2-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
131
- ; AVX2-NEXT: vmovd %xmm0, %eax
132
127
; AVX2-NEXT: retq
133
128
%1 = insertelement <8 x i16 > undef , i16 32768 , i32 0
134
129
%2 = shufflevector <8 x i16 > %1 , <8 x i16 > undef , <8 x i32 > zeroinitializer
135
130
%3 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %2 , <8 x i16 > %2 )
136
- %4 = extractelement <4 x i32 > %3 , i32 0 ; (-32768*-32768)+(-32768*-32768) = 0x80000000
137
- ret i32 %4
131
+ ret <4 x i32 > %3
138
132
}
139
133
140
134
define <8 x i16 > @combine_pmaddubsw_zero (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
0 commit comments