@@ -31,6 +31,62 @@ pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
31
31
vpmadd52luq_512 ( a, b, c)
32
32
}
33
33
34
+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
35
+ /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
36
+ /// unsigned integer from the intermediate result with the
37
+ /// corresponding unsigned 64-bit integer in `a`, and store the
38
+ /// results in `dst`.
39
+ ///
40
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485)
41
+ #[ inline]
42
+ #[ target_feature( enable = "avx512ifma,avx512vl" ) ]
43
+ #[ cfg_attr( test, assert_instr( vpmadd52huq) ) ]
44
+ pub unsafe fn _mm256_madd52hi_epu64 ( a : __m256i , b : __m256i , c : __m256i ) -> __m256i {
45
+ vpmadd52huq_256 ( a, b, c)
46
+ }
47
+
48
+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
49
+ /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
50
+ /// unsigned integer from the intermediate result with the
51
+ /// corresponding unsigned 64-bit integer in `a`, and store the
52
+ /// results in `dst`.
53
+ ///
54
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494)
55
+ #[ inline]
56
+ #[ target_feature( enable = "avx512ifma,avx512vl" ) ]
57
+ #[ cfg_attr( test, assert_instr( vpmadd52huq) ) ]
58
+ pub unsafe fn _mm256_madd52lo_epu64 ( a : __m256i , b : __m256i , c : __m256i ) -> __m256i {
59
+ vpmadd52luq_256 ( a, b, c)
60
+ }
61
+
62
+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
63
+ /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
64
+ /// unsigned integer from the intermediate result with the
65
+ /// corresponding unsigned 64-bit integer in `a`, and store the
66
+ /// results in `dst`.
67
+ ///
68
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
69
+ #[ inline]
70
+ #[ target_feature( enable = "avx512ifma,avx512vl" ) ]
71
+ #[ cfg_attr( test, assert_instr( vpmadd52huq) ) ]
72
+ pub unsafe fn _mm_madd52hi_epu64 ( a : __m128i , b : __m128i , c : __m128i ) -> __m128i {
73
+ vpmadd52huq_128 ( a, b, c)
74
+ }
75
+
76
+ /// Multiply packed unsigned 52-bit integers in each 64-bit element of
77
+ /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
78
+ /// unsigned integer from the intermediate result with the
79
+ /// corresponding unsigned 64-bit integer in `a`, and store the
80
+ /// results in `dst`.
81
+ ///
82
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL)
83
+ #[ inline]
84
+ #[ target_feature( enable = "avx512ifma,avx512vl" ) ]
85
+ #[ cfg_attr( test, assert_instr( vpmadd52huq) ) ]
86
+ pub unsafe fn _mm_madd52lo_epu64 ( a : __m128i , b : __m128i , c : __m128i ) -> __m128i {
87
+ vpmadd52luq_128 ( a, b, c)
88
+ }
89
+
34
90
#[ allow( improper_ctypes) ]
35
91
extern "C" {
36
92
#[ link_name = "llvm.x86.avx512.vpmadd52l.uq.128" ]
@@ -81,4 +137,60 @@ mod tests {
81
137
82
138
assert_eq_m512i ( a, expected) ;
83
139
}
140
+
141
+ #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
142
+ unsafe fn test_mm256_madd52hi_epu64 ( ) {
143
+ let mut a = _mm256_set1_epi64x ( 10 << 40 ) ;
144
+ let b = _mm256_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
145
+ let c = _mm256_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
146
+
147
+ a = _mm256_madd52hi_epu64 ( a, b, c) ;
148
+
149
+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
150
+ let expected = _mm256_set1_epi64x ( 11030549757952 ) ;
151
+
152
+ assert_eq_m256i ( a, expected) ;
153
+ }
154
+
155
+ #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
156
+ unsafe fn test_mm256_madd52lo_epu64 ( ) {
157
+ let mut a = _mm256_set1_epi64x ( 10 << 40 ) ;
158
+ let b = _mm256_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
159
+ let c = _mm256_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
160
+
161
+ a = _mm256_madd52lo_epu64 ( a, b, c) ;
162
+
163
+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
164
+ let expected = _mm256_set1_epi64x ( 100055558127628 ) ;
165
+
166
+ assert_eq_m256i ( a, expected) ;
167
+ }
168
+
169
+ #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
170
+ unsafe fn test_mm_madd52hi_epu64 ( ) {
171
+ let mut a = _mm_set1_epi64x ( 10 << 40 ) ;
172
+ let b = _mm_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
173
+ let c = _mm_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
174
+
175
+ a = _mm_madd52hi_epu64 ( a, b, c) ;
176
+
177
+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
178
+ let expected = _mm_set1_epi64x ( 11030549757952 ) ;
179
+
180
+ assert_eq_m128i ( a, expected) ;
181
+ }
182
+
183
+ #[ simd_test( enable = "avx512ifma,avx512vl" ) ]
184
+ unsafe fn test_mm_madd52lo_epu64 ( ) {
185
+ let mut a = _mm_set1_epi64x ( 10 << 40 ) ;
186
+ let b = _mm_set1_epi64x ( ( 11 << 40 ) + 4 ) ;
187
+ let c = _mm_set1_epi64x ( ( 12 << 40 ) + 3 ) ;
188
+
189
+ a = _mm_madd52hi_epu64 ( a, b, c) ;
190
+
191
+ // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
192
+ let expected = _mm_set1_epi64x ( 11030549757952 ) ;
193
+
194
+ assert_eq_m128i ( a, expected) ;
195
+ }
84
196
}
0 commit comments