Skip to content

Commit 86098df

Browse files
TDeckingAmanieu
authored andcommitted
Use generic simd for avx512 popcnt
1 parent fd5fc64 commit 86098df

File tree

2 files changed

+38
-78
lines changed

2 files changed

+38
-78
lines changed

crates/core_arch/src/x86/avx512bitalg.rs

+19-36
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
//!
88
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
99
10-
use crate::core_arch::simd::i16x16;
11-
use crate::core_arch::simd::i16x32;
12-
use crate::core_arch::simd::i16x8;
1310
use crate::core_arch::simd::i8x16;
1411
use crate::core_arch::simd::i8x32;
1512
use crate::core_arch::simd::i8x64;
@@ -26,28 +23,14 @@ use crate::core_arch::x86::_mm_setzero_si128;
2623
use crate::core_arch::x86::m128iExt;
2724
use crate::core_arch::x86::m256iExt;
2825
use crate::core_arch::x86::m512iExt;
29-
use crate::intrinsics::simd::simd_select_bitmask;
26+
use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
3027
use crate::mem::transmute;
3128

3229
#[cfg(test)]
3330
use stdarch_test::assert_instr;
3431

3532
#[allow(improper_ctypes)]
3633
extern "C" {
37-
#[link_name = "llvm.ctpop.v32i16"]
38-
fn popcnt_v32i16(x: i16x32) -> i16x32;
39-
#[link_name = "llvm.ctpop.v16i16"]
40-
fn popcnt_v16i16(x: i16x16) -> i16x16;
41-
#[link_name = "llvm.ctpop.v8i16"]
42-
fn popcnt_v8i16(x: i16x8) -> i16x8;
43-
44-
#[link_name = "llvm.ctpop.v64i8"]
45-
fn popcnt_v64i8(x: i8x64) -> i8x64;
46-
#[link_name = "llvm.ctpop.v32i8"]
47-
fn popcnt_v32i8(x: i8x32) -> i8x32;
48-
#[link_name = "llvm.ctpop.v16i8"]
49-
fn popcnt_v16i8(x: i8x16) -> i8x16;
50-
5134
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
5235
fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
5336
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
@@ -64,7 +47,7 @@ extern "C" {
6447
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6548
#[cfg_attr(test, assert_instr(vpopcntw))]
6649
pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
67-
transmute(popcnt_v32i16(a.as_i16x32()))
50+
transmute(simd_ctpop(a.as_i16x32()))
6851
}
6952

7053
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -79,7 +62,7 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
7962
#[cfg_attr(test, assert_instr(vpopcntw))]
8063
pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
8164
let zero = _mm512_setzero_si512().as_i16x32();
82-
transmute(simd_select_bitmask(k, popcnt_v32i16(a.as_i16x32()), zero))
65+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x32()), zero))
8366
}
8467

8568
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -95,7 +78,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
9578
pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
9679
transmute(simd_select_bitmask(
9780
k,
98-
popcnt_v32i16(a.as_i16x32()),
81+
simd_ctpop(a.as_i16x32()),
9982
src.as_i16x32(),
10083
))
10184
}
@@ -108,7 +91,7 @@ pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -
10891
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10992
#[cfg_attr(test, assert_instr(vpopcntw))]
11093
pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
111-
transmute(popcnt_v16i16(a.as_i16x16()))
94+
transmute(simd_ctpop(a.as_i16x16()))
11295
}
11396

11497
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -123,7 +106,7 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
123106
#[cfg_attr(test, assert_instr(vpopcntw))]
124107
pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
125108
let zero = _mm256_setzero_si256().as_i16x16();
126-
transmute(simd_select_bitmask(k, popcnt_v16i16(a.as_i16x16()), zero))
109+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x16()), zero))
127110
}
128111

129112
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -139,7 +122,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
139122
pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
140123
transmute(simd_select_bitmask(
141124
k,
142-
popcnt_v16i16(a.as_i16x16()),
125+
simd_ctpop(a.as_i16x16()),
143126
src.as_i16x16(),
144127
))
145128
}
@@ -152,7 +135,7 @@ pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -
152135
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
153136
#[cfg_attr(test, assert_instr(vpopcntw))]
154137
pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
155-
transmute(popcnt_v8i16(a.as_i16x8()))
138+
transmute(simd_ctpop(a.as_i16x8()))
156139
}
157140

158141
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -167,7 +150,7 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
167150
#[cfg_attr(test, assert_instr(vpopcntw))]
168151
pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
169152
let zero = _mm_setzero_si128().as_i16x8();
170-
transmute(simd_select_bitmask(k, popcnt_v8i16(a.as_i16x8()), zero))
153+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x8()), zero))
171154
}
172155

173156
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -183,7 +166,7 @@ pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
183166
pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
184167
transmute(simd_select_bitmask(
185168
k,
186-
popcnt_v8i16(a.as_i16x8()),
169+
simd_ctpop(a.as_i16x8()),
187170
src.as_i16x8(),
188171
))
189172
}
@@ -196,7 +179,7 @@ pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __
196179
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
197180
#[cfg_attr(test, assert_instr(vpopcntb))]
198181
pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
199-
transmute(popcnt_v64i8(a.as_i8x64()))
182+
transmute(simd_ctpop(a.as_i8x64()))
200183
}
201184

202185
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -211,7 +194,7 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
211194
#[cfg_attr(test, assert_instr(vpopcntb))]
212195
pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
213196
let zero = _mm512_setzero_si512().as_i8x64();
214-
transmute(simd_select_bitmask(k, popcnt_v64i8(a.as_i8x64()), zero))
197+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x64()), zero))
215198
}
216199

217200
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -227,7 +210,7 @@ pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
227210
pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
228211
transmute(simd_select_bitmask(
229212
k,
230-
popcnt_v64i8(a.as_i8x64()),
213+
simd_ctpop(a.as_i8x64()),
231214
src.as_i8x64(),
232215
))
233216
}
@@ -240,7 +223,7 @@ pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) ->
240223
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241224
#[cfg_attr(test, assert_instr(vpopcntb))]
242225
pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
243-
transmute(popcnt_v32i8(a.as_i8x32()))
226+
transmute(simd_ctpop(a.as_i8x32()))
244227
}
245228

246229
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -255,7 +238,7 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
255238
#[cfg_attr(test, assert_instr(vpopcntb))]
256239
pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
257240
let zero = _mm256_setzero_si256().as_i8x32();
258-
transmute(simd_select_bitmask(k, popcnt_v32i8(a.as_i8x32()), zero))
241+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x32()), zero))
259242
}
260243

261244
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -271,7 +254,7 @@ pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
271254
pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
272255
transmute(simd_select_bitmask(
273256
k,
274-
popcnt_v32i8(a.as_i8x32()),
257+
simd_ctpop(a.as_i8x32()),
275258
src.as_i8x32(),
276259
))
277260
}
@@ -284,7 +267,7 @@ pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) ->
284267
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
285268
#[cfg_attr(test, assert_instr(vpopcntb))]
286269
pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
287-
transmute(popcnt_v16i8(a.as_i8x16()))
270+
transmute(simd_ctpop(a.as_i8x16()))
288271
}
289272

290273
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -299,7 +282,7 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
299282
#[cfg_attr(test, assert_instr(vpopcntb))]
300283
pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
301284
let zero = _mm_setzero_si128().as_i8x16();
302-
transmute(simd_select_bitmask(k, popcnt_v16i8(a.as_i8x16()), zero))
285+
transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x16()), zero))
303286
}
304287

305288
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@@ -315,7 +298,7 @@ pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
315298
pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
316299
transmute(simd_select_bitmask(
317300
k,
318-
popcnt_v16i8(a.as_i8x16()),
301+
simd_ctpop(a.as_i8x16()),
319302
src.as_i8x16(),
320303
))
321304
}

0 commit comments

Comments
 (0)