Skip to content

Commit 9873942

Browse files
authored
Fix instruction assertions on LLVM 6 (#321)
Looks like some instructions changed here and there, so this updates the assertions (no behavior appears to have changed though)
1 parent 0d6b868 commit 9873942

File tree

8 files changed

+34
-36
lines changed

8 files changed

+34
-36
lines changed

coresimd/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
1616
integer_atomics, stmt_expr_attributes, core_intrinsics,
1717
crate_in_paths)]
18-
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
18+
#![cfg_attr(test, feature(proc_macro, test, attr_literals, abi_vectorcall))]
1919
#![cfg_attr(feature = "cargo-clippy",
2020
allow(inline_always, too_many_arguments, cast_sign_loss,
2121
cast_lossless, cast_possible_wrap,

coresimd/src/x86/i586/avx.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,7 +1921,6 @@ pub unsafe fn _mm256_set_epi32(
19211921
#[inline]
19221922
#[target_feature(enable = "avx")]
19231923
// This intrinsic has no corresponding instruction.
1924-
#[cfg_attr(test, assert_instr(vinsertf128))]
19251924
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
19261925
_mm256_setr_epi64x(d, c, b, a)
19271926
}
@@ -2001,7 +2000,6 @@ pub unsafe fn _mm256_setr_epi32(
20012000
#[inline]
20022001
#[target_feature(enable = "avx")]
20032002
// This intrinsic has no corresponding instruction.
2004-
#[cfg_attr(test, assert_instr(vinsertf128))]
20052003
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
20062004
mem::transmute(i64x4::new(a, b, c, d))
20072005
}

coresimd/src/x86/i586/avx2.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
226226
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
227227
#[inline]
228228
#[target_feature(enable = "avx2")]
229-
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
229+
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
230230
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
231231
let imm8 = (imm8 & 0xFF) as u8;
232232
let a = a.as_i32x4();
@@ -258,7 +258,7 @@ pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
258258
/// Blend packed 32-bit integers from `a` and `b` using control mask `imm8`.
259259
#[inline]
260260
#[target_feature(enable = "avx2")]
261-
#[cfg_attr(test, assert_instr(vpblendd, imm8 = 9))]
261+
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
262262
pub unsafe fn _mm256_blend_epi32(
263263
a: __m256i, b: __m256i, imm8: i32
264264
) -> __m256i {
@@ -1790,15 +1790,15 @@ pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
17901790
/// integers of `a`.
17911791
#[inline]
17921792
#[target_feature(enable = "avx2")]
1793-
#[cfg_attr(test, assert_instr(vpermd))]
1793+
#[cfg_attr(test, assert_instr(vpermps))]
17941794
pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
17951795
mem::transmute(permd(a.as_u32x8(), b.as_u32x8()))
17961796
}
17971797

17981798
/// Permutes 64-bit integers from `a` using control mask `imm8`.
17991799
#[inline]
18001800
#[target_feature(enable = "avx2")]
1801-
#[cfg_attr(test, assert_instr(vpermq, imm8 = 9))]
1801+
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
18021802
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
18031803
let imm8 = (imm8 & 0xFF) as u8;
18041804
let zero = _mm256_setzero_si256().as_i64x4();
@@ -2007,7 +2007,7 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
20072007
/// ```
20082008
#[inline]
20092009
#[target_feature(enable = "avx2")]
2010-
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
2010+
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
20112011
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
20122012
// simd_shuffleX requires that its selector parameter be made up of
20132013
// constant values, but we can't enforce that here. In spirit, we need
@@ -2762,7 +2762,7 @@ pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
27622762
/// ```
27632763
#[inline]
27642764
#[target_feature(enable = "avx2")]
2765-
#[cfg_attr(test, assert_instr(vpunpckhdq))]
2765+
#[cfg_attr(test, assert_instr(vunpckhps))]
27662766
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
27672767
let r: i32x8 = simd_shuffle8(
27682768
a.as_i32x8(),
@@ -2802,7 +2802,7 @@ pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
28022802
/// ```
28032803
#[inline]
28042804
#[target_feature(enable = "avx2")]
2805-
#[cfg_attr(test, assert_instr(vpunpckldq))]
2805+
#[cfg_attr(test, assert_instr(vunpcklps))]
28062806
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
28072807
let r: i32x8 =
28082808
simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
@@ -2839,7 +2839,7 @@ pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
28392839
/// ```
28402840
#[inline]
28412841
#[target_feature(enable = "avx2")]
2842-
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
2842+
#[cfg_attr(test, assert_instr(vunpckhpd))]
28432843
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
28442844
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
28452845
mem::transmute(r)
@@ -2875,7 +2875,7 @@ pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
28752875
/// ```
28762876
#[inline]
28772877
#[target_feature(enable = "avx2")]
2878-
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
2878+
#[cfg_attr(test, assert_instr(vunpcklpd))]
28792879
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
28802880
let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
28812881
mem::transmute(r)

coresimd/src/x86/i586/sse.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -832,8 +832,7 @@ pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
832832
/// lower half of result.
833833
#[inline]
834834
#[target_feature(enable = "sse")]
835-
#[cfg_attr(all(test, not(windows)), assert_instr(movhlps))]
836-
#[cfg_attr(all(test, windows), assert_instr(unpckhpd))]
835+
#[cfg_attr(test, assert_instr(movhlps))]
837836
pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
838837
// TODO; figure why this is a different instruction on Windows?
839838
simd_shuffle4(a, b, [6, 7, 2, 3])
@@ -843,8 +842,7 @@ pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
843842
/// higher half of result.
844843
#[inline]
845844
#[target_feature(enable = "sse")]
846-
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(unpcklpd))]
847-
#[cfg_attr(all(test, not(target_feature = "sse2")), assert_instr(movlhps))]
845+
#[cfg_attr(test, assert_instr(movlhps))]
848846
pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
849847
simd_shuffle4(a, b, [0, 1, 4, 5])
850848
}
@@ -900,7 +898,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
900898
// 32-bit codegen does not generate `movhps` or `movhpd`, but instead
901899
// `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2).
902900
#[cfg_attr(all(test, target_arch = "x86", target_feature = "sse2"),
903-
assert_instr(unpcklpd))]
901+
assert_instr(movlhps))]
904902
#[cfg_attr(all(test, target_arch = "x86", not(target_feature = "sse2")),
905903
assert_instr(unpcklps))]
906904
// TODO: This function is actually not limited to floats, but that's what
@@ -1095,13 +1093,8 @@ pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
10951093
#[inline]
10961094
#[target_feature(enable = "sse")]
10971095
// On i586 the codegen just generates plane MOVs. No need to test for that.
1098-
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
1099-
not(target_family = "windows")),
1096+
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2")),
11001097
assert_instr(movlps))]
1101-
// Win64 passes `a` by reference, which causes it to generate two 64 bit moves.
1102-
#[cfg_attr(all(test, any(target_arch = "x86_64", target_feature = "sse2"),
1103-
target_family = "windows"),
1104-
assert_instr(movsd))]
11051098
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
11061099
#[cfg(target_arch = "x86")]
11071100
{

coresimd/src/x86/i586/sse2.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
710710
/// Return the lowest element of `a`.
711711
#[inline]
712712
#[target_feature(enable = "sse2")]
713-
#[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows
713+
#[cfg_attr(test, assert_instr(movd))]
714714
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
715715
simd_extract(a.as_i32x4(), 0)
716716
}
@@ -1207,7 +1207,7 @@ pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
12071207
/// Unpack and interleave 32-bit integers from the high half of `a` and `b`.
12081208
#[inline]
12091209
#[target_feature(enable = "sse2")]
1210-
#[cfg_attr(test, assert_instr(punpckhdq))]
1210+
#[cfg_attr(test, assert_instr(unpckhps))]
12111211
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
12121212
mem::transmute::<i32x4, _>(simd_shuffle4(
12131213
a.as_i32x4(),
@@ -1219,7 +1219,7 @@ pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
12191219
/// Unpack and interleave 64-bit integers from the high half of `a` and `b`.
12201220
#[inline]
12211221
#[target_feature(enable = "sse2")]
1222-
#[cfg_attr(test, assert_instr(punpckhqdq))]
1222+
#[cfg_attr(test, assert_instr(unpckhpd))]
12231223
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
12241224
mem::transmute::<i64x2, _>(simd_shuffle2(
12251225
a.as_i64x2(),
@@ -1253,7 +1253,7 @@ pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
12531253
/// Unpack and interleave 32-bit integers from the low half of `a` and `b`.
12541254
#[inline]
12551255
#[target_feature(enable = "sse2")]
1256-
#[cfg_attr(test, assert_instr(punpckldq))]
1256+
#[cfg_attr(test, assert_instr(unpcklps))]
12571257
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
12581258
mem::transmute::<i32x4, _>(simd_shuffle4(
12591259
a.as_i32x4(),
@@ -1265,7 +1265,7 @@ pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
12651265
/// Unpack and interleave 64-bit integers from the low half of `a` and `b`.
12661266
#[inline]
12671267
#[target_feature(enable = "sse2")]
1268-
#[cfg_attr(test, assert_instr(punpcklqdq))]
1268+
#[cfg_attr(test, assert_instr(movlhps))]
12691269
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
12701270
mem::transmute::<i64x2, _>(simd_shuffle2(
12711271
a.as_i64x2(),
@@ -1795,7 +1795,6 @@ pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
17951795
/// Return the lower double-precision (64-bit) floating-point element of "a".
17961796
#[inline]
17971797
#[target_feature(enable = "sse2")]
1798-
#[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platform
17991798
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
18001799
simd_extract(a, 0)
18011800
}
@@ -1953,7 +1952,7 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
19531952
/// memory location.
19541953
#[inline]
19551954
#[target_feature(enable = "sse2")]
1956-
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movsd only on windows
1955+
#[cfg_attr(test, assert_instr(movlps))]
19571956
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
19581957
*mem_addr = simd_extract(a, 0)
19591958
}
@@ -2022,7 +2021,7 @@ pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
20222021
/// memory location.
20232022
#[inline]
20242023
#[target_feature(enable = "sse2")]
2025-
#[cfg_attr(all(test, not(windows)), assert_instr(movlps))] // FIXME movlpd (movsd on windows)
2024+
#[cfg_attr(test, assert_instr(movlps))] // FIXME movlpd
20262025
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
20272026
*mem_addr = simd_extract(a, 0);
20282027
}
@@ -2179,7 +2178,7 @@ pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
21792178
/// * The [63:0] bits are copied from the [63:0] bits of the first input
21802179
#[inline]
21812180
#[target_feature(enable = "sse2")]
2182-
#[cfg_attr(test, assert_instr(unpcklpd))]
2181+
#[cfg_attr(test, assert_instr(movlhps))]
21832182
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
21842183
simd_shuffle2(a, b, [0, 2])
21852184
}

coresimd/src/x86/i586/sse41.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
120120
#[inline]
121121
#[target_feature(enable = "sse4.1")]
122122
// TODO: Add test for Windows
123-
#[cfg_attr(all(test, not(windows)), assert_instr(extractps, imm8 = 0))]
123+
#[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
124124
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
125125
mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
126126
}
@@ -141,7 +141,7 @@ pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
141141
#[inline]
142142
#[target_feature(enable = "sse4.1")]
143143
// TODO: Add test for Windows
144-
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
144+
#[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
145145
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
146146
let imm8 = (imm8 & 3) as u32;
147147
simd_extract::<_, i32>(a.as_i32x4(), imm8)

coresimd/src/x86/x86_64/sse41.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use stdsimd_test::assert_instr;
1212
#[inline]
1313
#[target_feature(enable = "sse4.1")]
1414
// TODO: Add test for Windows
15-
#[cfg_attr(all(test, not(windows)), assert_instr(pextrq, imm8 = 1))]
15+
#[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
1616
pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
1717
let imm8 = (imm8 & 1) as u32;
1818
simd_extract(a.as_i64x2(), imm8)

stdsimd-test/assert-instr-macro/src/lib.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,17 @@ pub fn assert_instr(
8080
})
8181
.collect::<Vec<_>>();
8282
let attrs = Append(&attrs);
83+
84+
// Use an ABI on Windows that passes SIMD values in registers, like what
85+
// happens on Unix (I think?) by default.
86+
let abi = if cfg!(windows) {
87+
syn::LitStr::new("vectorcall", proc_macro2::Span::call_site())
88+
} else {
89+
syn::LitStr::new("C", proc_macro2::Span::call_site())
90+
};
8391
let to_test = quote! {
8492
#attrs
85-
unsafe extern fn #shim_name(#(#inputs),*) #ret {
93+
unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
8694
#name(#(#input_vals),*)
8795
}
8896
};

0 commit comments

Comments
 (0)