Skip to content

Commit 97742f0

Browse files
committed
LLVM7 generates different machine code than LLVM6 for x86/x86_64 targets for some intrinsics. These are new optimizations.
1 parent 5bd3f14 commit 97742f0

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

coresimd/x86/avx.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,10 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
524524
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd)
525525
#[inline]
526526
#[target_feature(enable = "avx")]
527-
#[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
527+
// Note: LLVM7 prefers single-precision blend instructions when
528+
// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
529+
// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
530+
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
528531
#[rustc_args_required_const(2)]
529532
#[stable(feature = "simd_x86", since = "1.27.0")]
530533
pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {

coresimd/x86/sse41.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,10 @@ pub unsafe fn _mm_blendv_epi8(
8080
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
8181
#[inline]
8282
#[target_feature(enable = "sse4.1")]
83-
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
83+
// Note: LLVM7 prefers the single-precision floating-point domain when possible,
84+
// see https://bugs.llvm.org/show_bug.cgi?id=38195
85+
// #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
86+
#[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))]
8487
#[rustc_args_required_const(2)]
8588
#[stable(feature = "simd_x86", since = "1.27.0")]
8689
pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
@@ -124,7 +127,10 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
124127
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
125128
#[inline]
126129
#[target_feature(enable = "sse4.1")]
127-
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
130+
// Note: LLVM7 prefers the single-precision floating-point domain when possible,
131+
// see https://bugs.llvm.org/show_bug.cgi?id=38195
132+
// #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
133+
#[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))]
128134
#[rustc_args_required_const(2)]
129135
#[stable(feature = "simd_x86", since = "1.27.0")]
130136
pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {

0 commit comments

Comments
 (0)