Skip to content

Commit 92917a5

Browse files
authored
Migrate a bunch of i586::sse2 to native types (rust-lang#273)
1 parent 6ef0ae2 commit 92917a5

File tree

9 files changed

+1550
-1508
lines changed

9 files changed

+1550
-1508
lines changed

coresimd/src/x86/i586/avx.rs

Lines changed: 21 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -843,7 +843,7 @@ pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 {
843843
#[inline(always)]
844844
#[target_feature = "+avx"]
845845
#[cfg_attr(test, assert_instr(vextractf128))]
846-
pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> f64x2 {
846+
pub unsafe fn _mm256_extractf128_pd(a: f64x4, imm8: i32) -> __m128d {
847847
match imm8 & 1 {
848848
0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]),
849849
_ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]),
@@ -1068,9 +1068,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
10681068
#[inline(always)]
10691069
#[target_feature = "+avx,+sse2"]
10701070
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
1071-
pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 {
1072-
use x86::i586::sse2::_mm_undefined_pd;
1073-
1071+
pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
10741072
let imm8 = (imm8 & 0xFF) as u8;
10751073
macro_rules! shuffle2 {
10761074
($a:expr, $b:expr) => {
@@ -1194,7 +1192,7 @@ pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 {
11941192
#[inline(always)]
11951193
#[target_feature = "+avx"]
11961194
#[cfg_attr(test, assert_instr(vinsertf128, imm8 = 1))]
1197-
pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: f64x2, imm8: i32) -> f64x4 {
1195+
pub unsafe fn _mm256_insertf128_pd(a: f64x4, b: __m128d, imm8: i32) -> f64x4 {
11981196
match imm8 & 1 {
11991197
0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]),
12001198
_ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]),
@@ -2139,7 +2137,7 @@ pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 {
21392137
#[target_feature = "+avx"]
21402138
// This intrinsic is only used for compilation and does not generate any
21412139
// instructions, thus it has zero latency.
2142-
pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> f64x2 {
2140+
pub unsafe fn _mm256_castpd256_pd128(a: f64x4) -> __m128d {
21432141
simd_shuffle2(a, a, [0, 1])
21442142
}
21452143

@@ -2171,7 +2169,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 {
21712169
#[target_feature = "+avx"]
21722170
// This intrinsic is only used for compilation and does not generate any
21732171
// instructions, thus it has zero latency.
2174-
pub unsafe fn _mm256_castpd128_pd256(a: f64x2) -> f64x4 {
2172+
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> f64x4 {
21752173
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
21762174
simd_shuffle4(a, a, [0, 1, 0, 0])
21772175
}
@@ -2221,8 +2219,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
22212219
#[target_feature = "+avx,+sse2"]
22222220
// This intrinsic is only used for compilation and does not generate any
22232221
// instructions, thus it has zero latency.
2224-
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
2225-
use x86::i586::sse2::_mm_setzero_pd;
2222+
pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> f64x4 {
22262223
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
22272224
}
22282225

@@ -2326,7 +2323,6 @@ pub unsafe fn _mm256_loadu2_m128(
23262323
pub unsafe fn _mm256_loadu2_m128d(
23272324
hiaddr: *const f64, loaddr: *const f64
23282325
) -> f64x4 {
2329-
use x86::i586::sse2::_mm_loadu_pd;
23302326
let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
23312327
_mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
23322328
}
@@ -2371,7 +2367,6 @@ pub unsafe fn _mm256_storeu2_m128(
23712367
pub unsafe fn _mm256_storeu2_m128d(
23722368
hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
23732369
) {
2374-
use x86::i586::sse2::_mm_storeu_pd;
23752370
let lo = _mm256_castpd256_pd128(a);
23762371
_mm_storeu_pd(loaddr, lo);
23772372
let hi = _mm256_extractf128_pd(a, 1);
@@ -3104,9 +3099,9 @@ mod tests {
31043099
#[simd_test = "avx"]
31053100
unsafe fn test_mm256_extractf128_pd() {
31063101
let a = f64x4::new(4., 3., 2., 5.);
3107-
let r = avx::_mm256_extractf128_pd(a, 0);
3108-
let e = f64x2::new(4., 3.);
3109-
assert_eq!(r, e);
3102+
let r = _mm256_extractf128_pd(a, 0);
3103+
let e = _mm_setr_pd(4., 3.);
3104+
assert_eq_m128d(r, e);
31103105
}
31113106

31123107
#[simd_test = "avx"]
@@ -3189,10 +3184,10 @@ mod tests {
31893184

31903185
#[simd_test = "avx"]
31913186
unsafe fn test_mm_permute_pd() {
3192-
let a = f64x2::new(4., 3.);
3193-
let r = avx::_mm_permute_pd(a, 1);
3194-
let e = f64x2::new(3., 4.);
3195-
assert_eq!(r, e);
3187+
let a = _mm_setr_pd(4., 3.);
3188+
let r = _mm_permute_pd(a, 1);
3189+
let e = _mm_setr_pd(3., 4.);
3190+
assert_eq_m128d(r, e);
31963191
}
31973192

31983193
#[simd_test = "avx"]
@@ -3271,8 +3266,8 @@ mod tests {
32713266
#[simd_test = "avx"]
32723267
unsafe fn test_mm256_insertf128_pd() {
32733268
let a = f64x4::new(1., 2., 3., 4.);
3274-
let b = f64x2::new(5., 6.);
3275-
let r = avx::_mm256_insertf128_pd(a, b, 0);
3269+
let b = _mm_setr_pd(5., 6.);
3270+
let r = _mm256_insertf128_pd(a, b, 0);
32763271
let e = f64x4::new(5., 6., 3., 4.);
32773272
assert_eq!(r, e);
32783273
}
@@ -4078,8 +4073,8 @@ mod tests {
40784073
#[simd_test = "avx"]
40794074
unsafe fn test_mm256_castpd256_pd128() {
40804075
let a = f64x4::new(1., 2., 3., 4.);
4081-
let r = avx::_mm256_castpd256_pd128(a);
4082-
assert_eq!(r, f64x2::new(1., 2.));
4076+
let r = _mm256_castpd256_pd128(a);
4077+
assert_eq_m128d(r, _mm_setr_pd(1., 2.));
40834078
}
40844079

40854080
#[simd_test = "avx"]
@@ -4107,8 +4102,8 @@ mod tests {
41074102

41084103
#[simd_test = "avx"]
41094104
unsafe fn test_mm256_zextpd128_pd256() {
4110-
let a = f64x2::new(1., 2.);
4111-
let r = avx::_mm256_zextpd128_pd256(a);
4105+
let a = _mm_setr_pd(1., 2.);
4106+
let r = _mm256_zextpd128_pd256(a);
41124107
let e = f64x4::new(1., 2., 0., 0.);
41134108
assert_eq!(r, e);
41144109
}
@@ -4271,8 +4266,8 @@ mod tests {
42714266
&mut lo as *mut _ as *mut f64,
42724267
a,
42734268
);
4274-
assert_eq!(hi, f64x2::new(3., 4.));
4275-
assert_eq!(lo, f64x2::new(1., 2.));
4269+
assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
4270+
assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
42764271
}
42774272

42784273
#[simd_test = "avx"]

coresimd/src/x86/i586/sse.rs

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1711,20 +1711,6 @@ mod tests {
17111711
use stdsimd_test::simd_test;
17121712
use test::black_box; // Used to inhibit constant-folding.
17131713

1714-
#[target_feature = "+sse"]
1715-
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
1716-
let r = _mm_cmpeq_ps(a, b);
1717-
if _mm_movemask_ps(r) != 0b1111 {
1718-
panic!("{:?} != {:?}", a, b);
1719-
}
1720-
}
1721-
1722-
#[target_feature = "+sse"]
1723-
unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
1724-
union A { a: __m128, b: [f32; 4] };
1725-
transmute::<__m128, A>(a).b[idx]
1726-
}
1727-
17281714
#[simd_test = "sse"]
17291715
unsafe fn test_mm_add_ps() {
17301716
let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);

0 commit comments

Comments
 (0)