@@ -843,7 +843,7 @@ pub unsafe fn _mm256_extractf128_ps(a: f32x8, imm8: i32) -> __m128 {
843
843
#[ inline( always) ]
844
844
#[ target_feature = "+avx" ]
845
845
#[ cfg_attr( test, assert_instr( vextractf128) ) ]
846
- pub unsafe fn _mm256_extractf128_pd ( a : f64x4 , imm8 : i32 ) -> f64x2 {
846
+ pub unsafe fn _mm256_extractf128_pd ( a : f64x4 , imm8 : i32 ) -> __m128d {
847
847
match imm8 & 1 {
848
848
0 => simd_shuffle2 ( a, _mm256_undefined_pd ( ) , [ 0 , 1 ] ) ,
849
849
_ => simd_shuffle2 ( a, _mm256_undefined_pd ( ) , [ 2 , 3 ] ) ,
@@ -1068,9 +1068,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
1068
1068
#[ inline( always) ]
1069
1069
#[ target_feature = "+avx,+sse2" ]
1070
1070
#[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1071
- pub unsafe fn _mm_permute_pd ( a : f64x2 , imm8 : i32 ) -> f64x2 {
1072
- use x86:: i586:: sse2:: _mm_undefined_pd;
1073
-
1071
+ pub unsafe fn _mm_permute_pd ( a : __m128d , imm8 : i32 ) -> __m128d {
1074
1072
let imm8 = ( imm8 & 0xFF ) as u8 ;
1075
1073
macro_rules! shuffle2 {
1076
1074
( $a: expr, $b: expr) => {
@@ -1194,7 +1192,7 @@ pub unsafe fn _mm256_insertf128_ps(a: f32x8, b: __m128, imm8: i32) -> f32x8 {
1194
1192
#[ inline( always) ]
1195
1193
#[ target_feature = "+avx" ]
1196
1194
#[ cfg_attr( test, assert_instr( vinsertf128, imm8 = 1 ) ) ]
1197
- pub unsafe fn _mm256_insertf128_pd ( a : f64x4 , b : f64x2 , imm8 : i32 ) -> f64x4 {
1195
+ pub unsafe fn _mm256_insertf128_pd ( a : f64x4 , b : __m128d , imm8 : i32 ) -> f64x4 {
1198
1196
match imm8 & 1 {
1199
1197
0 => simd_shuffle4 ( a, _mm256_castpd128_pd256 ( b) , [ 4 , 5 , 2 , 3 ] ) ,
1200
1198
_ => simd_shuffle4 ( a, _mm256_castpd128_pd256 ( b) , [ 0 , 1 , 4 , 5 ] ) ,
@@ -2139,7 +2137,7 @@ pub unsafe fn _mm256_castps256_ps128(a: f32x8) -> __m128 {
2139
2137
#[ target_feature = "+avx" ]
2140
2138
// This intrinsic is only used for compilation and does not generate any
2141
2139
// instructions, thus it has zero latency.
2142
- pub unsafe fn _mm256_castpd256_pd128 ( a : f64x4 ) -> f64x2 {
2140
+ pub unsafe fn _mm256_castpd256_pd128 ( a : f64x4 ) -> __m128d {
2143
2141
simd_shuffle2 ( a, a, [ 0 , 1 ] )
2144
2142
}
2145
2143
@@ -2171,7 +2169,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> f32x8 {
2171
2169
#[ target_feature = "+avx" ]
2172
2170
// This intrinsic is only used for compilation and does not generate any
2173
2171
// instructions, thus it has zero latency.
2174
- pub unsafe fn _mm256_castpd128_pd256 ( a : f64x2 ) -> f64x4 {
2172
+ pub unsafe fn _mm256_castpd128_pd256 ( a : __m128d ) -> f64x4 {
2175
2173
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2176
2174
simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] )
2177
2175
}
@@ -2221,8 +2219,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2221
2219
#[ target_feature = "+avx,+sse2" ]
2222
2220
// This intrinsic is only used for compilation and does not generate any
2223
2221
// instructions, thus it has zero latency.
2224
- pub unsafe fn _mm256_zextpd128_pd256 ( a : f64x2 ) -> f64x4 {
2225
- use x86:: i586:: sse2:: _mm_setzero_pd;
2222
+ pub unsafe fn _mm256_zextpd128_pd256 ( a : __m128d ) -> f64x4 {
2226
2223
simd_shuffle4 ( a, _mm_setzero_pd ( ) , [ 0 , 1 , 2 , 3 ] )
2227
2224
}
2228
2225
@@ -2326,7 +2323,6 @@ pub unsafe fn _mm256_loadu2_m128(
2326
2323
pub unsafe fn _mm256_loadu2_m128d (
2327
2324
hiaddr : * const f64 , loaddr : * const f64
2328
2325
) -> f64x4 {
2329
- use x86:: i586:: sse2:: _mm_loadu_pd;
2330
2326
let a = _mm256_castpd128_pd256 ( _mm_loadu_pd ( loaddr) ) ;
2331
2327
_mm256_insertf128_pd ( a, _mm_loadu_pd ( hiaddr) , 1 )
2332
2328
}
@@ -2371,7 +2367,6 @@ pub unsafe fn _mm256_storeu2_m128(
2371
2367
pub unsafe fn _mm256_storeu2_m128d (
2372
2368
hiaddr : * mut f64 , loaddr : * mut f64 , a : f64x4
2373
2369
) {
2374
- use x86:: i586:: sse2:: _mm_storeu_pd;
2375
2370
let lo = _mm256_castpd256_pd128 ( a) ;
2376
2371
_mm_storeu_pd ( loaddr, lo) ;
2377
2372
let hi = _mm256_extractf128_pd ( a, 1 ) ;
@@ -3104,9 +3099,9 @@ mod tests {
3104
3099
#[ simd_test = "avx" ]
3105
3100
unsafe fn test_mm256_extractf128_pd ( ) {
3106
3101
let a = f64x4:: new ( 4. , 3. , 2. , 5. ) ;
3107
- let r = avx :: _mm256_extractf128_pd ( a, 0 ) ;
3108
- let e = f64x2 :: new ( 4. , 3. ) ;
3109
- assert_eq ! ( r, e) ;
3102
+ let r = _mm256_extractf128_pd ( a, 0 ) ;
3103
+ let e = _mm_setr_pd ( 4. , 3. ) ;
3104
+ assert_eq_m128d ( r, e) ;
3110
3105
}
3111
3106
3112
3107
#[ simd_test = "avx" ]
@@ -3189,10 +3184,10 @@ mod tests {
3189
3184
3190
3185
#[ simd_test = "avx" ]
3191
3186
unsafe fn test_mm_permute_pd ( ) {
3192
- let a = f64x2 :: new ( 4. , 3. ) ;
3193
- let r = avx :: _mm_permute_pd ( a, 1 ) ;
3194
- let e = f64x2 :: new ( 3. , 4. ) ;
3195
- assert_eq ! ( r, e) ;
3187
+ let a = _mm_setr_pd ( 4. , 3. ) ;
3188
+ let r = _mm_permute_pd ( a, 1 ) ;
3189
+ let e = _mm_setr_pd ( 3. , 4. ) ;
3190
+ assert_eq_m128d ( r, e) ;
3196
3191
}
3197
3192
3198
3193
#[ simd_test = "avx" ]
@@ -3271,8 +3266,8 @@ mod tests {
3271
3266
#[ simd_test = "avx" ]
3272
3267
unsafe fn test_mm256_insertf128_pd ( ) {
3273
3268
let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
3274
- let b = f64x2 :: new ( 5. , 6. ) ;
3275
- let r = avx :: _mm256_insertf128_pd ( a, b, 0 ) ;
3269
+ let b = _mm_setr_pd ( 5. , 6. ) ;
3270
+ let r = _mm256_insertf128_pd ( a, b, 0 ) ;
3276
3271
let e = f64x4:: new ( 5. , 6. , 3. , 4. ) ;
3277
3272
assert_eq ! ( r, e) ;
3278
3273
}
@@ -4078,8 +4073,8 @@ mod tests {
4078
4073
#[ simd_test = "avx" ]
4079
4074
unsafe fn test_mm256_castpd256_pd128 ( ) {
4080
4075
let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
4081
- let r = avx :: _mm256_castpd256_pd128 ( a) ;
4082
- assert_eq ! ( r, f64x2 :: new ( 1. , 2. ) ) ;
4076
+ let r = _mm256_castpd256_pd128 ( a) ;
4077
+ assert_eq_m128d ( r, _mm_setr_pd ( 1. , 2. ) ) ;
4083
4078
}
4084
4079
4085
4080
#[ simd_test = "avx" ]
@@ -4107,8 +4102,8 @@ mod tests {
4107
4102
4108
4103
#[ simd_test = "avx" ]
4109
4104
unsafe fn test_mm256_zextpd128_pd256 ( ) {
4110
- let a = f64x2 :: new ( 1. , 2. ) ;
4111
- let r = avx :: _mm256_zextpd128_pd256 ( a) ;
4105
+ let a = _mm_setr_pd ( 1. , 2. ) ;
4106
+ let r = _mm256_zextpd128_pd256 ( a) ;
4112
4107
let e = f64x4:: new ( 1. , 2. , 0. , 0. ) ;
4113
4108
assert_eq ! ( r, e) ;
4114
4109
}
@@ -4271,8 +4266,8 @@ mod tests {
4271
4266
& mut lo as * mut _ as * mut f64 ,
4272
4267
a,
4273
4268
) ;
4274
- assert_eq ! ( hi, f64x2 :: new ( 3. , 4. ) ) ;
4275
- assert_eq ! ( lo, f64x2 :: new ( 1. , 2. ) ) ;
4269
+ assert_eq_m128d ( hi, _mm_setr_pd ( 3. , 4. ) ) ;
4270
+ assert_eq_m128d ( lo, _mm_setr_pd ( 1. , 2. ) ) ;
4276
4271
}
4277
4272
4278
4273
#[ simd_test = "avx" ]
0 commit comments