@@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
1639
1639
#[ cfg_attr( test, assert_instr( vmovshdup) ) ]
1640
1640
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1641
1641
pub unsafe fn _mm256_movehdup_ps ( a : __m256 ) -> __m256 {
1642
- simd_shuffle8 ( a, a, [ 1 , 1 , 3 , 3 , 5 , 5 , 7 , 7 ] )
1642
+ simd_shuffle8 ! ( a, a, [ 1 , 1 , 3 , 3 , 5 , 5 , 7 , 7 ] )
1643
1643
}
1644
1644
1645
1645
/// Duplicates even-indexed single-precision (32-bit) floating-point elements
@@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1651
1651
#[ cfg_attr( test, assert_instr( vmovsldup) ) ]
1652
1652
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1653
1653
pub unsafe fn _mm256_moveldup_ps ( a : __m256 ) -> __m256 {
1654
- simd_shuffle8 ( a, a, [ 0 , 0 , 2 , 2 , 4 , 4 , 6 , 6 ] )
1654
+ simd_shuffle8 ! ( a, a, [ 0 , 0 , 2 , 2 , 4 , 4 , 6 , 6 ] )
1655
1655
}
1656
1656
1657
1657
/// Duplicates even-indexed double-precision (64-bit) floating-point elements
@@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1663
1663
#[ cfg_attr( test, assert_instr( vmovddup) ) ]
1664
1664
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1665
1665
pub unsafe fn _mm256_movedup_pd ( a : __m256d ) -> __m256d {
1666
- simd_shuffle4 ( a, a, [ 0 , 0 , 2 , 2 ] )
1666
+ simd_shuffle4 ! ( a, a, [ 0 , 0 , 2 , 2 ] )
1667
1667
}
1668
1668
1669
1669
/// Loads 256 bits of integer data from unaligned memory into result.
@@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
1756
1756
#[ cfg_attr( test, assert_instr( vunpckhpd) ) ]
1757
1757
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1758
1758
pub unsafe fn _mm256_unpackhi_pd ( a : __m256d , b : __m256d ) -> __m256d {
1759
- simd_shuffle4 ( a, b, [ 1 , 5 , 3 , 7 ] )
1759
+ simd_shuffle4 ! ( a, b, [ 1 , 5 , 3 , 7 ] )
1760
1760
}
1761
1761
1762
1762
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1768
1768
#[ cfg_attr( test, assert_instr( vunpckhps) ) ]
1769
1769
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1770
1770
pub unsafe fn _mm256_unpackhi_ps ( a : __m256 , b : __m256 ) -> __m256 {
1771
- simd_shuffle8 ( a, b, [ 2 , 10 , 3 , 11 , 6 , 14 , 7 , 15 ] )
1771
+ simd_shuffle8 ! ( a, b, [ 2 , 10 , 3 , 11 , 6 , 14 , 7 , 15 ] )
1772
1772
}
1773
1773
1774
1774
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
@@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1780
1780
#[ cfg_attr( test, assert_instr( vunpcklpd) ) ]
1781
1781
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1782
1782
pub unsafe fn _mm256_unpacklo_pd ( a : __m256d , b : __m256d ) -> __m256d {
1783
- simd_shuffle4 ( a, b, [ 0 , 4 , 2 , 6 ] )
1783
+ simd_shuffle4 ! ( a, b, [ 0 , 4 , 2 , 6 ] )
1784
1784
}
1785
1785
1786
1786
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
@@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1792
1792
#[ cfg_attr( test, assert_instr( vunpcklps) ) ]
1793
1793
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1794
1794
pub unsafe fn _mm256_unpacklo_ps ( a : __m256 , b : __m256 ) -> __m256 {
1795
- simd_shuffle8 ( a, b, [ 0 , 8 , 1 , 9 , 4 , 12 , 5 , 13 ] )
1795
+ simd_shuffle8 ! ( a, b, [ 0 , 8 , 1 , 9 , 4 , 12 , 5 , 13 ] )
1796
1796
}
1797
1797
1798
1798
/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2584
2584
// instructions, thus it has zero latency.
2585
2585
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2586
2586
pub unsafe fn _mm256_castpd256_pd128 ( a : __m256d ) -> __m128d {
2587
- simd_shuffle2 ( a, a, [ 0 , 1 ] )
2587
+ simd_shuffle2 ! ( a, a, [ 0 , 1 ] )
2588
2588
}
2589
2589
2590
2590
/// Casts vector of type __m256i to type __m128i.
@@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2597
2597
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2598
2598
pub unsafe fn _mm256_castsi256_si128 ( a : __m256i ) -> __m128i {
2599
2599
let a = a. as_i64x4 ( ) ;
2600
- let dst: i64x2 = simd_shuffle2 ( a, a, [ 0 , 1 ] ) ;
2600
+ let dst: i64x2 = simd_shuffle2 ! ( a, a, [ 0 , 1 ] ) ;
2601
2601
transmute ( dst)
2602
2602
}
2603
2603
@@ -2612,7 +2612,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2612
2612
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2613
2613
pub unsafe fn _mm256_castps128_ps256 ( a : __m128 ) -> __m256 {
2614
2614
// FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2615
- simd_shuffle8 ( a, a, [ 0 , 1 , 2 , 3 , 0 , 0 , 0 , 0 ] )
2615
+ simd_shuffle8 ! ( a, a, [ 0 , 1 , 2 , 3 , 0 , 0 , 0 , 0 ] )
2616
2616
}
2617
2617
2618
2618
/// Casts vector of type __m128d to type __m256d;
@@ -2626,7 +2626,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2626
2626
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2627
2627
pub unsafe fn _mm256_castpd128_pd256 ( a : __m128d ) -> __m256d {
2628
2628
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2629
- simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] )
2629
+ simd_shuffle4 ! ( a, a, [ 0 , 1 , 0 , 0 ] )
2630
2630
}
2631
2631
2632
2632
/// Casts vector of type __m128i to type __m256i;
@@ -2641,7 +2641,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2641
2641
pub unsafe fn _mm256_castsi128_si256 ( a : __m128i ) -> __m256i {
2642
2642
let a = a. as_i64x2 ( ) ;
2643
2643
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2644
- let dst: i64x4 = simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] ) ;
2644
+ let dst: i64x4 = simd_shuffle4 ! ( a, a, [ 0 , 1 , 0 , 0 ] ) ;
2645
2645
transmute ( dst)
2646
2646
}
2647
2647
@@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2656
2656
// instructions, thus it has zero latency.
2657
2657
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2658
2658
pub unsafe fn _mm256_zextps128_ps256 ( a : __m128 ) -> __m256 {
2659
- simd_shuffle8 ( a, _mm_setzero_ps ( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2659
+ simd_shuffle8 ! ( a, _mm_setzero_ps( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2660
2660
}
2661
2661
2662
2662
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2671
2671
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2672
2672
pub unsafe fn _mm256_zextsi128_si256 ( a : __m128i ) -> __m256i {
2673
2673
let b = _mm_setzero_si128 ( ) . as_i64x2 ( ) ;
2674
- let dst: i64x4 = simd_shuffle4 ( a. as_i64x2 ( ) , b, [ 0 , 1 , 2 , 3 ] ) ;
2674
+ let dst: i64x4 = simd_shuffle4 ! ( a. as_i64x2( ) , b, [ 0 , 1 , 2 , 3 ] ) ;
2675
2675
transmute ( dst)
2676
2676
}
2677
2677
@@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2687
2687
// instructions, thus it has zero latency.
2688
2688
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2689
2689
pub unsafe fn _mm256_zextpd128_pd256 ( a : __m128d ) -> __m256d {
2690
- simd_shuffle4 ( a, _mm_setzero_pd ( ) , [ 0 , 1 , 2 , 3 ] )
2690
+ simd_shuffle4 ! ( a, _mm_setzero_pd( ) , [ 0 , 1 , 2 , 3 ] )
2691
2691
}
2692
2692
2693
2693
/// Returns vector of type `__m256` with undefined elements.
@@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
2732
2732
#[ cfg_attr( test, assert_instr( vinsertf128) ) ]
2733
2733
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2734
2734
pub unsafe fn _mm256_set_m128 ( hi : __m128 , lo : __m128 ) -> __m256 {
2735
- simd_shuffle8 ( lo, hi, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2735
+ simd_shuffle8 ! ( lo, hi, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
2736
2736
}
2737
2737
2738
2738
/// Sets packed __m256d returned vector with the supplied values.
0 commit comments