@@ -1057,7 +1057,98 @@ pub unsafe fn _mm256_shuffle_epi8(a: u8x32, b: u8x32) -> u8x32 {
1057
1057
pshufb ( a, b)
1058
1058
}
1059
1059
1060
- // TODO _mm256_shuffle_epi32 (__m256i a, const int imm8)
1060
+ /// Shuffle 32-bit integers in 128-bit lanes of `a` using the control in `imm8`.
1061
+ ///
1062
+ /// ```rust
1063
+ /// # #![feature(cfg_target_feature)]
1064
+ /// # #![feature(target_feature)]
1065
+ /// #
1066
+ /// # #[macro_use] extern crate stdsimd;
1067
+ /// #
1068
+ /// # fn main() {
1069
+ /// # if cfg_feature_enabled!("avx2") {
1070
+ /// # #[target_feature = "+avx2"]
1071
+ /// # fn worker() {
1072
+ /// use stdsimd::simd::i32x8;
1073
+ /// use stdsimd::vendor::_mm256_shuffle_epi32;
1074
+ ///
1075
+ /// let a = i32x8::new(0, 1, 2, 3, 4, 5, 6, 7);
1076
+ ///
1077
+ /// let shuffle1 = 0b00_11_10_01;
1078
+ /// let shuffle2 = 0b01_00_10_11;
1079
+ ///
1080
+ /// let c1: i32x8; let c2: i32x8;
1081
+ /// unsafe {
1082
+ /// c1 = _mm256_shuffle_epi32(a, shuffle1);
1083
+ /// c2 = _mm256_shuffle_epi32(a, shuffle2);
1084
+ /// }
1085
+ ///
1086
+ /// let expected1 = i32x8::new(1, 2, 3, 0, 5, 6, 7, 4);
1087
+ /// let expected2 = i32x8::new(3, 2, 0, 1, 7, 6, 4, 5);
1088
+ ///
1089
+ /// assert_eq!(c1, expected1);
1090
+ /// assert_eq!(c2, expected2);
1091
+ /// # }
1092
+ /// # worker();
1093
+ /// # }
1094
+ /// # }
1095
+ /// ```
1096
+ #[ inline( always) ]
1097
+ #[ target_feature = "+avx2" ]
1098
+ #[ cfg_attr( test, assert_instr( vpshufd, imm8 = 9 ) ) ]
1099
+ pub unsafe fn _mm256_shuffle_epi32 ( a : i32x8 , imm8 : i32 ) -> i32x8 {
1100
+ // simd_shuffleX requires that its selector parameter be made up of
1101
+ // constant values, but we can't enforce that here. In spirit, we need
1102
+ // to write a `match` on all possible values of a byte, and for each value,
1103
+ // hard-code the correct `simd_shuffleX` call using only constants. We
1104
+ // then hope for LLVM to do the rest.
1105
+ //
1106
+ // Of course, that's... awful. So we try to use macros to do it for us.
1107
+ let imm8 = ( imm8 & 0xFF ) as u8 ;
1108
+
1109
+ macro_rules! shuffle_done {
1110
+ ( $x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
1111
+ simd_shuffle8( a, a, [ $x01, $x23, $x45, $x67, 4 +$x01, 4 +$x23, 4 +$x45, 4 +$x67] )
1112
+ }
1113
+ }
1114
+ macro_rules! shuffle_x67 {
1115
+ ( $x01: expr, $x23: expr, $x45: expr) => {
1116
+ match ( imm8 >> 6 ) & 0b11 {
1117
+ 0b00 => shuffle_done!( $x01, $x23, $x45, 0 ) ,
1118
+ 0b01 => shuffle_done!( $x01, $x23, $x45, 1 ) ,
1119
+ 0b10 => shuffle_done!( $x01, $x23, $x45, 2 ) ,
1120
+ _ => shuffle_done!( $x01, $x23, $x45, 3 ) ,
1121
+ }
1122
+ }
1123
+ }
1124
+ macro_rules! shuffle_x45 {
1125
+ ( $x01: expr, $x23: expr) => {
1126
+ match ( imm8 >> 4 ) & 0b11 {
1127
+ 0b00 => shuffle_x67!( $x01, $x23, 0 ) ,
1128
+ 0b01 => shuffle_x67!( $x01, $x23, 1 ) ,
1129
+ 0b10 => shuffle_x67!( $x01, $x23, 2 ) ,
1130
+ _ => shuffle_x67!( $x01, $x23, 3 ) ,
1131
+ }
1132
+ }
1133
+ }
1134
+ macro_rules! shuffle_x23 {
1135
+ ( $x01: expr) => {
1136
+ match ( imm8 >> 2 ) & 0b11 {
1137
+ 0b00 => shuffle_x45!( $x01, 0 ) ,
1138
+ 0b01 => shuffle_x45!( $x01, 1 ) ,
1139
+ 0b10 => shuffle_x45!( $x01, 2 ) ,
1140
+ _ => shuffle_x45!( $x01, 3 ) ,
1141
+ }
1142
+ }
1143
+ }
1144
+ match imm8 & 0b11 {
1145
+ 0b00 => shuffle_x23 ! ( 0 ) ,
1146
+ 0b01 => shuffle_x23 ! ( 1 ) ,
1147
+ 0b10 => shuffle_x23 ! ( 2 ) ,
1148
+ _ => shuffle_x23 ! ( 3 ) ,
1149
+ }
1150
+ }
1151
+
1061
1152
// TODO _mm256_shufflehi_epi16 (__m256i a, const int imm8)
1062
1153
// TODO _mm256_shufflelo_epi16 (__m256i a, const int imm8)
1063
1154
0 commit comments