Skip to content

Commit a38077c

Browse files
committed
WIP: const-ify shuffle arguments
1 parent 6c4f4e1 commit a38077c

File tree

6 files changed

+197
-96
lines changed

6 files changed

+197
-96
lines changed

crates/core_arch/src/macros.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,103 @@ macro_rules! types {
9292
pub struct $name($($fields)*);
9393
)*)
9494
}
95+
96+
/// Calls the `simd_shuffle2` function with a two-element index array.
///
/// The indices are first bound to a local `const` item, so the third argument
/// must be a constant expression of type `[u32; 2]`. A trailing comma after
/// the index array is accepted.
///
/// Usage: `simd_shuffle2!(a, b, [i0, i1])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle2 {
    ($lhs:expr, $rhs:expr, $indices:expr $(,)?) => {{
        const IDX: [u32; 2] = $indices;
        simd_shuffle2($lhs, $rhs, IDX)
    }};
}
103+
104+
/// Two-element shuffle whose index array is computed from const generic
/// parameters of the calling function.
///
/// A `const` item nested inside a function cannot name that function's
/// generic parameters. To work around this, the parameters are re-declared on
/// a local `ConstParam` struct and the index array becomes an associated
/// constant of that struct, which is then passed to the `simd_shuffle2`
/// function.
///
/// One or more `const NAME: Type` declarations, separated by commas, may be
/// listed between the angle brackets; each `NAME` must be in scope at the call
/// site as a constant of the stated type. A trailing comma after the index
/// array is accepted.
///
/// Usage:
/// `simd_shuffle2_param!(a, b, <const IMM: i32> [IMM as u32 & 1, 2])` or
/// `simd_shuffle2_param!(a, b, <const A: u32, const B: u32> [A, B])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle2_param {
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        // Carrier type: gives `$idx` a scope in which the const parameters exist.
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 2] = $idx;
        }

        simd_shuffle2($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
}
115+
116+
/// Calls the `simd_shuffle4` function with a four-element index array.
///
/// The indices are first bound to a local `const` item, so the third argument
/// must be a constant expression of type `[u32; 4]`. A trailing comma after
/// the index array is accepted.
///
/// Usage: `simd_shuffle4!(a, b, [i0, i1, i2, i3])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle4 {
    ($lhs:expr, $rhs:expr, $indices:expr $(,)?) => {{
        const IDX: [u32; 4] = $indices;
        simd_shuffle4($lhs, $rhs, IDX)
    }};
}
123+
124+
/// Four-element shuffle whose index array is computed from const generic
/// parameters of the calling function.
///
/// A `const` item nested inside a function cannot name that function's
/// generic parameters. To work around this, the parameters are re-declared on
/// a local `ConstParam` struct and the index array becomes an associated
/// constant of that struct, which is then passed to the `simd_shuffle4`
/// function.
///
/// One or more `const NAME: Type` declarations, separated by commas, may be
/// listed between the angle brackets; each `NAME` must be in scope at the call
/// site as a constant of the stated type. A trailing comma after the index
/// array is accepted.
///
/// Usage:
/// `simd_shuffle4_param!(a, b, <const IMM: i32> [IMM as u32 & 1, 1, 2, 3])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle4_param {
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        // Carrier type: gives `$idx` a scope in which the const parameters exist.
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 4] = $idx;
        }

        simd_shuffle4($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
}
135+
136+
/// Calls the `simd_shuffle8` function with an eight-element index array.
///
/// The indices are first bound to a local `const` item, so the third argument
/// must be a constant expression of type `[u32; 8]`. A trailing comma after
/// the index array is accepted.
///
/// Usage: `simd_shuffle8!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle8 {
    ($lhs:expr, $rhs:expr, $indices:expr $(,)?) => {{
        const IDX: [u32; 8] = $indices;
        simd_shuffle8($lhs, $rhs, IDX)
    }};
}
143+
144+
/// Eight-element shuffle whose index array is computed from const generic
/// parameters of the calling function.
///
/// A `const` item nested inside a function cannot name that function's
/// generic parameters. To work around this, the parameters are re-declared on
/// a local `ConstParam` struct and the index array becomes an associated
/// constant of that struct, which is then passed to the `simd_shuffle8`
/// function.
///
/// One or more `const NAME: Type` declarations, separated by commas, may be
/// listed between the angle brackets; each `NAME` must be in scope at the call
/// site as a constant of the stated type. A trailing comma after the index
/// array is accepted.
///
/// Usage:
/// `simd_shuffle8_param!(a, b, <const IMM: i32> [/* eight u32 expressions */])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle8_param {
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        // Carrier type: gives `$idx` a scope in which the const parameters exist.
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 8] = $idx;
        }

        simd_shuffle8($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
}
155+
156+
/// Calls the `simd_shuffle16` function with a sixteen-element index array.
///
/// The indices are first bound to a local `const` item, so the third argument
/// must be a constant expression of type `[u32; 16]`. A trailing comma after
/// the index array is accepted.
///
/// Usage: `simd_shuffle16!(a, b, [/* sixteen u32 indices */])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle16 {
    ($lhs:expr, $rhs:expr, $indices:expr $(,)?) => {{
        const IDX: [u32; 16] = $indices;
        simd_shuffle16($lhs, $rhs, IDX)
    }};
}
163+
164+
/// Sixteen-element shuffle whose index array is computed from const generic
/// parameters of the calling function.
///
/// A `const` item nested inside a function cannot name that function's
/// generic parameters. To work around this, the parameters are re-declared on
/// a local `ConstParam` struct and the index array becomes an associated
/// constant of that struct, which is then passed to the `simd_shuffle16`
/// function.
///
/// One or more `const NAME: Type` declarations, separated by commas, may be
/// listed between the angle brackets; each `NAME` must be in scope at the call
/// site as a constant of the stated type. A trailing comma after the index
/// array is accepted.
///
/// Usage:
/// `simd_shuffle16_param!(a, b, <const IMM: i32> [/* sixteen u32 expressions */])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle16_param {
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        // Carrier type: gives `$idx` a scope in which the const parameters exist.
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 16] = $idx;
        }

        simd_shuffle16($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
}
175+
176+
/// Calls the `simd_shuffle32` function with a thirty-two-element index array.
///
/// The indices are first bound to a local `const` item, so the third argument
/// must be a constant expression of type `[u32; 32]`. A trailing comma after
/// the index array is accepted.
///
/// Usage: `simd_shuffle32!(a, b, [/* thirty-two u32 indices */])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle32 {
    ($lhs:expr, $rhs:expr, $indices:expr $(,)?) => {{
        const IDX: [u32; 32] = $indices;
        simd_shuffle32($lhs, $rhs, IDX)
    }};
}
183+
184+
/// Thirty-two-element shuffle whose index array is computed from const
/// generic parameters of the calling function.
///
/// A `const` item nested inside a function cannot name that function's
/// generic parameters. To work around this, the parameters are re-declared on
/// a local `ConstParam` struct and the index array becomes an associated
/// constant of that struct, which is then passed to the `simd_shuffle32`
/// function.
///
/// One or more `const NAME: Type` declarations, separated by commas, may be
/// listed between the angle brackets; each `NAME` must be in scope at the call
/// site as a constant of the stated type. A trailing comma after the index
/// array is accepted.
///
/// Usage:
/// `simd_shuffle32_param!(a, b, <const IMM: i32> [/* thirty-two u32 expressions */])`.
// NOTE(review): the lint is presumably silenced because not every build
// invokes this macro -- confirm.
#[allow(unused_macros)]
macro_rules! simd_shuffle32_param {
    ($x:expr, $y:expr, <$(const $imm:ident : $ty:ty),+> $idx:expr $(,)?) => {{
        // Carrier type: gives `$idx` a scope in which the const parameters exist.
        struct ConstParam<$(const $imm: $ty),+>;
        impl<$(const $imm: $ty),+> ConstParam<$($imm),+> {
            const IDX: [u32; 32] = $idx;
        }

        simd_shuffle32($x, $y, ConstParam::<$($imm),+>::IDX)
    }};
}

crates/core_arch/src/x86/avx.rs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,7 +1639,7 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
16391639
#[cfg_attr(test, assert_instr(vmovshdup))]
16401640
#[stable(feature = "simd_x86", since = "1.27.0")]
16411641
pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1642-
simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
1642+
simd_shuffle8!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
16431643
}
16441644

16451645
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
@@ -1651,7 +1651,7 @@ pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
16511651
#[cfg_attr(test, assert_instr(vmovsldup))]
16521652
#[stable(feature = "simd_x86", since = "1.27.0")]
16531653
pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1654-
simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
1654+
simd_shuffle8!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
16551655
}
16561656

16571657
/// Duplicate even-indexed double-precision (64-bit) floating-point elements
@@ -1663,7 +1663,7 @@ pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
16631663
#[cfg_attr(test, assert_instr(vmovddup))]
16641664
#[stable(feature = "simd_x86", since = "1.27.0")]
16651665
pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1666-
simd_shuffle4(a, a, [0, 0, 2, 2])
1666+
simd_shuffle4!(a, a, [0, 0, 2, 2])
16671667
}
16681668

16691669
/// Loads 256-bits of integer data from unaligned memory into result.
@@ -1756,7 +1756,7 @@ pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
17561756
#[cfg_attr(test, assert_instr(vunpckhpd))]
17571757
#[stable(feature = "simd_x86", since = "1.27.0")]
17581758
pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1759-
simd_shuffle4(a, b, [1, 5, 3, 7])
1759+
simd_shuffle4!(a, b, [1, 5, 3, 7])
17601760
}
17611761

17621762
/// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1768,7 +1768,7 @@ pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
17681768
#[cfg_attr(test, assert_instr(vunpckhps))]
17691769
#[stable(feature = "simd_x86", since = "1.27.0")]
17701770
pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1771-
simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
1771+
simd_shuffle8!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
17721772
}
17731773

17741774
/// Unpacks and interleave double-precision (64-bit) floating-point elements
@@ -1780,7 +1780,7 @@ pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
17801780
#[cfg_attr(test, assert_instr(vunpcklpd))]
17811781
#[stable(feature = "simd_x86", since = "1.27.0")]
17821782
pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1783-
simd_shuffle4(a, b, [0, 4, 2, 6])
1783+
simd_shuffle4!(a, b, [0, 4, 2, 6])
17841784
}
17851785

17861786
/// Unpacks and interleave single-precision (32-bit) floating-point elements
@@ -1792,7 +1792,7 @@ pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
17921792
#[cfg_attr(test, assert_instr(vunpcklps))]
17931793
#[stable(feature = "simd_x86", since = "1.27.0")]
17941794
pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
1795-
simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
1795+
simd_shuffle8!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
17961796
}
17971797

17981798
/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
@@ -2584,7 +2584,7 @@ pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
25842584
// instructions, thus it has zero latency.
25852585
#[stable(feature = "simd_x86", since = "1.27.0")]
25862586
pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2587-
simd_shuffle2(a, a, [0, 1])
2587+
simd_shuffle2!(a, a, [0, 1])
25882588
}
25892589

25902590
/// Casts vector of type __m256i to type __m128i.
@@ -2597,7 +2597,7 @@ pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
25972597
#[stable(feature = "simd_x86", since = "1.27.0")]
25982598
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
25992599
let a = a.as_i64x4();
2600-
let dst: i64x2 = simd_shuffle2(a, a, [0, 1]);
2600+
let dst: i64x2 = simd_shuffle2!(a, a, [0, 1]);
26012601
transmute(dst)
26022602
}
26032603

@@ -2612,7 +2612,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
26122612
#[stable(feature = "simd_x86", since = "1.27.0")]
26132613
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
26142614
// FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2615-
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
2615+
simd_shuffle8!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
26162616
}
26172617

26182618
/// Casts vector of type __m128d to type __m256d;
@@ -2626,7 +2626,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
26262626
#[stable(feature = "simd_x86", since = "1.27.0")]
26272627
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
26282628
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2629-
simd_shuffle4(a, a, [0, 1, 0, 0])
2629+
simd_shuffle4!(a, a, [0, 1, 0, 0])
26302630
}
26312631

26322632
/// Casts vector of type __m128i to type __m256i;
@@ -2641,7 +2641,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
26412641
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
26422642
let a = a.as_i64x2();
26432643
// FIXME simd_shuffle4(a, a, [0, 1, -1, -1])
2644-
let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]);
2644+
let dst: i64x4 = simd_shuffle4!(a, a, [0, 1, 0, 0]);
26452645
transmute(dst)
26462646
}
26472647

@@ -2656,7 +2656,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
26562656
// instructions, thus it has zero latency.
26572657
#[stable(feature = "simd_x86", since = "1.27.0")]
26582658
pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2659-
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
2659+
simd_shuffle8!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
26602660
}
26612661

26622662
/// Constructs a 256-bit integer vector from a 128-bit integer vector.
@@ -2671,7 +2671,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
26712671
#[stable(feature = "simd_x86", since = "1.27.0")]
26722672
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
26732673
let b = _mm_setzero_si128().as_i64x2();
2674-
let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]);
2674+
let dst: i64x4 = simd_shuffle4!(a.as_i64x2(), b, [0, 1, 2, 3]);
26752675
transmute(dst)
26762676
}
26772677

@@ -2687,7 +2687,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
26872687
// instructions, thus it has zero latency.
26882688
#[stable(feature = "simd_x86", since = "1.27.0")]
26892689
pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2690-
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
2690+
simd_shuffle4!(a, _mm_setzero_pd(), [0, 1, 2, 3])
26912691
}
26922692

26932693
/// Returns vector of type `__m256` with undefined elements.
@@ -2732,7 +2732,7 @@ pub unsafe fn _mm256_undefined_si256() -> __m256i {
27322732
#[cfg_attr(test, assert_instr(vinsertf128))]
27332733
#[stable(feature = "simd_x86", since = "1.27.0")]
27342734
pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2735-
simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
2735+
simd_shuffle8!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
27362736
}
27372737

27382738
/// Sets packed __m256d returned vector with the supplied values.

0 commit comments

Comments
 (0)