Skip to content

Commit d1550b0

Browse files
committed
Add feature detection for new amx variants and movrs
1 parent d45f31e commit d1550b0

File tree

3 files changed

+53
-10
lines changed

3 files changed

+53
-10
lines changed

crates/std_detect/src/detect/arch/x86.rs

+19-2
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,7 @@ features! {
177177
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi";
178178
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
179179
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq";
180-
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
181-
/// Quadword)
180+
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword)
182181
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2";
183182
/// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities)
184183
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni";
@@ -217,6 +216,21 @@ features! {
217216
/// AMX-FP16 (Float16 Operations)
218217
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex";
219218
/// AMX-COMPLEX (Complex number Operations)
219+
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512";
220+
without cfg check: true;
221+
/// AMX-AVX512 (AVX512 operations extended to matrices)
222+
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8";
223+
without cfg check: true;
224+
/// AMX-FP8 (Float8 Operations)
225+
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs";
226+
without cfg check: true;
227+
/// AMX-MOVRS (Matrix MOVRS operations)
228+
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32";
229+
without cfg check: true;
230+
/// AMX-TF32 (TensorFloat32 Operations)
231+
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose";
232+
without cfg check: true;
233+
/// AMX-TRANSPOSE (Matrix Transpose Operations)
220234
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
221235
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
222236
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";
@@ -253,6 +267,9 @@ features! {
253267
/// RTM, Intel (Restricted Transactional Memory)
254268
@FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe";
255269
/// MOVBE (Move Data After Swapping Bytes)
270+
@FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs";
271+
without cfg check: true;
272+
/// MOVRS (Move data with the read-shared hint)
256273
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb";
257274
/// ERMSB (Enhanced REP MOVSB and STOSB)
258275
@FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop";

crates/std_detect/src/detect/os/x86.rs

+23-7
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,8 @@ pub(crate) fn detect_features() -> cache::Initializer {
141141

142142
enable(extended_features_ebx, 9, Feature::ermsb);
143143

144+
enable(extended_features_eax_leaf_1, 31, Feature::movrs);
145+
144146
// Detect if CPUID.19h available
145147
if bit::test(extended_features_ecx as usize, 23) {
146148
let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) };
@@ -250,14 +252,28 @@ pub(crate) fn detect_features() -> cache::Initializer {
250252
enable(extended_features_edx, 8, Feature::avx512vp2intersect);
251253
enable(extended_features_edx, 23, Feature::avx512fp16);
252254
enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
255+
}
256+
}
253257

254-
if os_amx_support {
255-
enable(extended_features_edx, 24, Feature::amx_tile);
256-
enable(extended_features_edx, 25, Feature::amx_int8);
257-
enable(extended_features_edx, 22, Feature::amx_bf16);
258-
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
259-
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
260-
}
258+
if os_amx_support {
259+
enable(extended_features_edx, 24, Feature::amx_tile);
260+
enable(extended_features_edx, 25, Feature::amx_int8);
261+
enable(extended_features_edx, 22, Feature::amx_bf16);
262+
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
263+
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
264+
265+
if max_basic_leaf >= 0x1e {
266+
let CpuidResult {
267+
eax: amx_feature_flags_eax,
268+
..
269+
} = unsafe { __cpuid_count(0x1e_u32, 1) };
270+
271+
// TODO: This sub-leaf also supports detection of the older AMX variants; should we add a fallback detection path for them here?
272+
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
273+
enable(amx_feature_flags_eax, 5, Feature::amx_transpose);
274+
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
275+
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
276+
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
261277
}
262278
}
263279
}

crates/std_detect/tests/x86-specific.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
sha512_sm_x86,
77
x86_amx_intrinsics,
88
xop_target_feature,
9-
keylocker_x86
9+
keylocker_x86,
10+
movrs_target_feature
1011
)]
1112

1213
extern crate cupid;
@@ -97,6 +98,15 @@ fn dump() {
9798
println!("xop: {:?}", is_x86_feature_detected!("xop"));
9899
println!("kl: {:?}", is_x86_feature_detected!("kl"));
99100
println!("widekl: {:?}", is_x86_feature_detected!("widekl"));
101+
println!("movrs: {:?}", is_x86_feature_detected!("movrs"));
102+
println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8"));
103+
println!(
104+
"amx-transpose: {:?}",
105+
is_x86_feature_detected!("amx-transpose")
106+
);
107+
println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32"));
108+
println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512"));
109+
println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs"));
100110
}
101111

102112
#[cfg(feature = "std_detect_env_override")]

0 commit comments

Comments
 (0)