Closed
Description
Below is a lot of code because, to replicate the bug, you need an amdgpu device that has at least one sensor that is not implemented. For me it's an RX 6700 XT, which has 7 unsupported sensors. You can probably also replicate it by using another kernel module that can return EOPNOTSUPP, or by writing your own.
Replace `<N>` with your amdgpu device number (probably 0).
I tried this code:
use std::fs::File;
use std::io::{ErrorKind, Read, Seek};
/// Identifiers for the amdgpu power-play sensors probed by this program.
///
/// Each variant's discriminant is its position in the list, starting at 0; `main` uses
/// these positions (multiplied by 4) as offsets into the sensors file.
///
/// NOTE(review): presumably mirrors the sensor enum of the amdgpu kernel driver — verify
/// the ordering against the kernel headers before relying on it.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub enum AmdPpSensors {
    // Clocks and voltages.
    AmdgpuPpSensorGfxSclk = 0,
    AmdgpuPpSensorCpuClk = 1,
    AmdgpuPpSensorVddnb = 2,
    AmdgpuPpSensorVddgfx = 3,
    AmdgpuPpSensorUvdVclk = 4,
    AmdgpuPpSensorUvdDclk = 5,
    AmdgpuPpSensorVceEcclk = 6,
    // Load.
    AmdgpuPpSensorGpuLoad = 7,
    AmdgpuPpSensorMemLoad = 8,
    AmdgpuPpSensorGfxMclk = 9,
    // Temperatures.
    AmdgpuPpSensorGpuTemp = 10,
    // `AmdgpuPpSensorEdgeTemp` would share the value 10 with `AmdgpuPpSensorGpuTemp`,
    // so it is intentionally left out (Rust enums cannot have duplicate discriminants).
    AmdgpuPpSensorHotspotTemp = 11,
    AmdgpuPpSensorMemTemp = 12,
    // Power.
    AmdgpuPpSensorVcePower = 13,
    AmdgpuPpSensorUvdPower = 14,
    AmdgpuPpSensorGpuAvgPower = 15,
    AmdgpuPpSensorGpuInputPower = 16,
    AmdgpuPpSensorSsApuShare = 17,
    AmdgpuPpSensorSsDgpuShare = 18,
    // Power states, feature mask and fan limits.
    AmdgpuPpSensorStablePstateSclk = 19,
    AmdgpuPpSensorStablePstateMclk = 20,
    AmdgpuPpSensorEnabledSmcFeaturesMask = 21,
    AmdgpuPpSensorMinFanRpm = 22,
    AmdgpuPpSensorMaxFanRpm = 23,
    AmdgpuPpSensorVcnPowerState = 24,
    AmdgpuPpSensorPeakPstateSclk = 25,
    AmdgpuPpSensorPeakPstateMclk = 26,
    AmdgpuPpSensorVcnLoad = 27,
}
/// Number of variants in `AmdPpSensors`.
///
/// Used as the length of `AmdPpSensors::ALL_VARIANTS` and of the per-sensor table in `main`,
/// so it must be updated by hand whenever a variant is added or removed.
const AMDGPU_PP_SENSORS: usize = 28; // TODO: This value should not be hardcoded, but derived
impl AmdPpSensors {
/// Every variant of `AmdPpSensors`, listed in declaration order.
///
/// The array length is fixed to `AMDGPU_PP_SENSORS`, so the compiler rejects this list
/// if the number of entries differs from that constant.
pub const ALL_VARIANTS: [Self; AMDGPU_PP_SENSORS] = [
Self::AmdgpuPpSensorGfxSclk,
Self::AmdgpuPpSensorCpuClk,
Self::AmdgpuPpSensorVddnb,
Self::AmdgpuPpSensorVddgfx,
Self::AmdgpuPpSensorUvdVclk,
Self::AmdgpuPpSensorUvdDclk,
Self::AmdgpuPpSensorVceEcclk,
Self::AmdgpuPpSensorGpuLoad,
Self::AmdgpuPpSensorMemLoad,
Self::AmdgpuPpSensorGfxMclk,
Self::AmdgpuPpSensorGpuTemp,
Self::AmdgpuPpSensorHotspotTemp,
Self::AmdgpuPpSensorMemTemp,
Self::AmdgpuPpSensorVcePower,
Self::AmdgpuPpSensorUvdPower,
Self::AmdgpuPpSensorGpuAvgPower,
Self::AmdgpuPpSensorGpuInputPower,
Self::AmdgpuPpSensorSsApuShare,
Self::AmdgpuPpSensorSsDgpuShare,
Self::AmdgpuPpSensorStablePstateSclk,
Self::AmdgpuPpSensorStablePstateMclk,
Self::AmdgpuPpSensorEnabledSmcFeaturesMask,
Self::AmdgpuPpSensorMinFanRpm,
Self::AmdgpuPpSensorMaxFanRpm,
Self::AmdgpuPpSensorVcnPowerState,
Self::AmdgpuPpSensorPeakPstateSclk,
Self::AmdgpuPpSensorPeakPstateMclk,
Self::AmdgpuPpSensorVcnLoad,
];
}
/// Probes every sensor slot of the amdgpu debugfs `amdgpu_sensors` file.
///
/// For each index below `AMDGPU_PP_SENSORS` the file is positioned at `index * 4` and a read
/// of 4, then 8, then 12 bytes is attempted. Each successful read adds 4 to that index's entry
/// in `supported_variants`; only indices that succeeded at the previous size are retried with
/// the next one. Read errors of kind `InvalidInput` or `Unsupported` are treated as expected
/// and ignored; any other error is printed to stderr together with the index.
///
/// # Panics
///
/// Panics if the sensors file cannot be opened or if a seek fails.
fn main() {
    // `<N>` is a placeholder for the DRI device number and must be edited before running.
    let path = "/sys/kernel/debug/dri/<N>/amdgpu_sensors";
    let mut file = File::open(path).expect("Opening this file");
    // Per-index number of bytes that have been read successfully so far (0 = none yet).
    let mut supported_variants: [u8; AMDGPU_PP_SENSORS] = [0u8; AMDGPU_PP_SENSORS];
    let mut buf = [0u8; 16];
    for size in [0u8, 4, 8] {
        // Only retry indices whose last successful read was exactly `size` bytes long.
        let candidates = supported_variants
            .iter_mut()
            .enumerate()
            .filter(|(_, variant_size)| **variant_size == size);
        for (variant, variant_size) in candidates {
            // Try to read one 4-byte word more than what is already known to work.
            let view = &mut buf[..usize::from(size) + 4];
            file.seek(std::io::SeekFrom::Start(variant as u64 * 4))
                .expect("Seek to succeed, since I used an amdgpu enum variant. (as of 2025-04-14)");
            match file.read_exact(view) {
                Ok(()) => *variant_size += 4,
                // Expected failures: nothing to report.
                Err(e) if matches!(e.kind(), ErrorKind::InvalidInput | ErrorKind::Unsupported) => {}
                Err(e) => eprintln!("{:?}: {:?}", variant, e),
            }
        }
    }
}
I expected to see this happen: Nothing gets printed, since I ran this program as root and handle the expected errors. According to the amdgpu kernel module source code, EINVAL and EOPNOTSUPP are expected.
Instead, this happened:
1: Os { code: 95, kind: Uncategorized, message: "Operation not supported" }
[...]
Meta
`rustc --version --verbose`:
rustc 1.86.0 (05f9846f8 2025-03-31) (Fedora 1.86.0-1.fc41)
binary: rustc
commit-hash: 05f9846f893b09a1be1fc8560e33fc3c815cfecb
commit-date: 2025-03-31
host: x86_64-unknown-linux-gnu
release: 1.86.0
LLVM version: 19.1.7
rustc 1.88.0-nightly (092a284ba 2025-04-13)
binary: rustc
commit-hash: 092a284ba0421695f2032c947765429fd7095796
commit-date: 2025-04-13
host: x86_64-unknown-linux-gnu
release: 1.88.0-nightly