Skip to content

Commit 2d957e4

Browse files
committed
feat(riscv): implement ctz in SRAM
Previously, the code for `intrinsics::cttz` (e.g. `u128.trailing_zeros()`) was inlined into the call site, but that code referenced a lookup table in the `.rodata` section (placed in flash):

```
40380xxx: 3c0066b7  lui  a3,0x3c006
40380xxx: ed168293  addi t0,a3,-303 # 3c005ed1 <str.1+0x81>
40380xxx: 03810813  addi a6,sp,56
...
40380xxx: 41e00633  neg  a2,t5
40380xxx: 00cf7633  and  a2,t5,a2
40380xxx: 03160633  mul  a2,a2,a7
40380xxx: 826d      srli a2,a2,0x1b
40380xxx: 9616      add  a2,a2,t0
40380xxx: 00064603  lbu  a2,0(a2)
```

Caution: the `str.1` symbol name there is a red herring. `str.1` has a length of just 0x21 bytes; there is seemingly no symbolic name associated with the lookup table, and `str.1` just happens to be the closest preceding label.

I can't claim credit for the `ctz32` algorithm here: all I've done is copy it from Wikipedia, translate the operations to their Rust equivalents, and then apply it four times to extend it to a `u128`. I have tested it with both `0b0..010..0` (a single bit set) and `0xff...100` (all non-trailing bits set) for every bit position, which caught at least two mistakes in the translation.
1 parent 0c86171 commit 2d957e4

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

esp-hal-common/src/interrupt/riscv.rs

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,50 @@ mod vectored {
166166
}
167167
}
168168

169+
/// Counts the trailing zero bits of `n`, like [intrinsics::cttz] (i.e.
/// `u128::trailing_zeros`), except that we control where the code and its
/// lookup table are placed: the function goes into `.rwtext` and the table
/// into `.data`, so the table can live in SRAM.
///
/// Returns `u128::BITS` (128) when `n` is zero.
#[link_section = ".rwtext"]
fn ctz(n: u128) -> u32 {
    /// Counts the trailing zero bits of a single, non-zero 32-bit word.
    ///
    /// The caller must not pass `0`: the de Bruijn lookup cannot distinguish
    /// "no bit set" from "bit 0 set" and would return 0.
    ///
    /// This isn't quite what LLVM does; per the original note it lowers cttz
    /// to `ctpop(~X & (X-1))`, which uses some kind of
    /// binary-search-like algorithm plus a lookup table.
    // `inline(always)` is deliberate: the helper has no `link_section` of its
    // own, so it has to be folded into `ctz` to end up in the same section.
    #[inline(always)]
    fn ctz32(word: u32) -> u32 {
        // cf. https://en.wikipedia.org/wiki/Find_first_set#CTZ
        const DE_BRUIJN: u32 = 0x077C_B531;

        // Maps the top five bits of `DE_BRUIJN << bit` back to `bit`.
        #[link_section = ".data"]
        static TABLE: [u8; 32] = {
            let mut table = [0u8; 32];
            let mut bit = 0u8;
            while bit < 32 {
                // Multiplying by `1 << bit` is the same as shifting left by
                // `bit`; the bits pushed out at the top are simply dropped.
                table[((DE_BRUIJN << bit) >> 27) as usize] = bit;
                bit += 1;
            }
            table
        };

        // Isolate the lowest set bit (two's complement trick).
        let lowest = word & word.wrapping_neg();
        // `>> 27` leaves a 5-bit value, so the index is always within 0..32.
        TABLE[(lowest.wrapping_mul(DE_BRUIJN) >> 27) as usize] as u32
    }

    // Split into 32-bit words, least significant first. The `as u32` casts
    // truncate on purpose. Constant shifts keep this independent of the
    // target's byte order and avoid the unstable `as_chunks` API.
    let words = [
        n as u32,
        (n >> 32) as u32,
        (n >> 64) as u32,
        (n >> 96) as u32,
    ];

    for (index, &word) in words.iter().enumerate() {
        if word != 0 {
            return ctz32(word) + index as u32 * 32;
        }
    }

    // No bit set at all: same answer as `u128::trailing_zeros`.
    u128::BITS
}
204+
169205
/// Get the interrupts configured for the core
170206
#[inline]
171207
fn get_configured_interrupts(_core: Cpu, mut status: u128) -> [u128; 16] {
172208
unsafe {
173209
let mut prios = [0u128; 16];
174210

175211
while status != 0 {
176-
let interrupt_nr = status.trailing_zeros() as u16;
212+
let interrupt_nr = ctz(status) as u16;
177213
// safety: cast is safe because of repr(u16)
178214
let cpu_interrupt: CpuInterrupt =
179215
get_assigned_cpu_interrupt(core::mem::transmute(interrupt_nr as u16));
@@ -222,7 +258,7 @@ mod vectored {
222258
let mut interrupt_mask =
223259
status & configured_interrupts[interrupt_to_priority(cpu_intr as usize)];
224260
while interrupt_mask != 0 {
225-
let interrupt_nr = interrupt_mask.trailing_zeros();
261+
let interrupt_nr = ctz(interrupt_mask);
226262
// Interrupt::try_from can fail if interrupt already de-asserted:
227263
// silently ignore
228264
if let Ok(interrupt) = peripherals::Interrupt::try_from(interrupt_nr as u8) {

esp-hal-common/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
feature(impl_trait_projections)
3232
)]
3333
#![doc(html_logo_url = "https://avatars.githubusercontent.com/u/46717278")]
34+
#![feature(const_mut_refs)]
35+
#![feature(slice_as_chunks)]
3436

3537
#[cfg(riscv)]
3638
pub use esp_riscv_rt::{self, entry, riscv};

0 commit comments

Comments
 (0)