Skip to content

Commit 93fb12e

Browse files
committed
auto merge of #11498 : c-a/rust/optimize_vuint_at, r=alexcrichton
Use a lookup table, SHIFT_MASK_TABLE, that holds, for every possible four-bit prefix, the number of bits the value should be right-shifted by and the mask that should be applied to the shifted value. This way we can get rid of the branches, which in my testing gives approximately a 2x speedup. Timings on Intel(R) Core(TM) i5-3570K CPU @ 3.40GHz: -- Before -- running 5 tests test ebml::tests::test_vuint_at ... ok test ebml::bench::vuint_at_A_aligned ... bench: 494 ns/iter (+/- 3) test ebml::bench::vuint_at_A_unaligned ... bench: 494 ns/iter (+/- 4) test ebml::bench::vuint_at_D_aligned ... bench: 467 ns/iter (+/- 5) test ebml::bench::vuint_at_D_unaligned ... bench: 467 ns/iter (+/- 5) -- After -- running 5 tests test ebml::tests::test_vuint_at ... ok test ebml::bench::vuint_at_A_aligned ... bench: 181 ns/iter (+/- 2) test ebml::bench::vuint_at_A_unaligned ... bench: 192 ns/iter (+/- 1) test ebml::bench::vuint_at_D_aligned ... bench: 181 ns/iter (+/- 3) test ebml::bench::vuint_at_D_unaligned ... bench: 197 ns/iter (+/- 6)
2 parents 5fdc812 + f4c9ed4 commit 93fb12e

File tree

1 file changed

+79
-23
lines changed

1 file changed

+79
-23
lines changed

src/libextra/ebml.rs

+79-23
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ pub mod reader {
9090

9191
// ebml reading
9292

93-
struct Res {
93+
pub struct Res {
9494
val: uint,
9595
next: uint
9696
}
@@ -130,32 +130,40 @@ pub mod reader {
130130
return vuint_at_slow(data, start);
131131
}
132132

133+
// Lookup table for parsing EBML Element IDs as per http://ebml.sourceforge.net/specs/
134+
// The Element IDs are parsed by reading a big endian u32 positioned at data[start].
135+
// Using the four most significant bits of the u32 we lookup in the table below how the
136+
// element ID should be derived from it.
137+
//
138+
// The table stores tuples (shift, mask) where shift is the number of bits the u32 should be
139+
// right-shifted by and mask is the value the shifted result should be masked (ANDed) with.
140+
// If, for example, the most significant bit is set, this means it's a class A ID and the u32
141+
// should be right-shifted by 24 and masked with 0x7f. Therefore we store (24, 0x7f) at
142+
// index 0x8 - 0xF (four bit numbers where the most significant bit is set).
143+
//
144+
// By storing the shift amounts and masks in a table instead of checking in order whether
145+
// the most significant bit is set, the second most significant bit is set, etc., we can
146+
// replace up to three "and+branch" sequences with a single table lookup, which gives us a
147+
// measured speedup of around 2x on x86_64.
148+
static SHIFT_MASK_TABLE: [(u32, u32), ..16] = [
149+
(0, 0x0), (0, 0x0fffffff),
150+
(8, 0x1fffff), (8, 0x1fffff),
151+
(16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff),
152+
(24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f),
153+
(24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f)
154+
];
155+
133156
unsafe {
134157
let (ptr, _): (*u8, uint) = transmute(data);
135158
let ptr = offset(ptr, start as int);
136159
let ptr: *i32 = transmute(ptr);
137-
let val = from_be32(*ptr);
138-
let val: u32 = transmute(val);
139-
if (val & 0x80000000) != 0 {
140-
Res {
141-
val: ((val >> 24) & 0x7f) as uint,
142-
next: start + 1
143-
}
144-
} else if (val & 0x40000000) != 0 {
145-
Res {
146-
val: ((val >> 16) & 0x3fff) as uint,
147-
next: start + 2
148-
}
149-
} else if (val & 0x20000000) != 0 {
150-
Res {
151-
val: ((val >> 8) & 0x1fffff) as uint,
152-
next: start + 3
153-
}
154-
} else {
155-
Res {
156-
val: (val & 0x0fffffff) as uint,
157-
next: start + 4
158-
}
160+
let val = from_be32(*ptr) as u32;
161+
162+
let i = (val >> 28u) as uint;
163+
let (shift, mask) = SHIFT_MASK_TABLE[i];
164+
Res {
165+
val: ((val >> shift) & mask) as uint,
166+
next: start + (((32 - shift) >> 3) as uint)
159167
}
160168
}
161169
}
@@ -938,6 +946,54 @@ mod tests {
938946
use std::io::mem::MemWriter;
939947
use std::option::{None, Option, Some};
940948

949+
#[test]
950+
fn test_vuint_at() {
951+
let data = [
952+
0x80,
953+
0xff,
954+
0x40, 0x00,
955+
0x7f, 0xff,
956+
0x20, 0x00, 0x00,
957+
0x3f, 0xff, 0xff,
958+
0x10, 0x00, 0x00, 0x00,
959+
0x1f, 0xff, 0xff, 0xff
960+
];
961+
962+
let mut res: reader::Res;
963+
964+
// Class A
965+
res = reader::vuint_at(data, 0);
966+
assert_eq!(res.val, 0);
967+
assert_eq!(res.next, 1);
968+
res = reader::vuint_at(data, res.next);
969+
assert_eq!(res.val, (1 << 7) - 1);
970+
assert_eq!(res.next, 2);
971+
972+
// Class B
973+
res = reader::vuint_at(data, res.next);
974+
assert_eq!(res.val, 0);
975+
assert_eq!(res.next, 4);
976+
res = reader::vuint_at(data, res.next);
977+
assert_eq!(res.val, (1 << 14) - 1);
978+
assert_eq!(res.next, 6);
979+
980+
// Class C
981+
res = reader::vuint_at(data, res.next);
982+
assert_eq!(res.val, 0);
983+
assert_eq!(res.next, 9);
984+
res = reader::vuint_at(data, res.next);
985+
assert_eq!(res.val, (1 << 21) - 1);
986+
assert_eq!(res.next, 12);
987+
988+
// Class D
989+
res = reader::vuint_at(data, res.next);
990+
assert_eq!(res.val, 0);
991+
assert_eq!(res.next, 16);
992+
res = reader::vuint_at(data, res.next);
993+
assert_eq!(res.val, (1 << 28) - 1);
994+
assert_eq!(res.next, 20);
995+
}
996+
941997
#[test]
942998
fn test_option_int() {
943999
fn test_v(v: Option<int>) {

0 commit comments

Comments
 (0)