Skip to content

Commit 49b6acd

Browse files
author
Markus Westerlind
committed
perf: Avoid storing compiled dfa states twice
1 parent 0b38aaf commit 49b6acd

File tree

1 file changed

+84
-21
lines changed

1 file changed

+84
-21
lines changed

src/dfa.rs

+84-21
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,17 @@ implementation.)
4747
*/
4848

4949
use std::borrow::Borrow;
50-
use std::collections::HashMap;
5150
use std::fmt;
5251
use std::iter::repeat;
5352
use std::mem;
53+
use std::sync::Arc;
5454

5555
use exec::ProgramCache;
5656
use prog::{Inst, Program};
5757
use sparse::SparseSet;
5858

59+
use self::state_map::StateMap;
60+
5961
/// Return true if and only if the given program can be executed by a DFA.
6062
///
6163
/// Generally, a DFA is always possible. A pathological case where it is not
@@ -118,7 +120,7 @@ struct CacheInner {
118120
/// things, we just pass indexes around manually. The performance impact of
119121
/// this is probably an instruction or two in the inner loop. However, on
120122
/// 64 bit, each StatePtr is half the size of a *State.
121-
compiled: HashMap<State, StatePtr>,
123+
compiled: StateMap,
122124
/// The transition table.
123125
///
124126
/// The transition table is laid out in row-major order, where states are
@@ -135,9 +137,6 @@ struct CacheInner {
135137
/// bytes that never discriminate a distinct path through the DFA from each
136138
/// other.
137139
trans: Transitions,
138-
/// Our set of states. Note that `StatePtr / num_byte_classes` indexes
139-
/// this Vec rather than just a `StatePtr`.
140-
states: Vec<State>,
141140
/// A set of cached start states, which are limited to the number of
142141
/// permutations of flags set just before the initial byte of input. (The
143142
/// index into this vec is a `EmptyFlags`.)
@@ -270,8 +269,8 @@ impl<T> Result<T> {
270269
/// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8)
271270
/// is coded as a flag, followed by 4 bytes encoding the delta.
272271
#[derive(Clone, Eq, Hash, PartialEq)]
273-
struct State {
274-
data: Box<[u8]>,
272+
pub struct State {
273+
data: Arc<[u8]>,
275274
}
276275

277276
impl Borrow<[u8]> for State {
@@ -280,6 +279,13 @@ impl Borrow<[u8]> for State {
280279
}
281280
}
282281

282+
impl State {
283+
fn heap_size(&self) -> usize {
284+
// An `Arc` allocation stores two reference counters (strong and weak) next to the data.
285+
2 * mem::size_of::<usize>() + self.data.len()
286+
}
287+
}
288+
283289
/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
284290
/// an NFA state).
285291
///
@@ -437,9 +443,8 @@ impl Cache {
437443
let starts = vec![STATE_UNKNOWN; 256];
438444
let mut cache = Cache {
439445
inner: CacheInner {
440-
compiled: HashMap::new(),
446+
compiled: StateMap::new(),
441447
trans: Transitions::new(num_byte_classes),
442-
states: vec![],
443448
start_states: starts,
444449
stack: vec![],
445450
flush_count: 0,
@@ -1157,7 +1162,11 @@ impl<'a> Fsm<'a> {
11571162
Some(v) => v,
11581163
}
11591164
// In the cache? Cool. Done.
1160-
if let Some(&si) = self.cache.compiled.get(&self.cache.insts_scratch_space[..]) {
1165+
if let Some(si) = self
1166+
.cache
1167+
.compiled
1168+
.get_ptr(&self.cache.insts_scratch_space[..])
1169+
{
11611170
return Some(si);
11621171
}
11631172

@@ -1170,7 +1179,7 @@ impl<'a> Fsm<'a> {
11701179
}
11711180

11721181
let key = State {
1173-
data: self.cache.insts_scratch_space.clone().into_boxed_slice(),
1182+
data: Arc::from(&self.cache.insts_scratch_space[..]),
11741183
};
11751184
// Allocate room for our state and add it.
11761185
self.add_state(key)
@@ -1246,7 +1255,7 @@ impl<'a> Fsm<'a> {
12461255
/// This returns false if the cache is not cleared and the DFA should
12471256
/// give up.
12481257
fn clear_cache_and_save(&mut self, current_state: Option<&mut StatePtr>) -> bool {
1249-
if self.cache.states.is_empty() {
1258+
if self.cache.compiled.is_empty() {
12501259
// Nothing to clear...
12511260
return true;
12521261
}
@@ -1276,7 +1285,7 @@ impl<'a> Fsm<'a> {
12761285
// 10 or fewer bytes per state.
12771286
// Additionally, we permit the cache to be flushed a few times before
12781287
// calling it quits.
1279-
let nstates = self.cache.states.len();
1288+
let nstates = self.cache.compiled.len();
12801289
if self.cache.flush_count >= 3
12811290
&& self.at >= self.last_cache_flush
12821291
&& (self.at - self.last_cache_flush) <= 10 * nstates
@@ -1296,7 +1305,6 @@ impl<'a> Fsm<'a> {
12961305
};
12971306
self.cache.reset_size();
12981307
self.cache.trans.clear();
1299-
self.cache.states.clear();
13001308
self.cache.compiled.clear();
13011309
for s in &mut self.cache.start_states {
13021310
*s = STATE_UNKNOWN;
@@ -1316,7 +1324,7 @@ impl<'a> Fsm<'a> {
13161324
fn restore_state(&mut self, state: State) -> Option<StatePtr> {
13171325
// If we've already stored this state, just return a pointer to it.
13181326
// None will be the wiser.
1319-
if let Some(&si) = self.cache.compiled.get(&state) {
1327+
if let Some(si) = self.cache.compiled.get_ptr(&state.data) {
13201328
return Some(si);
13211329
}
13221330
self.add_state(state)
@@ -1451,7 +1459,10 @@ impl<'a> Fsm<'a> {
14511459

14521460
/// Returns a reference to a State given a pointer to it.
14531461
fn state(&self, si: StatePtr) -> &State {
1454-
&self.cache.states[si as usize / self.num_byte_classes()]
1462+
self.cache
1463+
.compiled
1464+
.get_state(si as usize / self.num_byte_classes())
1465+
.unwrap()
14551466
}
14561467

14571468
/// Adds the given state to the DFA.
@@ -1483,14 +1494,12 @@ impl<'a> Fsm<'a> {
14831494
// Finally, put our actual state on to our heap of states and index it
14841495
// so we can find it later.
14851496
self.cache.size += self.cache.trans.state_heap_size()
1486-
+ (2 * state.data.len())
1497+
+ state.heap_size()
14871498
+ (2 * mem::size_of::<State>())
14881499
+ mem::size_of::<StatePtr>();
1489-
self.cache.states.push(state.clone());
14901500
self.cache.compiled.insert(state, si);
14911501
// Transition table and set of states and map should all be in sync.
1492-
debug_assert!(self.cache.states.len() == self.cache.trans.num_states());
1493-
debug_assert!(self.cache.states.len() == self.cache.compiled.len());
1502+
debug_assert!(self.cache.compiled.len() == self.cache.trans.num_states());
14941503
Some(si)
14951504
}
14961505

@@ -1818,10 +1827,64 @@ fn read_varu32(data: &[u8]) -> (u32, usize) {
18181827
(0, 0)
18191828
}
18201829

1830+
mod state_map {
1831+
use std::collections::HashMap;
1832+
1833+
use super::{State, StatePtr};
1834+
1835+
#[derive(Debug)]
1836+
pub struct StateMap {
1837+
/// Maps each state to its pointer. Every key is a clone of the `State` stored in `states`,
1838+
/// so both share one `Arc<[u8]>` buffer and the state's bytes are stored only once. Keys
1839+
/// and `states` entries are always inserted and cleared together.
1840+
map: HashMap<State, StatePtr>,
1841+
/// Our set of states. Note that `StatePtr / num_byte_classes` indexes
1842+
/// this Vec rather than just a `StatePtr`.
1843+
states: Vec<State>,
1844+
}
1845+
1846+
impl StateMap {
1847+
pub fn new() -> StateMap {
1848+
StateMap {
1849+
map: HashMap::new(),
1850+
states: Vec::new(),
1851+
}
1852+
}
1853+
1854+
pub fn len(&self) -> usize {
1855+
self.states.len()
1856+
}
1857+
1858+
pub fn is_empty(&self) -> bool {
1859+
self.states.is_empty()
1860+
}
1861+
1862+
pub fn get_ptr(&self, index: &[u8]) -> Option<StatePtr> {
1863+
self.map.get(index).cloned()
1864+
}
1865+
1866+
pub fn get_state(&self, index: usize) -> Option<&State> {
1867+
self.states.get(index)
1868+
}
1869+
1870+
pub fn insert(&mut self, state: State, si: StatePtr) {
1871+
self.map.insert(state.clone(), si);
1872+
self.states.push(state);
1873+
}
1874+
1875+
pub fn clear(&mut self) {
1876+
self.map.clear();
1877+
self.states.clear();
1878+
}
1879+
}
1880+
}
1881+
18211882
#[cfg(test)]
18221883
mod tests {
18231884
extern crate rand;
18241885

1886+
use std::sync::Arc;
1887+
18251888
use super::{
18261889
push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, State, StateFlags,
18271890
};
@@ -1836,7 +1899,7 @@ mod tests {
18361899
push_inst_ptr(&mut data, &mut prev, ip);
18371900
}
18381901
let state = State {
1839-
data: data.into_boxed_slice(),
1902+
data: Arc::from(&data[..]),
18401903
};
18411904

18421905
let expected: Vec<usize> = ips.into_iter().map(|ip| ip as usize).collect();

0 commit comments

Comments
 (0)