@@ -47,15 +47,17 @@ implementation.)
47
47
*/
48
48
49
49
use std:: borrow:: Borrow ;
50
- use std:: collections:: HashMap ;
51
50
use std:: fmt;
52
51
use std:: iter:: repeat;
53
52
use std:: mem;
53
+ use std:: sync:: Arc ;
54
54
55
55
use exec:: ProgramCache ;
56
56
use prog:: { Inst , Program } ;
57
57
use sparse:: SparseSet ;
58
58
59
+ use self :: state_map:: StateMap ;
60
+
59
61
/// Return true if and only if the given program can be executed by a DFA.
60
62
///
61
63
/// Generally, a DFA is always possible. A pathological case where it is not
@@ -118,7 +120,7 @@ struct CacheInner {
118
120
/// things, we just pass indexes around manually. The performance impact of
119
121
/// this is probably an instruction or two in the inner loop. However, on
120
122
/// 64 bit, each StatePtr is half the size of a *State.
121
- compiled : HashMap < State , StatePtr > ,
123
+ compiled : StateMap ,
122
124
/// The transition table.
123
125
///
124
126
/// The transition table is laid out in row-major order, where states are
@@ -135,9 +137,6 @@ struct CacheInner {
135
137
/// bytes that never discriminate a distinct path through the DFA from each
136
138
/// other.
137
139
trans : Transitions ,
138
- /// Our set of states. Note that `StatePtr / num_byte_classes` indexes
139
- /// this Vec rather than just a `StatePtr`.
140
- states : Vec < State > ,
141
140
/// A set of cached start states, which are limited to the number of
142
141
/// permutations of flags set just before the initial byte of input. (The
143
142
/// index into this vec is a `EmptyFlags`.)
@@ -270,8 +269,8 @@ impl<T> Result<T> {
270
269
/// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8)
271
270
/// is coded as a flag, followed by 4 bytes encoding the delta.
272
271
#[ derive( Clone , Eq , Hash , PartialEq ) ]
273
- struct State {
274
- data : Box < [ u8 ] > ,
272
+ pub struct State {
273
+ data : Arc < [ u8 ] > ,
275
274
}
276
275
277
276
impl Borrow < [ u8 ] > for State {
@@ -280,6 +279,13 @@ impl Borrow<[u8]> for State {
280
279
}
281
280
}
282
281
282
+ impl State {
283
+ fn heap_size ( & self ) -> usize {
284
+ // 2 * Reference counters
285
+ 2 * mem:: size_of :: < usize > ( ) + self . data . len ( )
286
+ }
287
+ }
288
+
283
289
/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
284
290
/// an NFA state).
285
291
///
@@ -437,9 +443,8 @@ impl Cache {
437
443
let starts = vec ! [ STATE_UNKNOWN ; 256 ] ;
438
444
let mut cache = Cache {
439
445
inner : CacheInner {
440
- compiled : HashMap :: new ( ) ,
446
+ compiled : StateMap :: new ( ) ,
441
447
trans : Transitions :: new ( num_byte_classes) ,
442
- states : vec ! [ ] ,
443
448
start_states : starts,
444
449
stack : vec ! [ ] ,
445
450
flush_count : 0 ,
@@ -1157,7 +1162,11 @@ impl<'a> Fsm<'a> {
1157
1162
Some ( v) => v,
1158
1163
}
1159
1164
// In the cache? Cool. Done.
1160
- if let Some ( & si) = self . cache . compiled . get ( & self . cache . insts_scratch_space [ ..] ) {
1165
+ if let Some ( si) = self
1166
+ . cache
1167
+ . compiled
1168
+ . get_ptr ( & self . cache . insts_scratch_space [ ..] )
1169
+ {
1161
1170
return Some ( si) ;
1162
1171
}
1163
1172
@@ -1170,7 +1179,7 @@ impl<'a> Fsm<'a> {
1170
1179
}
1171
1180
1172
1181
let key = State {
1173
- data : self . cache . insts_scratch_space . clone ( ) . into_boxed_slice ( ) ,
1182
+ data : Arc :: from ( & self . cache . insts_scratch_space [ .. ] ) ,
1174
1183
} ;
1175
1184
// Allocate room for our state and add it.
1176
1185
self . add_state ( key)
@@ -1246,7 +1255,7 @@ impl<'a> Fsm<'a> {
1246
1255
/// This returns false if the cache is not cleared and the DFA should
1247
1256
/// give up.
1248
1257
fn clear_cache_and_save ( & mut self , current_state : Option < & mut StatePtr > ) -> bool {
1249
- if self . cache . states . is_empty ( ) {
1258
+ if self . cache . compiled . is_empty ( ) {
1250
1259
// Nothing to clear...
1251
1260
return true ;
1252
1261
}
@@ -1276,7 +1285,7 @@ impl<'a> Fsm<'a> {
1276
1285
// 10 or fewer bytes per state.
1277
1286
// Additionally, we permit the cache to be flushed a few times before
1278
1287
// calling it quits.
1279
- let nstates = self . cache . states . len ( ) ;
1288
+ let nstates = self . cache . compiled . len ( ) ;
1280
1289
if self . cache . flush_count >= 3
1281
1290
&& self . at >= self . last_cache_flush
1282
1291
&& ( self . at - self . last_cache_flush ) <= 10 * nstates
@@ -1296,7 +1305,6 @@ impl<'a> Fsm<'a> {
1296
1305
} ;
1297
1306
self . cache . reset_size ( ) ;
1298
1307
self . cache . trans . clear ( ) ;
1299
- self . cache . states . clear ( ) ;
1300
1308
self . cache . compiled . clear ( ) ;
1301
1309
for s in & mut self . cache . start_states {
1302
1310
* s = STATE_UNKNOWN ;
@@ -1316,7 +1324,7 @@ impl<'a> Fsm<'a> {
1316
1324
fn restore_state ( & mut self , state : State ) -> Option < StatePtr > {
1317
1325
// If we've already stored this state, just return a pointer to it.
1318
1326
// None will be the wiser.
1319
- if let Some ( & si) = self . cache . compiled . get ( & state) {
1327
+ if let Some ( si) = self . cache . compiled . get_ptr ( & state. data ) {
1320
1328
return Some ( si) ;
1321
1329
}
1322
1330
self . add_state ( state)
@@ -1451,7 +1459,10 @@ impl<'a> Fsm<'a> {
1451
1459
1452
1460
/// Returns a reference to a State given a pointer to it.
1453
1461
fn state ( & self , si : StatePtr ) -> & State {
1454
- & self . cache . states [ si as usize / self . num_byte_classes ( ) ]
1462
+ self . cache
1463
+ . compiled
1464
+ . get_state ( si as usize / self . num_byte_classes ( ) )
1465
+ . unwrap ( )
1455
1466
}
1456
1467
1457
1468
/// Adds the given state to the DFA.
@@ -1483,14 +1494,12 @@ impl<'a> Fsm<'a> {
1483
1494
// Finally, put our actual state on to our heap of states and index it
1484
1495
// so we can find it later.
1485
1496
self . cache . size += self . cache . trans . state_heap_size ( )
1486
- + ( 2 * state. data . len ( ) )
1497
+ + state. heap_size ( )
1487
1498
+ ( 2 * mem:: size_of :: < State > ( ) )
1488
1499
+ mem:: size_of :: < StatePtr > ( ) ;
1489
- self . cache . states . push ( state. clone ( ) ) ;
1490
1500
self . cache . compiled . insert ( state, si) ;
1491
1501
// Transition table and set of states and map should all be in sync.
1492
- debug_assert ! ( self . cache. states. len( ) == self . cache. trans. num_states( ) ) ;
1493
- debug_assert ! ( self . cache. states. len( ) == self . cache. compiled. len( ) ) ;
1502
+ debug_assert ! ( self . cache. compiled. len( ) == self . cache. trans. num_states( ) ) ;
1494
1503
Some ( si)
1495
1504
}
1496
1505
@@ -1818,10 +1827,64 @@ fn read_varu32(data: &[u8]) -> (u32, usize) {
1818
1827
( 0 , 0 )
1819
1828
}
1820
1829
1830
+ mod state_map {
1831
+ use std:: collections:: HashMap ;
1832
+
1833
+ use super :: { State , StatePtr } ;
1834
+
1835
+ #[ derive( Debug ) ]
1836
+ pub struct StateMap {
1837
+ /// The keys are not actually static but rely on always pointing to a buffer in `states`
1838
+ /// which will never be moved except when clearing the map or on drop, in which case the
1839
+ /// keys of this map will be removed before
1840
+ map : HashMap < State , StatePtr > ,
1841
+ /// Our set of states. Note that `StatePtr / num_byte_classes` indexes
1842
+ /// this Vec rather than just a `StatePtr`.
1843
+ states : Vec < State > ,
1844
+ }
1845
+
1846
+ impl StateMap {
1847
+ pub fn new ( ) -> StateMap {
1848
+ StateMap {
1849
+ map : HashMap :: new ( ) ,
1850
+ states : Vec :: new ( ) ,
1851
+ }
1852
+ }
1853
+
1854
+ pub fn len ( & self ) -> usize {
1855
+ self . states . len ( )
1856
+ }
1857
+
1858
+ pub fn is_empty ( & self ) -> bool {
1859
+ self . states . is_empty ( )
1860
+ }
1861
+
1862
+ pub fn get_ptr ( & self , index : & [ u8 ] ) -> Option < StatePtr > {
1863
+ self . map . get ( index) . cloned ( )
1864
+ }
1865
+
1866
+ pub fn get_state ( & self , index : usize ) -> Option < & State > {
1867
+ self . states . get ( index)
1868
+ }
1869
+
1870
+ pub fn insert ( & mut self , state : State , si : StatePtr ) {
1871
+ self . map . insert ( state. clone ( ) , si) ;
1872
+ self . states . push ( state) ;
1873
+ }
1874
+
1875
+ pub fn clear ( & mut self ) {
1876
+ self . map . clear ( ) ;
1877
+ self . states . clear ( ) ;
1878
+ }
1879
+ }
1880
+ }
1881
+
1821
1882
#[ cfg( test) ]
1822
1883
mod tests {
1823
1884
extern crate rand;
1824
1885
1886
+ use std:: sync:: Arc ;
1887
+
1825
1888
use super :: {
1826
1889
push_inst_ptr, read_vari32, read_varu32, write_vari32, write_varu32, State , StateFlags ,
1827
1890
} ;
@@ -1836,7 +1899,7 @@ mod tests {
1836
1899
push_inst_ptr ( & mut data, & mut prev, ip) ;
1837
1900
}
1838
1901
let state = State {
1839
- data : data. into_boxed_slice ( ) ,
1902
+ data : Arc :: from ( & data[ .. ] ) ,
1840
1903
} ;
1841
1904
1842
1905
let expected: Vec < usize > = ips. into_iter ( ) . map ( |ip| ip as usize ) . collect ( ) ;
0 commit comments