@@ -275,25 +275,12 @@ struct State{
275
275
/// `u32` here for the DFA to save on space.
276
276
type InstPtr = u32 ;
277
277
278
- // Used to construct new states.
278
+ /// Adds ip to data using delta encoding with respect to prev.
279
+ ///
280
+ /// After completion, `data` will contain `ip` and `prev` will be set to `ip`.
279
281
fn push_inst_ptr ( data : & mut Vec < u8 > , prev : & mut InstPtr , ip : InstPtr ) {
280
282
let delta = ( ip as i32 ) - ( * prev as i32 ) ;
281
- if delta. abs ( ) <= 127 {
282
- data. push ( delta as u8 ) ;
283
- * prev = ip;
284
- return ;
285
- }
286
- let delta = delta as u32 ;
287
- // Write 4 bytes in little-endian format.
288
- let a = ( delta & ( 0xFF << 0 * 8 ) ) >> 0 * 8 ;
289
- let b = ( delta & ( 0xFF << 1 * 8 ) ) >> 1 * 8 ;
290
- let c = ( delta & ( 0xFF << 2 * 8 ) ) >> 2 * 8 ;
291
- let d = ( delta & ( 0xFF << 3 * 8 ) ) >> 3 * 8 ;
292
- data. push ( 128 ) ;
293
- data. push ( a as u8 ) ;
294
- data. push ( b as u8 ) ;
295
- data. push ( c as u8 ) ;
296
- data. push ( d as u8 ) ;
283
+ write_vari32 ( data, delta) ;
297
284
* prev = ip;
298
285
}
299
286
@@ -306,31 +293,20 @@ impl <'a>Iterator for InstPtrs<'a> {
306
293
type Item = usize ;
307
294
308
295
fn next ( & mut self ) -> Option < usize > {
309
- let x = match self . data . get ( 0 ) {
310
- Some ( & x) => x,
311
- None => return None ,
312
- } ;
313
- let delta = if x == 128 {
314
- //Read 4 bytes in little-endian format.
315
- let a = self . data [ 1 ] as u32 ;
316
- let b = self . data [ 2 ] as u32 ;
317
- let c = self . data [ 3 ] as u32 ;
318
- let d = self . data [ 4 ] as u32 ;
319
- self . data = & self . data [ 5 ..] ;
320
- ( a << 0 * 8 | b << 1 * 8 | c << 2 * 8 | d << 3 * 8 ) as i32 as isize
321
- } else {
322
- self . data = & self . data [ 1 ..] ;
323
- x as i8 as isize
324
- } ;
325
- let base = self . base as isize + delta;
296
+ if self . data . is_empty ( ) {
297
+ return None ;
298
+ }
299
+ let ( delta, nread) = read_vari32 ( self . data ) ;
300
+ let base = self . base as i32 + delta;
326
301
debug_assert ! ( base >= 0 ) ;
302
+ debug_assert ! ( nread > 0 ) ;
303
+ self . data = & self . data [ nread..] ;
327
304
self . base = base as usize ;
328
305
Some ( self . base )
329
306
}
330
307
}
331
308
332
309
impl State {
333
-
334
310
fn flags ( & self ) -> StateFlags {
335
311
StateFlags ( self . data [ 0 ] )
336
312
}
@@ -1566,14 +1542,15 @@ impl<'a> Fsm<'a> {
1566
1542
fn approximate_size ( & self ) -> usize {
1567
1543
use std:: mem:: size_of as size;
1568
1544
// Estimate that there are about 16 instructions per state consuming
1569
- // 64 = 16 * 4 bytes of space.
1545
+ // 20 = 4 + (15 * 1) bytes of space (1 byte because of delta encoding).
1546
+ const STATE_HEAP : usize = 20 + 1 ; // one extra byte for flags
1570
1547
let compiled =
1571
- ( self . cache . compiled . len ( ) * ( size :: < State > ( ) + 64 ) )
1548
+ ( self . cache . compiled . len ( ) * ( size :: < State > ( ) + STATE_HEAP ) )
1572
1549
+ ( self . cache . compiled . len ( ) * size :: < StatePtr > ( ) ) ;
1573
1550
let states =
1574
1551
self . cache . states . len ( )
1575
1552
* ( size :: < State > ( )
1576
- + 64
1553
+ + STATE_HEAP
1577
1554
+ ( self . num_byte_classes ( ) * size :: < StatePtr > ( ) ) ) ;
1578
1555
let start_states = self . cache . start_states . len ( ) * size :: < StatePtr > ( ) ;
1579
1556
self . prog . approximate_size ( ) + compiled + states + start_states
@@ -1802,11 +1779,56 @@ fn show_state_ptr(si: StatePtr) -> String {
1802
1779
s
1803
1780
}
1804
1781
1782
+ /// https://developers.google.com/protocol-buffers/docs/encoding#varints
1783
+ fn write_vari32 ( data : & mut Vec < u8 > , n : i32 ) {
1784
+ let mut un = ( n as u32 ) << 1 ;
1785
+ if n < 0 {
1786
+ un = !un;
1787
+ }
1788
+ write_varu32 ( data, un)
1789
+ }
1790
+
1791
+ /// https://developers.google.com/protocol-buffers/docs/encoding#varints
1792
+ fn read_vari32 ( data : & [ u8 ] ) -> ( i32 , usize ) {
1793
+ let ( un, i) = read_varu32 ( data) ;
1794
+ let mut n = ( un >> 1 ) as i32 ;
1795
+ if un & 1 != 0 {
1796
+ n = !n;
1797
+ }
1798
+ ( n, i)
1799
+ }
1800
+
1801
/// Appends `n` to `data` as an unsigned varint.
///
/// The value is emitted seven bits at a time, least significant group first.
/// Every byte except the last has its high bit set to signal that more bytes
/// follow, so values below 128 take one byte and any `u32` takes at most five.
///
/// https://developers.google.com/protocol-buffers/docs/encoding#varints
fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
    loop {
        let low7 = (n & 0b0111_1111) as u8;
        n >>= 7;
        if n == 0 {
            // Final group: continuation bit stays clear.
            data.push(low7);
            return;
        }
        data.push(low7 | 0b1000_0000);
    }
}
1809
+
1810
/// Reads an unsigned varint from the front of `data`.
///
/// Returns `(value, bytes_read)`. Bytes are consumed until one with a clear
/// high bit is found; each byte contributes its low seven bits, least
/// significant group first.
///
/// `(0, 0)` is returned when no complete varint can be decoded: `data` is
/// empty, it ends before a terminating byte is seen, or no terminating byte
/// appears within the five bytes that a `u32` can occupy. Callers can tell
/// failure from a decoded zero because a successful read always consumes at
/// least one byte.
///
/// https://developers.google.com/protocol-buffers/docs/encoding#varints
fn read_varu32(data: &[u8]) -> (u32, usize) {
    // A u32 spans at most five 7-bit groups (shifts 0, 7, 14, 21, 28).
    // Looking further would shift by 35 or more, which panics in debug
    // builds and silently wraps the shift amount in release builds.
    const MAX_LEN: usize = 5;

    let mut n: u32 = 0;
    let mut shift: u32 = 0;
    for (i, &b) in data.iter().enumerate().take(MAX_LEN) {
        if b < 0b1000_0000 {
            return (n | ((b as u32) << shift), i + 1);
        }
        n |= ((b as u32) & 0b0111_1111) << shift;
        shift += 7;
    }
    (0, 0)
}
1823
+
1805
1824
#[ cfg( test) ]
1806
1825
mod tests {
1807
- use quickcheck:: quickcheck;
1826
+ extern crate rand;
1827
+
1828
+ use quickcheck:: { QuickCheck , StdGen , quickcheck} ;
1808
1829
use super :: {
1809
1830
StateFlags , State , push_inst_ptr,
1831
+ write_varu32, read_varu32, write_vari32, read_vari32,
1810
1832
} ;
1811
1833
1812
1834
#[ test]
@@ -1818,10 +1840,36 @@ mod tests {
1818
1840
push_inst_ptr ( & mut data, & mut prev, ip) ;
1819
1841
}
1820
1842
let state = State { data : data. into_boxed_slice ( ) } ;
1821
- state. inst_ptrs ( ) . zip ( ips. iter ( ) ) . all ( |( x, & y) | x == y as usize )
1822
- &&
1823
- state. flags ( ) == StateFlags ( flags)
1843
+
1844
+ let expected: Vec < usize > =
1845
+ ips. into_iter ( ) . map ( |ip| ip as usize ) . collect ( ) ;
1846
+ let got: Vec < usize > = state. inst_ptrs ( ) . collect ( ) ;
1847
+ expected == got && state. flags ( ) == StateFlags ( flags)
1848
+ }
1849
+ QuickCheck :: new ( )
1850
+ . gen ( StdGen :: new ( self :: rand:: thread_rng ( ) , 70_000 ) )
1851
+ . quickcheck ( p as fn ( Vec < u32 > , u8 ) -> bool ) ;
1852
+ }
1853
+
1854
#[test]
fn prop_read_write_u32() {
    // Property: decoding the bytes produced for `value` gives `value` back
    // and consumes exactly the bytes that were written.
    fn roundtrips(value: u32) -> bool {
        let mut encoded = Vec::new();
        write_varu32(&mut encoded, value);
        read_varu32(&encoded) == (value, encoded.len())
    }
    quickcheck(roundtrips as fn(u32) -> bool);
}
1864
+
1865
#[test]
fn prop_read_write_i32() {
    // Property: decoding the bytes produced for `value` gives `value` back
    // and consumes exactly the bytes that were written.
    fn roundtrips(value: i32) -> bool {
        let mut encoded = Vec::new();
        write_vari32(&mut encoded, value);
        read_vari32(&encoded) == (value, encoded.len())
    }
    quickcheck(roundtrips as fn(i32) -> bool);
}
1827
1875
}
0 commit comments