1
1
use crate :: leb128:: { self , max_leb128_len} ;
2
2
use crate :: serialize:: { self , Decoder as _, Encoder as _} ;
3
3
use std:: borrow:: Cow ;
4
+ use std:: convert:: TryInto ;
4
5
use std:: fs:: File ;
5
- use std:: io:: { self , BufRead , BufReader , Read , Write } ;
6
+ use std:: io:: { self , BufReader , Read , Seek , SeekFrom , Write } ;
6
7
use std:: mem:: MaybeUninit ;
7
8
use std:: path:: Path ;
8
9
use std:: ptr;
@@ -681,29 +682,138 @@ impl<'a> serialize::Decoder for Decoder<'a> {
681
682
}
682
683
683
684
/// A decoder that reads serialized data from a file through its own
/// fixed-size read buffer.
///
/// The bytes in `buf[pos..cap]` have been read from `file` but not yet
/// handed out to the caller.
pub struct FileDecoder {
    /// The file the data is read from.
    file: File,
    /// Scratch buffer holding the most recently read chunk of the file.
    buf: Box<[u8]>,
    /// Index into `buf` of the next byte to hand out.
    pos: usize,
    /// Index into `buf` one past the last byte that holds data from the file.
    cap: usize,
}
686
690
687
691
impl FileDecoder {
688
692
#[ inline]
689
693
pub fn new ( file : BufReader < File > ) -> Self {
690
- FileDecoder { file }
694
+ const CAP : usize = 8 * 1024 ;
695
+ let mut buf = Vec :: with_capacity ( CAP ) ;
696
+ buf. resize ( CAP , 0u8 ) ;
697
+ let old_buf = file. buffer ( ) ;
698
+ let len = old_buf. len ( ) ;
699
+ buf[ ..len] . copy_from_slice ( old_buf) ;
700
+ let file = file. into_inner ( ) ;
701
+ FileDecoder { file, buf : buf. into ( ) , pos : 0 , cap : len }
691
702
}
692
703
693
704
#[ inline]
694
705
pub fn advance ( & mut self , bytes : usize ) {
695
- self . file . consume ( bytes)
706
+ self . pos += bytes;
707
+ debug_assert ! ( self . pos <= self . cap) ;
708
+ }
709
+
710
+ #[ inline]
711
+ pub fn read_all ( self ) -> Result < ( Box < [ u8 ] > , usize ) , io:: Error > {
712
+ let mut file = self . file ;
713
+ let start_pos = file. seek ( SeekFrom :: Current ( 0 ) ) ?;
714
+ let start_pos = start_pos. try_into ( ) . unwrap ( ) ;
715
+ file. seek ( SeekFrom :: Start ( 0 ) ) ?;
716
+ let mut bytes = Vec :: new ( ) ;
717
+ file. read_to_end ( & mut bytes) ?;
718
+ Ok ( ( bytes. into ( ) , start_pos) )
719
+ }
720
+
721
+ #[ inline]
722
+ fn read_byte ( & mut self ) -> Result < u8 , io:: Error > {
723
+ if self . pos < self . cap {
724
+ let c = self . buf [ self . pos ] ;
725
+ self . pos += 1 ;
726
+ Ok ( c)
727
+ } else {
728
+ let read = self . file . read ( & mut self . buf ) ?;
729
+ self . pos = 0 ;
730
+ self . cap = read;
731
+ Ok ( self . buf [ 0 ] )
732
+ }
733
+ }
734
+
735
+ fn read_exact ( & mut self , mut out : & mut [ u8 ] ) -> Result < ( ) , io:: Error > {
736
+ loop {
737
+ let len = out. len ( ) ;
738
+ if len == 0 {
739
+ return Ok ( ( ) ) ;
740
+ } else if self . pos + len < self . cap {
741
+ out. copy_from_slice ( & self . buf [ self . pos ..self . pos + len] ) ;
742
+ self . pos += len;
743
+ return Ok ( ( ) ) ;
744
+ }
745
+
746
+ let available = self . cap - self . pos ;
747
+ out[ ..available] . copy_from_slice ( & self . buf [ self . pos ..self . cap ] ) ;
748
+ self . pos += len;
749
+
750
+ // Re-fill the buffer starting from zero.
751
+ let read = self . file . read ( & mut self . buf ) ?;
752
+ self . pos = 0 ;
753
+ self . cap = read;
754
+ out = & mut out[ available..] ;
755
+ }
756
+ }
757
+
758
+ /// Read the buffer until we encounter a byte with its top bit unset.
759
+ #[ inline]
760
+ fn read_for_leb128 ( & mut self ) -> Result < & [ u8 ] , io:: Error > {
761
+ self . fill_for_leb128 ( ) ?;
762
+ Ok ( & self . buf [ self . pos ..self . cap ] )
763
+ }
764
+
765
+ /// Fill the buffer until we encounter a byte with its top bit unset.
766
+ /// Fast path.
767
+ #[ inline]
768
+ fn fill_for_leb128 ( & mut self ) -> Result < ( ) , io:: Error > {
769
+ let buf = & mut self . buf [ ..] ;
770
+ let known = & buf[ self . pos ..self . cap ] ;
771
+ if std:: intrinsics:: likely ( known. iter ( ) . any ( |c| c & 0x80 == 0 ) ) {
772
+ return Ok ( ( ) ) ;
773
+ }
774
+
775
+ self . fill_more_for_leb128 ( )
776
+ }
777
+
778
+ /// Fill the buffer until we encounter a byte with its top bit unset.
779
+ /// Slow path.
780
+ #[ cold]
781
+ fn fill_more_for_leb128 ( & mut self ) -> Result < ( ) , io:: Error > {
782
+ let buf = & mut self . buf [ ..] ;
783
+ let max = leb128:: max_leb128_len ( ) ;
784
+ if self . pos + max >= self . cap {
785
+ // The buffer should be large enough.
786
+ debug_assert ! ( self . pos > max) ;
787
+ let len = self . cap - self . pos ;
788
+ let ( start, end) = buf. split_at_mut ( self . pos ) ;
789
+ start[ ..len] . copy_from_slice ( & end[ ..len] ) ;
790
+ self . pos = 0 ;
791
+ self . cap = len;
792
+ }
793
+
794
+ // We've reached the end of our internal buffer then we need to fetch
795
+ // some more data from the file.
796
+ loop {
797
+ let read = self . file . read ( & mut buf[ self . cap ..] ) ?;
798
+ self . cap += read;
799
+
800
+ if read == 0 {
801
+ return Err ( io:: Error :: new ( io:: ErrorKind :: UnexpectedEof , "" ) ) ;
802
+ }
803
+
804
+ let known = & mut buf[ self . pos ..self . cap ] ;
805
+ if known. iter ( ) . any ( |c| c & 0x80 == 0 ) {
806
+ return Ok ( ( ) ) ;
807
+ }
808
+ }
696
809
}
697
810
}
698
811
699
812
/// Decodes one LEB128-encoded integer of type `$ty` from the decoder `$dec`.
///
/// `$fun` names the function in the `leb128` module that does the decoding;
/// it is given the decoder's buffered bytes and returns the value together
/// with the number of bytes it used, which are then marked as consumed.
/// Expands to an expression that evaluates to `Ok(value)` and propagates a
/// buffer-fill error with `?`.
macro_rules! read_leb128 {
    ($dec:expr, $fun:ident, $ty:ty) => {{
        let pending = $dec.read_for_leb128()?;
        let decoded: ($ty, usize) = leb128::$fun(pending);
        let (value, consumed) = decoded;
        $dec.advance(consumed);
        Ok(value)
    }};
}
@@ -738,9 +848,7 @@ impl serialize::Decoder for FileDecoder {
738
848
739
849
#[ inline]
740
850
fn read_u8 ( & mut self ) -> Result < u8 , Self :: Error > {
741
- let mut value = [ 0 ; 1 ] ;
742
- self . file . read_exact ( & mut value) ?;
743
- let [ value] = value;
851
+ let value = self . read_byte ( ) ?;
744
852
Ok ( value)
745
853
}
746
854
@@ -809,7 +917,7 @@ impl serialize::Decoder for FileDecoder {
809
917
let len = self . read_usize ( ) ?;
810
918
let mut buf = Vec :: new ( ) ;
811
919
buf. resize ( len, 0u8 ) ;
812
- self . file . read_exact ( & mut buf) ?;
920
+ self . read_exact ( & mut buf) ?;
813
921
let s = String :: from_utf8 ( buf) . unwrap ( ) ;
814
922
Ok ( Cow :: Owned ( s) )
815
923
}
@@ -821,7 +929,7 @@ impl serialize::Decoder for FileDecoder {
821
929
822
930
#[ inline]
823
931
fn read_raw_bytes ( & mut self , s : & mut [ MaybeUninit < u8 > ] ) -> Result < ( ) , Self :: Error > {
824
- self . file . read_exact ( unsafe { MaybeUninit :: slice_assume_init_mut ( s) } )
932
+ self . read_exact ( unsafe { MaybeUninit :: slice_assume_init_mut ( s) } )
825
933
}
826
934
}
827
935
0 commit comments