8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
- // FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for maintenance),
12
- // they should be in separate files/modules, with BitvSet only using Bitv's public API.
13
-
14
- // First rule of Bitv club: almost everything can actually overflow because we're working with
15
- // bits and not bytes.
16
- //
17
- // Second rule of Bitv club: the last "block" of bits may be partially used. We must ensure that
18
- // those unused bits are zeroed out, as other methods will assume this is the case. It may be
19
- // the case that this isn't a great design, but having "undefined" bits is headache-inducing.
20
- //
21
- // Third rule of Bitv club: BitvSet is fairly tightly coupled to Bitv's implementation details.
22
- // Make sure any changes to Bitv are properly addressed in BitvSet.
11
+ // FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for
12
+ // maintenance), they should be in separate files/modules, with BitvSet only
13
+ // using Bitv's public API. This will be hard to do without hurting
14
+ // performance, though: `Bitv` should not leak its internal representation,
15
+ // yet that representation (as `u32`s) must be assumed for best performance.
16
+
17
+ // FIXME(tbu-): `Bitv`'s methods shouldn't be `union`, `intersection`, but
18
+ // rather `or` and `and`.
19
+
20
+ // (1) Be careful, most things can overflow here because the number of bits in
21
+ // memory can overflow `uint`.
22
+ // (2) Make sure that the underlying vector has no excess length:
23
+ // E.g. `nbits == 16`, `storage.len() == 2` would be excess length,
24
+ // because the last word isn't used at all. This is important because some
25
+ // methods rely on it (for *CORRECTNESS*).
26
+ // (3) Make sure that the unused bits in the last word are zeroed out; again,
27
+ // other methods rely on it for *CORRECTNESS*.
28
+ // (4) `BitvSet` is tightly coupled with `Bitv`, so any changes you make in
29
+ // `Bitv` will need to be reflected in `BitvSet`.
23
30
24
31
//! Collections implemented with bit vectors.
25
32
//!
@@ -82,10 +89,10 @@ use core::iter::{Cloned, Chain, Enumerate, Repeat, Skip, Take};
82
89
use core:: iter;
83
90
use core:: num:: Int ;
84
91
use core:: slice:: { Items , MutItems } ;
85
- use core:: { u32, uint} ;
86
- use std:: hash;
92
+ use core:: { u8, u32, uint} ;
87
93
88
- use vec:: Vec ;
94
+ use hash;
95
+ use Vec ;
89
96
90
97
type Blocks < ' a > = Cloned < Items < ' a , u32 > > ;
91
98
type MutBlocks < ' a > = MutItems < ' a , u32 > ;
@@ -181,17 +188,15 @@ fn mask_for_bits(bits: uint) -> u32 {
181
188
}
182
189
183
190
impl Bitv {
184
- /// Applies the given operation to the blocks of self and other, and sets self to
185
- /// be the result.
191
+ /// Applies the given operation to the blocks of self and other, and sets
192
+ /// self to be the result. This relies on the caller not to corrupt the
193
+ /// last word.
186
194
#[ inline]
187
195
fn process < F > ( & mut self , other : & Bitv , mut op : F ) -> bool where F : FnMut ( u32 , u32 ) -> u32 {
188
- let len = other. storage . len ( ) ;
189
- assert_eq ! ( self . storage. len( ) , len) ;
196
+ assert_eq ! ( self . len( ) , other. len( ) ) ;
197
+ // This could theoretically be a `debug_assert!`.
198
+ assert_eq ! ( self . storage. len( ) , other. storage. len( ) ) ;
190
199
let mut changed = false ;
191
- // Notice: `a` is *not* masked here, which is fine as long as
192
- // `op` is a bitwise operation, since any bits that should've
193
- // been masked were fine to change anyway. `b` is masked to
194
- // make sure its unmasked bits do not cause damage.
195
200
for ( a, b) in self . blocks_mut ( ) . zip ( other. blocks ( ) ) {
196
201
let w = op ( * a, b) ;
197
202
if * a != w {
@@ -204,21 +209,20 @@ impl Bitv {
204
209
205
210
/// Iterator over mutable refs to the underlying blocks of data.
206
211
fn blocks_mut ( & mut self ) -> MutBlocks {
207
- let blocks = blocks_for_bits ( self . len ( ) ) ;
208
- self . storage . slice_to_mut ( blocks ) . iter_mut ( )
212
+ // (2)
213
+ self . storage . iter_mut ( )
209
214
}
210
215
211
216
/// Iterator over the underlying blocks of data
212
217
fn blocks ( & self ) -> Blocks {
213
- let blocks = blocks_for_bits ( self . len ( ) ) ;
214
- self . storage [ ..blocks ] . iter ( ) . cloned ( )
218
+ // (2)
219
+ self . storage . iter ( ) . cloned ( )
215
220
}
216
221
217
- /// An operation might screw up the unused bits in the last block of the Bitv.
218
- /// It 's assumed to be all 0's . This fixes it up.
222
+ /// An operation might screw up the unused bits in the last block of the
223
+ /// `Bitv`. As per (3), it's assumed to be all 0s. This method fixes it up.
219
224
fn fix_last_block ( & mut self ) {
220
- let len = self . len ( ) ;
221
- let extra_bits = len % u32:: BITS ;
225
+ let extra_bits = self . len ( ) % u32:: BITS ;
222
226
if extra_bits > 0 {
223
227
let mask = ( 1 << extra_bits) - 1 ;
224
228
let storage_len = self . storage . len ( ) ;
@@ -259,7 +263,6 @@ impl Bitv {
259
263
storage : Vec :: from_elem ( nblocks, if bit { !0u32 } else { 0u32 } ) ,
260
264
nbits : nbits
261
265
} ;
262
-
263
266
bitv. fix_last_block ( ) ;
264
267
bitv
265
268
}
@@ -295,15 +298,33 @@ impl Bitv {
295
298
/// false, false, true, false]));
296
299
/// ```
297
300
pub fn from_bytes ( bytes : & [ u8 ] ) -> Bitv {
298
- Bitv :: from_fn ( bytes. len ( ) * 8 , |i| {
299
- let b = bytes[ i / 8 ] as u32 ;
300
- let offset = i % 8 ;
301
- b >> ( 7 - offset) & 1 == 1
302
- } )
301
+ let len = bytes. len ( ) . checked_mul ( u8:: BITS ) . expect ( "capacity overflow" ) ;
302
+ let mut bitv = Bitv :: with_capacity ( len) ;
303
+ let complete_words = bytes. len ( ) / 4 ;
304
+ let extra_bytes = bytes. len ( ) % 4 ;
305
+
306
+ for i in range ( 0 , complete_words) {
307
+ bitv. storage . push (
308
+ ( bytes[ i * 4 + 0 ] as u32 << 0 ) |
309
+ ( bytes[ i * 4 + 1 ] as u32 << 8 ) |
310
+ ( bytes[ i * 4 + 2 ] as u32 << 16 ) |
311
+ ( bytes[ i * 4 + 3 ] as u32 << 24 )
312
+ ) ;
313
+ }
314
+
315
+ if extra_bytes > 0 {
316
+ let mut last_word = 0u32 ;
317
+ for ( i, & byte) in bytes[ complete_words* 4 ..] . iter ( ) . enumerate ( ) {
318
+ last_word |= byte as u32 << ( i * 8 ) ;
319
+ }
320
+ bitv. storage . push ( last_word) ;
321
+ }
322
+
323
+ bitv
303
324
}
304
325
305
- /// Creates a `Bitv` of the specified length where the value at each
306
- /// index is `f(index)`.
326
+ /// Creates a `Bitv` of the specified length where the value at each index
327
+ /// is `f(index)`.
307
328
///
308
329
/// # Examples
309
330
///
@@ -339,7 +360,9 @@ impl Bitv {
339
360
#[ inline]
340
361
#[ unstable = "panic semantics are likely to change in the future" ]
341
362
pub fn get ( & self , i : uint ) -> Option < bool > {
342
- assert ! ( i < self . nbits) ;
363
+ if i >= self . nbits {
364
+ return None ;
365
+ }
343
366
let w = i / u32:: BITS ;
344
367
let b = i % u32:: BITS ;
345
368
self . storage . get ( w) . map ( |& block|
@@ -548,7 +571,7 @@ impl Bitv {
548
571
#[ inline]
549
572
#[ unstable = "matches collection reform specification, waiting for dust to settle" ]
550
573
pub fn iter < ' a > ( & ' a self ) -> Bits < ' a > {
551
- Bits { bitv : self , next_idx : 0 , end_idx : self . nbits }
574
+ Bits { bitv : self , next_idx : 0 , end_idx : self . nbits }
552
575
}
553
576
554
577
/// Returns `true` if all bits are 0.
@@ -608,7 +631,7 @@ impl Bitv {
608
631
/// assert_eq!(bv.to_bytes(), vec!(0b00100000, 0b10000000));
609
632
/// ```
610
633
pub fn to_bytes ( & self ) -> Vec < u8 > {
611
- fn bit ( bitv : & Bitv , byte : uint , bit : uint ) -> u8 {
634
+ fn bit ( bitv : & Bitv , byte : uint , bit : uint ) -> u8 {
612
635
let offset = byte * 8 + bit;
613
636
if offset >= bitv. nbits {
614
637
0
@@ -634,7 +657,7 @@ impl Bitv {
634
657
/// Deprecated: Use `iter().collect()`.
635
658
#[ deprecated = "Use `iter().collect()`" ]
636
659
pub fn to_bools ( & self ) -> Vec < bool > {
637
- Vec :: from_fn ( self . nbits , |i| self [ i ] )
660
+ self . iter ( ) . collect ( )
638
661
}
639
662
640
663
/// Compares a `Bitv` to a slice of `bool`s.
@@ -656,12 +679,7 @@ impl Bitv {
656
679
/// ```
657
680
pub fn eq_vec ( & self , v : & [ bool ] ) -> bool {
658
681
assert_eq ! ( self . nbits, v. len( ) ) ;
659
- let mut i = 0 ;
660
- while i < self . nbits {
661
- if self [ i] != v[ i] { return false ; }
662
- i = i + 1 ;
663
- }
664
- true
682
+ iter:: order:: eq ( self . iter ( ) , v. iter ( ) . cloned ( ) )
665
683
}
666
684
667
685
/// Shortens a `Bitv`, dropping excess elements.
@@ -682,6 +700,7 @@ impl Bitv {
682
700
pub fn truncate ( & mut self , len : uint ) {
683
701
if len < self . len ( ) {
684
702
self . nbits = len;
703
+ // This fixes (2).
685
704
self . storage . truncate ( blocks_for_bits ( len) ) ;
686
705
self . fix_last_block ( ) ;
687
706
}
@@ -707,13 +726,9 @@ impl Bitv {
707
726
#[ unstable = "matches collection reform specification, waiting for dust to settle" ]
708
727
pub fn reserve ( & mut self , additional : uint ) {
709
728
let desired_cap = self . len ( ) . checked_add ( additional) . expect ( "capacity overflow" ) ;
710
- match self . storage . len ( ) . checked_mul ( u32:: BITS ) {
711
- None => { } // Vec has more initialized capacity than we can ever use
712
- Some ( initialized_cap) => {
713
- if desired_cap > initialized_cap {
714
- self . storage . reserve ( blocks_for_bits ( desired_cap - initialized_cap) ) ;
715
- }
716
- }
729
+ let storage_len = self . storage . len ( ) ;
730
+ if desired_cap > self . capacity ( ) {
731
+ self . storage . reserve ( blocks_for_bits ( desired_cap) - storage_len) ;
717
732
}
718
733
}
719
734
@@ -741,13 +756,9 @@ impl Bitv {
741
756
#[ unstable = "matches collection reform specification, waiting for dust to settle" ]
742
757
pub fn reserve_exact ( & mut self , additional : uint ) {
743
758
let desired_cap = self . len ( ) . checked_add ( additional) . expect ( "capacity overflow" ) ;
744
- match self . storage . len ( ) . checked_mul ( u32:: BITS ) {
745
- None => { } // Vec has more initialized capacity than we can ever use
746
- Some ( initialized_cap) => {
747
- if desired_cap > initialized_cap {
748
- self . storage . reserve_exact ( blocks_for_bits ( desired_cap - initialized_cap) ) ;
749
- }
750
- }
759
+ let storage_len = self . storage . len ( ) ;
760
+ if desired_cap > self . capacity ( ) {
761
+ self . storage . reserve_exact ( blocks_for_bits ( desired_cap) - storage_len) ;
751
762
}
752
763
}
753
764
@@ -801,8 +812,7 @@ impl Bitv {
801
812
if value {
802
813
self . storage [ old_last_word] |= !mask;
803
814
} else {
804
- // Extra bits are already supposed to be zero by invariant, but play it safe...
805
- self . storage [ old_last_word] &= mask;
815
+ // Extra bits are already zero by invariant.
806
816
}
807
817
}
808
818
@@ -843,9 +853,13 @@ impl Bitv {
843
853
} else {
844
854
let i = self . nbits - 1 ;
845
855
let ret = self [ i] ;
846
- // Second rule of Bitv Club
856
+ // (3)
847
857
self . set ( i, false ) ;
848
858
self . nbits = i;
859
+ if self . nbits % u32:: BITS == 0 {
860
+ // (2)
861
+ self . storage . pop ( ) ;
862
+ }
849
863
Some ( ret)
850
864
}
851
865
}
@@ -864,11 +878,11 @@ impl Bitv {
864
878
/// ```
865
879
#[ unstable = "matches collection reform specification, waiting for dust to settle" ]
866
880
pub fn push ( & mut self , elem : bool ) {
867
- let insert_pos = self . nbits ;
868
- self . nbits = self . nbits . checked_add ( 1 ) . expect ( "Capacity overflow" ) ;
869
- if self . storage . len ( ) . checked_mul ( u32:: BITS ) . unwrap_or ( uint:: MAX ) < self . nbits {
881
+ if self . nbits % u32:: BITS == 0 {
870
882
self . storage . push ( 0 ) ;
871
883
}
884
+ let insert_pos = self . nbits ;
885
+ self . nbits = self . nbits . checked_add ( 1 ) . expect ( "Capacity overflow" ) ;
872
886
self . set ( insert_pos, elem) ;
873
887
}
874
888
@@ -958,7 +972,7 @@ impl Ord for Bitv {
958
972
impl fmt:: Show for Bitv {
959
973
fn fmt ( & self , fmt : & mut fmt:: Formatter ) -> fmt:: Result {
960
974
for bit in self . iter ( ) {
961
- try!( write ! ( fmt, "{}" , if bit { 1 u } else { 0 u } ) ) ;
975
+ try!( write ! ( fmt, "{}" , if bit { 1u32 } else { 0u32 } ) ) ;
962
976
}
963
977
Ok ( ( ) )
964
978
}
0 commit comments