Skip to content

Commit e84a383

Browse files
tbu-Gankra
authored andcommitted
Add a new invariant to Bitv
The length of the underlying vector must now be exactly as long as it needs to be.
1 parent 3deb97f commit e84a383

File tree

1 file changed

+84
-70
lines changed

1 file changed

+84
-70
lines changed

src/libcollections/bit.rs

+84-70
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,25 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for maintenance),
12-
// they should be in separate files/modules, with BitvSet only using Bitv's public API.
13-
14-
// First rule of Bitv club: almost everything can actually overflow because we're working with
15-
// bits and not bytes.
16-
//
17-
// Second rule of Bitv club: the last "block" of bits may be partially used. We must ensure that
18-
// those unused bits are zeroed out, as other methods will assume this is the case. It may be
19-
// the case that this isn't a great design, but having "undefined" bits is headache-inducing.
20-
//
21-
// Third rule of Bitv club: BitvSet is fairly tightly coupled to Bitv's implementation details.
22-
// Make sure any changes to Bitv are properly addressed in BitvSet.
11+
// FIXME(Gankro): Bitv and BitvSet are very tightly coupled. Ideally (for
12+
// maintenance), they should be in separate files/modules, with BitvSet only
13+
// using Bitv's public API. This will be hard for performance though, because
14+
// `Bitv` will not want to leak its internal representation while its internal
15+
// representation as `u32`s must be assumed for best performance.
16+
17+
// FIXME(tbu-): `Bitv`'s methods shouldn't be `union`, `intersection`, but
18+
// rather `or` and `and`.
19+
20+
// (1) Be careful, most things can overflow here because the amount of bits in
21+
// memory can overflow `uint`.
22+
// (2) Make sure that the underlying vector has no excess length:
23+
// E. g. `nbits == 16`, `storage.len() == 2` would be excess length,
24+
// because the last word isn't used at all. This is important because some
25+
// methods rely on it (for *CORRECTNESS*).
26+
// (3) Make sure that the unused bits in the last word are zeroed out, again
27+
// other methods rely on it for *CORRECTNESS*.
28+
// (4) `BitvSet` is tightly coupled with `Bitv`, so any changes you make in
29+
// `Bitv` will need to be reflected in `BitvSet`.
2330

2431
//! Collections implemented with bit vectors.
2532
//!
@@ -82,10 +89,10 @@ use core::iter::{Cloned, Chain, Enumerate, Repeat, Skip, Take};
8289
use core::iter;
8390
use core::num::Int;
8491
use core::slice::{Items, MutItems};
85-
use core::{u32, uint};
86-
use std::hash;
92+
use core::{u8, u32, uint};
8793

88-
use vec::Vec;
94+
use hash;
95+
use Vec;
8996

9097
type Blocks<'a> = Cloned<Items<'a, u32>>;
9198
type MutBlocks<'a> = MutItems<'a, u32>;
@@ -181,17 +188,15 @@ fn mask_for_bits(bits: uint) -> u32 {
181188
}
182189

183190
impl Bitv {
184-
/// Applies the given operation to the blocks of self and other, and sets self to
185-
/// be the result.
191+
/// Applies the given operation to the blocks of self and other, and sets
192+
/// self to be the result. This relies on the caller not to corrupt the
193+
/// last word.
186194
#[inline]
187195
fn process<F>(&mut self, other: &Bitv, mut op: F) -> bool where F: FnMut(u32, u32) -> u32 {
188-
let len = other.storage.len();
189-
assert_eq!(self.storage.len(), len);
196+
assert_eq!(self.len(), other.len());
197+
// This could theoretically be a `debug_assert!`.
198+
assert_eq!(self.storage.len(), other.storage.len());
190199
let mut changed = false;
191-
// Notice: `a` is *not* masked here, which is fine as long as
192-
// `op` is a bitwise operation, since any bits that should've
193-
// been masked were fine to change anyway. `b` is masked to
194-
// make sure its unmasked bits do not cause damage.
195200
for (a, b) in self.blocks_mut().zip(other.blocks()) {
196201
let w = op(*a, b);
197202
if *a != w {
@@ -204,21 +209,20 @@ impl Bitv {
204209

205210
/// Iterator over mutable refs to the underlying blocks of data.
206211
fn blocks_mut(&mut self) -> MutBlocks {
207-
let blocks = blocks_for_bits(self.len());
208-
self.storage.slice_to_mut(blocks).iter_mut()
212+
// (2)
213+
self.storage.iter_mut()
209214
}
210215

211216
/// Iterator over the underlying blocks of data
212217
fn blocks(&self) -> Blocks {
213-
let blocks = blocks_for_bits(self.len());
214-
self.storage[..blocks].iter().cloned()
218+
// (2)
219+
self.storage.iter().cloned()
215220
}
216221

217-
/// An operation might screw up the unused bits in the last block of the Bitv.
218-
/// It's assumed to be all 0's. This fixes it up.
222+
/// An operation might screw up the unused bits in the last block of the
223+
/// `Bitv`. As per (3), it's assumed to be all 0s. This method fixes it up.
219224
fn fix_last_block(&mut self) {
220-
let len = self.len();
221-
let extra_bits = len % u32::BITS;
225+
let extra_bits = self.len() % u32::BITS;
222226
if extra_bits > 0 {
223227
let mask = (1 << extra_bits) - 1;
224228
let storage_len = self.storage.len();
@@ -259,7 +263,6 @@ impl Bitv {
259263
storage: Vec::from_elem(nblocks, if bit { !0u32 } else { 0u32 }),
260264
nbits: nbits
261265
};
262-
263266
bitv.fix_last_block();
264267
bitv
265268
}
@@ -295,15 +298,33 @@ impl Bitv {
295298
/// false, false, true, false]));
296299
/// ```
297300
pub fn from_bytes(bytes: &[u8]) -> Bitv {
298-
Bitv::from_fn(bytes.len() * 8, |i| {
299-
let b = bytes[i / 8] as u32;
300-
let offset = i % 8;
301-
b >> (7 - offset) & 1 == 1
302-
})
301+
let len = bytes.len().checked_mul(u8::BITS).expect("capacity overflow");
302+
let mut bitv = Bitv::with_capacity(len);
303+
let complete_words = bytes.len() / 4;
304+
let extra_bytes = bytes.len() % 4;
305+
306+
for i in range(0, complete_words) {
307+
bitv.storage.push(
308+
(bytes[i * 4 + 0] as u32 << 0) |
309+
(bytes[i * 4 + 1] as u32 << 8) |
310+
(bytes[i * 4 + 2] as u32 << 16) |
311+
(bytes[i * 4 + 3] as u32 << 24)
312+
);
313+
}
314+
315+
if extra_bytes > 0 {
316+
let mut last_word = 0u32;
317+
for (i, &byte) in bytes[complete_words*4..].iter().enumerate() {
318+
last_word |= byte as u32 << (i * 8);
319+
}
320+
bitv.storage.push(last_word);
321+
}
322+
323+
bitv
303324
}
304325

305-
/// Creates a `Bitv` of the specified length where the value at each
306-
/// index is `f(index)`.
326+
/// Creates a `Bitv` of the specified length where the value at each index
327+
/// is `f(index)`.
307328
///
308329
/// # Examples
309330
///
@@ -339,7 +360,9 @@ impl Bitv {
339360
#[inline]
340361
#[unstable = "panic semantics are likely to change in the future"]
341362
pub fn get(&self, i: uint) -> Option<bool> {
342-
assert!(i < self.nbits);
363+
if i >= self.nbits {
364+
return None;
365+
}
343366
let w = i / u32::BITS;
344367
let b = i % u32::BITS;
345368
self.storage.get(w).map(|&block|
@@ -548,7 +571,7 @@ impl Bitv {
548571
#[inline]
549572
#[unstable = "matches collection reform specification, waiting for dust to settle"]
550573
pub fn iter<'a>(&'a self) -> Bits<'a> {
551-
Bits {bitv: self, next_idx: 0, end_idx: self.nbits}
574+
Bits { bitv: self, next_idx: 0, end_idx: self.nbits }
552575
}
553576

554577
/// Returns `true` if all bits are 0.
@@ -608,7 +631,7 @@ impl Bitv {
608631
/// assert_eq!(bv.to_bytes(), vec!(0b00100000, 0b10000000));
609632
/// ```
610633
pub fn to_bytes(&self) -> Vec<u8> {
611-
fn bit (bitv: &Bitv, byte: uint, bit: uint) -> u8 {
634+
fn bit(bitv: &Bitv, byte: uint, bit: uint) -> u8 {
612635
let offset = byte * 8 + bit;
613636
if offset >= bitv.nbits {
614637
0
@@ -634,7 +657,7 @@ impl Bitv {
634657
/// Deprecated: Use `iter().collect()`.
635658
#[deprecated = "Use `iter().collect()`"]
636659
pub fn to_bools(&self) -> Vec<bool> {
637-
Vec::from_fn(self.nbits, |i| self[i])
660+
self.iter().collect()
638661
}
639662

640663
/// Compares a `Bitv` to a slice of `bool`s.
@@ -656,12 +679,7 @@ impl Bitv {
656679
/// ```
657680
pub fn eq_vec(&self, v: &[bool]) -> bool {
658681
assert_eq!(self.nbits, v.len());
659-
let mut i = 0;
660-
while i < self.nbits {
661-
if self[i] != v[i] { return false; }
662-
i = i + 1;
663-
}
664-
true
682+
iter::order::eq(self.iter(), v.iter().cloned())
665683
}
666684

667685
/// Shortens a `Bitv`, dropping excess elements.
@@ -682,6 +700,7 @@ impl Bitv {
682700
pub fn truncate(&mut self, len: uint) {
683701
if len < self.len() {
684702
self.nbits = len;
703+
// This fixes (2).
685704
self.storage.truncate(blocks_for_bits(len));
686705
self.fix_last_block();
687706
}
@@ -707,13 +726,9 @@ impl Bitv {
707726
#[unstable = "matches collection reform specification, waiting for dust to settle"]
708727
pub fn reserve(&mut self, additional: uint) {
709728
let desired_cap = self.len().checked_add(additional).expect("capacity overflow");
710-
match self.storage.len().checked_mul(u32::BITS) {
711-
None => {} // Vec has more initialized capacity than we can ever use
712-
Some(initialized_cap) => {
713-
if desired_cap > initialized_cap {
714-
self.storage.reserve(blocks_for_bits(desired_cap - initialized_cap));
715-
}
716-
}
729+
let storage_len = self.storage.len();
730+
if desired_cap > self.capacity() {
731+
self.storage.reserve(blocks_for_bits(desired_cap) - storage_len);
717732
}
718733
}
719734

@@ -741,13 +756,9 @@ impl Bitv {
741756
#[unstable = "matches collection reform specification, waiting for dust to settle"]
742757
pub fn reserve_exact(&mut self, additional: uint) {
743758
let desired_cap = self.len().checked_add(additional).expect("capacity overflow");
744-
match self.storage.len().checked_mul(u32::BITS) {
745-
None => {} // Vec has more initialized capacity than we can ever use
746-
Some(initialized_cap) => {
747-
if desired_cap > initialized_cap {
748-
self.storage.reserve_exact(blocks_for_bits(desired_cap - initialized_cap));
749-
}
750-
}
759+
let storage_len = self.storage.len();
760+
if desired_cap > self.capacity() {
761+
self.storage.reserve_exact(blocks_for_bits(desired_cap) - storage_len);
751762
}
752763
}
753764

@@ -801,8 +812,7 @@ impl Bitv {
801812
if value {
802813
self.storage[old_last_word] |= !mask;
803814
} else {
804-
// Extra bits are already supposed to be zero by invariant, but play it safe...
805-
self.storage[old_last_word] &= mask;
815+
// Extra bits are already zero by invariant.
806816
}
807817
}
808818

@@ -843,9 +853,13 @@ impl Bitv {
843853
} else {
844854
let i = self.nbits - 1;
845855
let ret = self[i];
846-
// Second rule of Bitv Club
856+
// (3)
847857
self.set(i, false);
848858
self.nbits = i;
859+
if self.nbits % u32::BITS == 0 {
860+
// (2)
861+
self.storage.pop();
862+
}
849863
Some(ret)
850864
}
851865
}
@@ -864,11 +878,11 @@ impl Bitv {
864878
/// ```
865879
#[unstable = "matches collection reform specification, waiting for dust to settle"]
866880
pub fn push(&mut self, elem: bool) {
867-
let insert_pos = self.nbits;
868-
self.nbits = self.nbits.checked_add(1).expect("Capacity overflow");
869-
if self.storage.len().checked_mul(u32::BITS).unwrap_or(uint::MAX) < self.nbits {
881+
if self.nbits % u32::BITS == 0 {
870882
self.storage.push(0);
871883
}
884+
let insert_pos = self.nbits;
885+
self.nbits = self.nbits.checked_add(1).expect("Capacity overflow");
872886
self.set(insert_pos, elem);
873887
}
874888

@@ -958,7 +972,7 @@ impl Ord for Bitv {
958972
impl fmt::Show for Bitv {
959973
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
960974
for bit in self.iter() {
961-
try!(write!(fmt, "{}", if bit { 1u } else { 0u }));
975+
try!(write!(fmt, "{}", if bit { 1u32 } else { 0u32 }));
962976
}
963977
Ok(())
964978
}

0 commit comments

Comments
 (0)