Skip to content

Commit 6a9b5e4

Browse files
committed
Clarify HashMap's capacity handling.
This commit does the following.

- Changes the terminology for capacities used within HashMap's code. "Internal capacity" is now consistently "raw capacity", and "usable capacity" is now consistently just "capacity". This makes the code easier to understand.

- Reworks capacity and raw capacity computations. Raw capacity computations are now handled in a single place: `DefaultResizePolicy::raw_capacity()`. This function correctly returns zero when given zero, which means that the following cases now result in a capacity of zero when they previously did not.
  * `Hash{Map,Set}::with_capacity(0)`
  * `Hash{Map,Set}::with_capacity_and_hasher(0)`
  * `Hash{Map,Set}::shrink_to_fit()`, when used with a hash map/set whose elements have all been removed

- Strengthens the language used in the comments describing the above functions, to make it clearer when they will result in a map/set with a capacity of zero. The new language is based on the language used for the corresponding functions in `Vec`.

- Adds tests for the above zero-capacity cases.

- Removes `test_resize_policy` because it is no longer useful.
1 parent 8ccfc69 commit 6a9b5e4

File tree

2 files changed

+121
-90
lines changed

2 files changed

+121
-90
lines changed

src/libstd/collections/hash/map.rs

+93-85
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,9 @@ use super::table::BucketState::{
3434
Full,
3535
};
3636

37-
const INITIAL_LOG2_CAP: usize = 5;
38-
const INITIAL_CAPACITY: usize = 1 << INITIAL_LOG2_CAP; // 2^5
37+
const MIN_NONZERO_RAW_CAPACITY: usize = 32; // must be a power of two
3938

40-
/// The default behavior of HashMap implements a load factor of 90.9%.
41-
/// This behavior is characterized by the following condition:
42-
///
43-
/// - if size > 0.909 * capacity: grow the map
39+
/// The default behavior of HashMap implements a maximum load factor of 90.9%.
4440
#[derive(Clone)]
4541
struct DefaultResizePolicy;
4642

@@ -49,40 +45,35 @@ impl DefaultResizePolicy {
4945
DefaultResizePolicy
5046
}
5147

48+
/// A hash map's "capacity" is the number of elements it can hold without
49+
/// being resized. Its "raw capacity" is the number of slots required to
50+
/// provide that capacity, accounting for maximum loading. The raw capacity
51+
/// is always zero or a power of two.
5252
#[inline]
53-
fn min_capacity(&self, usable_size: usize) -> usize {
54-
// Here, we are rephrasing the logic by specifying the lower limit
55-
// on capacity:
56-
//
57-
// - if `cap < size * 1.1`: grow the map
58-
usable_size * 11 / 10
53+
fn raw_capacity(&self, len: usize) -> usize {
54+
if len == 0 {
55+
0
56+
} else {
57+
// 1. Account for loading: `raw_capacity >= len * 1.1`.
58+
// 2. Ensure it is a power of two.
59+
// 3. Ensure it is at least the minimum size.
60+
let mut raw_cap = len * 11 / 10;
61+
assert!(raw_cap >= len, "raw_cap overflow");
62+
raw_cap = raw_cap.checked_next_power_of_two().expect("raw_capacity overflow");
63+
raw_cap = max(MIN_NONZERO_RAW_CAPACITY, raw_cap);
64+
raw_cap
65+
}
5966
}
6067

61-
/// An inverse of `min_capacity`, approximately.
68+
/// Returns the capacity that the given raw capacity provides.
6269
#[inline]
63-
fn usable_capacity(&self, cap: usize) -> usize {
64-
// As the number of entries approaches usable capacity,
65-
// min_capacity(size) must be smaller than the internal capacity,
66-
// so that the map is not resized:
67-
// `min_capacity(usable_capacity(x)) <= x`.
68-
// The left-hand side can only be smaller due to flooring by integer
69-
// division.
70-
//
70+
fn capacity(&self, raw_cap: usize) -> usize {
7171
// This doesn't have to be checked for overflow since allocation size
7272
// in bytes will overflow earlier than multiplication by 10.
7373
//
7474
// As per https://github.com/rust-lang/rust/pull/30991 this is updated
75-
// to be: (cap * den + den - 1) / num
76-
(cap * 10 + 10 - 1) / 11
77-
}
78-
}
79-
80-
#[test]
81-
fn test_resize_policy() {
82-
let rp = DefaultResizePolicy;
83-
for n in 0..1000 {
84-
assert!(rp.min_capacity(rp.usable_capacity(n)) <= n);
85-
assert!(rp.usable_capacity(rp.min_capacity(n)) <= n);
75+
// to be: (raw_cap * den + den - 1) / num
76+
(raw_cap * 10 + 10 - 1) / 11
8677
}
8778
}
8879

@@ -510,11 +501,11 @@ impl<K, V, S> HashMap<K, V, S>
510501

511502
// The caller must ensure that the invariants required by Robin Hood hashing hold.
512503
fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) {
513-
let cap = self.table.capacity();
504+
let raw_cap = self.raw_capacity();
514505
let mut buckets = Bucket::new(&mut self.table, hash);
515506
let ib = buckets.index();
516507

517-
while buckets.index() != ib + cap {
508+
while buckets.index() != ib + raw_cap {
518509
// We don't need to compare hashes for value swap.
519510
// Not even DIBs for Robin Hood.
520511
buckets = match buckets.peek() {
@@ -545,7 +536,10 @@ impl<K: Hash + Eq, V> HashMap<K, V, RandomState> {
545536
Default::default()
546537
}
547538

548-
/// Creates an empty `HashMap` with the given initial capacity.
539+
/// Creates an empty `HashMap` with the specified capacity.
540+
///
541+
/// The hash map will be able to hold at least `capacity` elements without
542+
/// reallocating. If `capacity` is 0, the hash map will not allocate.
549543
///
550544
/// # Examples
551545
///
@@ -593,9 +587,11 @@ impl<K, V, S> HashMap<K, V, S>
593587
}
594588
}
595589

596-
/// Creates an empty `HashMap` with space for at least `capacity`
597-
/// elements, using `hasher` to hash the keys.
590+
/// Creates an empty `HashMap` with the specified capacity, using `hasher`
591+
/// to hash the keys.
598592
///
593+
/// The hash map will be able to hold at least `capacity` elements without
594+
/// reallocating. If `capacity` is 0, the hash map will not allocate.
599595
/// Warning: `hasher` is normally randomly generated, and
600596
/// is designed to allow HashMaps to be resistant to attacks that
601597
/// cause many collisions and very poor performance. Setting it
@@ -616,13 +612,11 @@ impl<K, V, S> HashMap<K, V, S>
616612
pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S)
617613
-> HashMap<K, V, S> {
618614
let resize_policy = DefaultResizePolicy::new();
619-
let min_cap = max(INITIAL_CAPACITY, resize_policy.min_capacity(capacity));
620-
let internal_cap = min_cap.checked_next_power_of_two().expect("capacity overflow");
621-
assert!(internal_cap >= capacity, "capacity overflow");
615+
let raw_cap = resize_policy.raw_capacity(capacity);
622616
HashMap {
623617
hash_builder: hash_builder,
624618
resize_policy: resize_policy,
625-
table: RawTable::new(internal_cap),
619+
table: RawTable::new(raw_cap),
626620
}
627621
}
628622

@@ -647,7 +641,13 @@ impl<K, V, S> HashMap<K, V, S>
647641
#[inline]
648642
#[stable(feature = "rust1", since = "1.0.0")]
649643
pub fn capacity(&self) -> usize {
650-
self.resize_policy.usable_capacity(self.table.capacity())
644+
self.resize_policy.capacity(self.raw_capacity())
645+
}
646+
647+
/// Returns the hash map's raw capacity.
648+
#[inline]
649+
fn raw_capacity(&self) -> usize {
650+
self.table.capacity()
651651
}
652652

653653
/// Reserves capacity for at least `additional` more elements to be inserted
@@ -667,28 +667,23 @@ impl<K, V, S> HashMap<K, V, S>
667667
/// ```
668668
#[stable(feature = "rust1", since = "1.0.0")]
669669
pub fn reserve(&mut self, additional: usize) {
670-
let new_size = self.len().checked_add(additional).expect("capacity overflow");
671-
let min_cap = self.resize_policy.min_capacity(new_size);
672-
673-
// An invalid value shouldn't make us run out of space. This includes
674-
// an overflow check.
675-
assert!(new_size <= min_cap);
676-
677-
if self.table.capacity() < min_cap {
678-
let new_capacity = max(min_cap.next_power_of_two(), INITIAL_CAPACITY);
679-
self.resize(new_capacity);
670+
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
671+
if self.capacity() < min_cap {
672+
let raw_cap = self.resize_policy.raw_capacity(min_cap);
673+
self.resize(raw_cap);
680674
}
681675
}
682676

683-
/// Resizes the internal vectors to a new capacity. It's your responsibility to:
684-
/// 1) Make sure the new capacity is enough for all the elements, accounting
677+
/// Resizes the internal vectors to a new raw capacity. It's your
678+
/// responsibility to:
679+
/// 1) Ensure `new_raw_cap` is enough for all the elements, accounting
685680
/// for the load factor.
686-
/// 2) Ensure `new_capacity` is a power of two or zero.
687-
fn resize(&mut self, new_capacity: usize) {
688-
assert!(self.table.size() <= new_capacity);
689-
assert!(new_capacity.is_power_of_two() || new_capacity == 0);
681+
/// 2) Ensure `new_raw_cap` is a power of two or zero.
682+
fn resize(&mut self, new_raw_cap: usize) {
683+
assert!(self.table.size() <= new_raw_cap);
684+
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);
690685

691-
let mut old_table = replace(&mut self.table, RawTable::new(new_capacity));
686+
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
692687
let old_size = old_table.size();
693688

694689
if old_table.capacity() == 0 || old_table.size() == 0 {
@@ -778,14 +773,9 @@ impl<K, V, S> HashMap<K, V, S>
778773
/// ```
779774
#[stable(feature = "rust1", since = "1.0.0")]
780775
pub fn shrink_to_fit(&mut self) {
781-
let min_capacity = self.resize_policy.min_capacity(self.len());
782-
let min_capacity = max(min_capacity.next_power_of_two(), INITIAL_CAPACITY);
783-
784-
// An invalid value shouldn't make us run out of space.
785-
debug_assert!(self.len() <= min_capacity);
786-
787-
if self.table.capacity() != min_capacity {
788-
let old_table = replace(&mut self.table, RawTable::new(min_capacity));
776+
let new_raw_cap = self.resize_policy.raw_capacity(self.len());
777+
if self.raw_capacity() != new_raw_cap {
778+
let old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
789779
let old_size = old_table.size();
790780

791781
// Shrink the table. Naive algorithm for resizing:
@@ -2092,7 +2082,7 @@ mod test_map {
20922082
use rand::{thread_rng, Rng};
20932083

20942084
#[test]
2095-
fn test_create_capacities() {
2085+
fn test_zero_capacities() {
20962086
type HM = HashMap<i32, i32>;
20972087

20982088
let m = HM::new();
@@ -2103,6 +2093,24 @@ mod test_map {
21032093

21042094
let m = HM::with_hasher(RandomState::new());
21052095
assert_eq!(m.capacity(), 0);
2096+
2097+
let m = HM::with_capacity(0);
2098+
assert_eq!(m.capacity(), 0);
2099+
2100+
let m = HM::with_capacity_and_hasher(0, RandomState::new());
2101+
assert_eq!(m.capacity(), 0);
2102+
2103+
let mut m = HM::new();
2104+
m.insert(1, 1);
2105+
m.insert(2, 2);
2106+
m.remove(&1);
2107+
m.remove(&2);
2108+
m.shrink_to_fit();
2109+
assert_eq!(m.capacity(), 0);
2110+
2111+
let mut m = HM::new();
2112+
m.reserve(0);
2113+
assert_eq!(m.capacity(), 0);
21062114
}
21072115

21082116
#[test]
@@ -2562,8 +2570,8 @@ mod test_map {
25622570
assert!(m.is_empty());
25632571

25642572
let mut i = 0;
2565-
let old_cap = m.table.capacity();
2566-
while old_cap == m.table.capacity() {
2573+
let old_raw_cap = m.raw_capacity();
2574+
while old_raw_cap == m.raw_capacity() {
25672575
m.insert(i, i);
25682576
i += 1;
25692577
}
@@ -2577,55 +2585,55 @@ mod test_map {
25772585
let mut m = HashMap::new();
25782586

25792587
assert_eq!(m.len(), 0);
2580-
assert_eq!(m.table.capacity(), 0);
2588+
assert_eq!(m.raw_capacity(), 0);
25812589
assert!(m.is_empty());
25822590

25832591
m.insert(0, 0);
25842592
m.remove(&0);
25852593
assert!(m.is_empty());
2586-
let initial_cap = m.table.capacity();
2587-
m.reserve(initial_cap);
2588-
let cap = m.table.capacity();
2594+
let initial_raw_cap = m.raw_capacity();
2595+
m.reserve(initial_raw_cap);
2596+
let raw_cap = m.raw_capacity();
25892597

2590-
assert_eq!(cap, initial_cap * 2);
2598+
assert_eq!(raw_cap, initial_raw_cap * 2);
25912599

25922600
let mut i = 0;
2593-
for _ in 0..cap * 3 / 4 {
2601+
for _ in 0..raw_cap * 3 / 4 {
25942602
m.insert(i, i);
25952603
i += 1;
25962604
}
25972605
// three quarters full
25982606

25992607
assert_eq!(m.len(), i);
2600-
assert_eq!(m.table.capacity(), cap);
2608+
assert_eq!(m.raw_capacity(), raw_cap);
26012609

2602-
for _ in 0..cap / 4 {
2610+
for _ in 0..raw_cap / 4 {
26032611
m.insert(i, i);
26042612
i += 1;
26052613
}
26062614
// half full
26072615

2608-
let new_cap = m.table.capacity();
2609-
assert_eq!(new_cap, cap * 2);
2616+
let new_raw_cap = m.raw_capacity();
2617+
assert_eq!(new_raw_cap, raw_cap * 2);
26102618

2611-
for _ in 0..cap / 2 - 1 {
2619+
for _ in 0..raw_cap / 2 - 1 {
26122620
i -= 1;
26132621
m.remove(&i);
2614-
assert_eq!(m.table.capacity(), new_cap);
2622+
assert_eq!(m.raw_capacity(), new_raw_cap);
26152623
}
26162624
// A little more than one quarter full.
26172625
m.shrink_to_fit();
2618-
assert_eq!(m.table.capacity(), cap);
2626+
assert_eq!(m.raw_capacity(), raw_cap);
26192627
// again, a little more than half full
2620-
for _ in 0..cap / 2 - 1 {
2628+
for _ in 0..raw_cap / 2 - 1 {
26212629
i -= 1;
26222630
m.remove(&i);
26232631
}
26242632
m.shrink_to_fit();
26252633

26262634
assert_eq!(m.len(), i);
26272635
assert!(!m.is_empty());
2628-
assert_eq!(m.table.capacity(), initial_cap);
2636+
assert_eq!(m.raw_capacity(), initial_raw_cap);
26292637
}
26302638

26312639
#[test]

src/libstd/collections/hash/set.rs

+28-5
Original file line number | Diff line number | Diff line change
@@ -119,8 +119,10 @@ impl<T: Hash + Eq> HashSet<T, RandomState> {
119119
HashSet { map: HashMap::new() }
120120
}
121121

122-
/// Creates an empty HashSet with space for at least `n` elements in
123-
/// the hash table.
122+
/// Creates an empty `HashSet` with the specified capacity.
123+
///
124+
/// The hash set will be able to hold at least `capacity` elements without
125+
/// reallocating. If `capacity` is 0, the hash set will not allocate.
124126
///
125127
/// # Examples
126128
///
@@ -164,8 +166,11 @@ impl<T, S> HashSet<T, S>
164166
HashSet { map: HashMap::with_hasher(hasher) }
165167
}
166168

167-
/// Creates an empty HashSet with space for at least `capacity`
168-
/// elements in the hash table, using `hasher` to hash the keys.
169+
/// Creates an empty `HashSet` with the specified capacity, using
170+
/// `hasher` to hash the keys.
171+
///
172+
/// The hash set will be able to hold at least `capacity` elements without
173+
/// reallocating. If `capacity` is 0, the hash set will not allocate.
169174
///
170175
/// Warning: `hasher` is normally randomly generated, and
171176
/// is designed to allow `HashSet`s to be resistant to attacks that
@@ -1068,7 +1073,7 @@ mod test_set {
10681073
use super::super::map::RandomState;
10691074

10701075
#[test]
1071-
fn test_create_capacities() {
1076+
fn test_zero_capacities() {
10721077
type HS = HashSet<i32>;
10731078

10741079
let s = HS::new();
@@ -1079,6 +1084,24 @@ mod test_set {
10791084

10801085
let s = HS::with_hasher(RandomState::new());
10811086
assert_eq!(s.capacity(), 0);
1087+
1088+
let s = HS::with_capacity(0);
1089+
assert_eq!(s.capacity(), 0);
1090+
1091+
let s = HS::with_capacity_and_hasher(0, RandomState::new());
1092+
assert_eq!(s.capacity(), 0);
1093+
1094+
let mut s = HS::new();
1095+
s.insert(1);
1096+
s.insert(2);
1097+
s.remove(&1);
1098+
s.remove(&2);
1099+
s.shrink_to_fit();
1100+
assert_eq!(s.capacity(), 0);
1101+
1102+
let mut s = HS::new();
1103+
s.reserve(0);
1104+
assert_eq!(s.capacity(), 0);
10821105
}
10831106

10841107
#[test]

0 commit comments

Comments
 (0)