Skip to content

Commit 75df685

Browse files
authored
Auto merge of #36766 - nnethercote:hash-span-capacity, r=bluss
Clarify HashMap's capacity handling. HashMap has two notions of "capacity": - "Usable capacity": the number of elements a hash map can hold without resizing. This is the meaning of "capacity" used in HashMap's API, e.g. the `with_capacity()` function. - "Internal capacity": the number of allocated slots. Except for the zero case, it is always larger than the usable capacity (because some slots must be left empty) and is always a power of two. HashMap's code is confusing because it does a poor job of distinguishing these two meanings. I propose using two different terms for these two concepts. Because "capacity" is already used in HashMap's API to mean "usable capacity", I will use a different word for "internal capacity". I propose "span", though I'm happy to consider other names.
2 parents f374565 + 607d297 commit 75df685

File tree

2 files changed

+122
-90
lines changed

2 files changed

+122
-90
lines changed

src/libstd/collections/hash/map.rs

+94-85
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,9 @@ use super::table::BucketState::{
3434
Full,
3535
};
3636

37-
const INITIAL_LOG2_CAP: usize = 5;
38-
const INITIAL_CAPACITY: usize = 1 << INITIAL_LOG2_CAP; // 2^5
37+
const MIN_NONZERO_RAW_CAPACITY: usize = 32; // must be a power of two
3938

40-
/// The default behavior of HashMap implements a load factor of 90.9%.
41-
/// This behavior is characterized by the following condition:
42-
///
43-
/// - if size > 0.909 * capacity: grow the map
39+
/// The default behavior of HashMap implements a maximum load factor of 90.9%.
4440
#[derive(Clone)]
4541
struct DefaultResizePolicy;
4642

@@ -49,40 +45,35 @@ impl DefaultResizePolicy {
4945
DefaultResizePolicy
5046
}
5147

48+
/// A hash map's "capacity" is the number of elements it can hold without
49+
/// being resized. Its "raw capacity" is the number of slots required to
50+
/// provide that capacity, accounting for maximum loading. The raw capacity
51+
/// is always zero or a power of two.
5252
#[inline]
53-
fn min_capacity(&self, usable_size: usize) -> usize {
54-
// Here, we are rephrasing the logic by specifying the lower limit
55-
// on capacity:
56-
//
57-
// - if `cap < size * 1.1`: grow the map
58-
usable_size * 11 / 10
53+
fn raw_capacity(&self, len: usize) -> usize {
54+
if len == 0 {
55+
0
56+
} else {
57+
// 1. Account for loading: `raw_capacity >= len * 1.1`.
58+
// 2. Ensure it is a power of two.
59+
// 3. Ensure it is at least the minimum size.
60+
let mut raw_cap = len * 11 / 10;
61+
assert!(raw_cap >= len, "raw_cap overflow");
62+
raw_cap = raw_cap.checked_next_power_of_two().expect("raw_capacity overflow");
63+
raw_cap = max(MIN_NONZERO_RAW_CAPACITY, raw_cap);
64+
raw_cap
65+
}
5966
}
6067

61-
/// An inverse of `min_capacity`, approximately.
68+
/// The capacity of the given raw capacity.
6269
#[inline]
63-
fn usable_capacity(&self, cap: usize) -> usize {
64-
// As the number of entries approaches usable capacity,
65-
// min_capacity(size) must be smaller than the internal capacity,
66-
// so that the map is not resized:
67-
// `min_capacity(usable_capacity(x)) <= x`.
68-
// The left-hand side can only be smaller due to flooring by integer
69-
// division.
70-
//
70+
fn capacity(&self, raw_cap: usize) -> usize {
7171
// This doesn't have to be checked for overflow since allocation size
7272
// in bytes will overflow earlier than multiplication by 10.
7373
//
7474
// As per https://github.com/rust-lang/rust/pull/30991 this is updated
75-
// to be: (cap * den + den - 1) / num
76-
(cap * 10 + 10 - 1) / 11
77-
}
78-
}
79-
80-
#[test]
81-
fn test_resize_policy() {
82-
let rp = DefaultResizePolicy;
83-
for n in 0..1000 {
84-
assert!(rp.min_capacity(rp.usable_capacity(n)) <= n);
85-
assert!(rp.usable_capacity(rp.min_capacity(n)) <= n);
75+
// to be: (raw_cap * den + den - 1) / num
76+
(raw_cap * 10 + 10 - 1) / 11
8677
}
8778
}
8879

@@ -540,11 +531,11 @@ impl<K, V, S> HashMap<K, V, S>
540531

541532
// The caller should ensure that invariants by Robin Hood Hashing hold.
542533
fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) {
543-
let cap = self.table.capacity();
534+
let raw_cap = self.raw_capacity();
544535
let mut buckets = Bucket::new(&mut self.table, hash);
545536
let ib = buckets.index();
546537

547-
while buckets.index() != ib + cap {
538+
while buckets.index() != ib + raw_cap {
548539
// We don't need to compare hashes for value swap.
549540
// Not even DIBs for Robin Hood.
550541
buckets = match buckets.peek() {
@@ -575,7 +566,10 @@ impl<K: Hash + Eq, V> HashMap<K, V, RandomState> {
575566
Default::default()
576567
}
577568

578-
/// Creates an empty `HashMap` with the given initial capacity.
569+
/// Creates an empty `HashMap` with the specified capacity.
570+
///
571+
/// The hash map will be able to hold at least `capacity` elements without
572+
/// reallocating. If `capacity` is 0, the hash map will not allocate.
579573
///
580574
/// # Examples
581575
///
@@ -623,9 +617,11 @@ impl<K, V, S> HashMap<K, V, S>
623617
}
624618
}
625619

626-
/// Creates an empty `HashMap` with space for at least `capacity`
627-
/// elements, using `hasher` to hash the keys.
620+
/// Creates an empty `HashMap` with the specified capacity, using `hasher`
621+
/// to hash the keys.
628622
///
623+
/// The hash map will be able to hold at least `capacity` elements without
624+
/// reallocating. If `capacity` is 0, the hash map will not allocate.
///
629625
/// Warning: `hasher` is normally randomly generated, and
630626
/// is designed to allow HashMaps to be resistant to attacks that
631627
/// cause many collisions and very poor performance. Setting it
@@ -646,13 +642,11 @@ impl<K, V, S> HashMap<K, V, S>
646642
pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S)
647643
-> HashMap<K, V, S> {
648644
let resize_policy = DefaultResizePolicy::new();
649-
let min_cap = max(INITIAL_CAPACITY, resize_policy.min_capacity(capacity));
650-
let internal_cap = min_cap.checked_next_power_of_two().expect("capacity overflow");
651-
assert!(internal_cap >= capacity, "capacity overflow");
645+
let raw_cap = resize_policy.raw_capacity(capacity);
652646
HashMap {
653647
hash_builder: hash_builder,
654648
resize_policy: resize_policy,
655-
table: RawTable::new(internal_cap),
649+
table: RawTable::new(raw_cap),
656650
}
657651
}
658652

@@ -677,7 +671,13 @@ impl<K, V, S> HashMap<K, V, S>
677671
#[inline]
678672
#[stable(feature = "rust1", since = "1.0.0")]
679673
pub fn capacity(&self) -> usize {
680-
self.resize_policy.usable_capacity(self.table.capacity())
674+
self.resize_policy.capacity(self.raw_capacity())
675+
}
676+
677+
/// Returns the hash map's raw capacity.
678+
#[inline]
679+
fn raw_capacity(&self) -> usize {
680+
self.table.capacity()
681681
}
682682

683683
/// Reserves capacity for at least `additional` more elements to be inserted
@@ -697,28 +697,24 @@ impl<K, V, S> HashMap<K, V, S>
697697
/// ```
698698
#[stable(feature = "rust1", since = "1.0.0")]
699699
pub fn reserve(&mut self, additional: usize) {
700-
let new_size = self.len().checked_add(additional).expect("capacity overflow");
701-
let min_cap = self.resize_policy.min_capacity(new_size);
702-
703-
// An invalid value shouldn't make us run out of space. This includes
704-
// an overflow check.
705-
assert!(new_size <= min_cap);
706-
707-
if self.table.capacity() < min_cap {
708-
let new_capacity = max(min_cap.next_power_of_two(), INITIAL_CAPACITY);
709-
self.resize(new_capacity);
700+
let remaining = self.capacity() - self.len(); // this can't overflow
701+
if remaining < additional {
702+
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
703+
let raw_cap = self.resize_policy.raw_capacity(min_cap);
704+
self.resize(raw_cap);
710705
}
711706
}
712707

713-
/// Resizes the internal vectors to a new capacity. It's your responsibility to:
714-
/// 1) Make sure the new capacity is enough for all the elements, accounting
708+
/// Resizes the internal vectors to a new capacity. It's your
709+
/// responsibility to:
710+
/// 1) Ensure `new_raw_cap` is enough for all the elements, accounting
715711
/// for the load factor.
716-
/// 2) Ensure `new_capacity` is a power of two or zero.
717-
fn resize(&mut self, new_capacity: usize) {
718-
assert!(self.table.size() <= new_capacity);
719-
assert!(new_capacity.is_power_of_two() || new_capacity == 0);
712+
/// 2) Ensure `new_raw_cap` is a power of two or zero.
713+
fn resize(&mut self, new_raw_cap: usize) {
714+
assert!(self.table.size() <= new_raw_cap);
715+
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);
720716

721-
let mut old_table = replace(&mut self.table, RawTable::new(new_capacity));
717+
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
722718
let old_size = old_table.size();
723719

724720
if old_table.capacity() == 0 || old_table.size() == 0 {
@@ -808,14 +804,9 @@ impl<K, V, S> HashMap<K, V, S>
808804
/// ```
809805
#[stable(feature = "rust1", since = "1.0.0")]
810806
pub fn shrink_to_fit(&mut self) {
811-
let min_capacity = self.resize_policy.min_capacity(self.len());
812-
let min_capacity = max(min_capacity.next_power_of_two(), INITIAL_CAPACITY);
813-
814-
// An invalid value shouldn't make us run out of space.
815-
debug_assert!(self.len() <= min_capacity);
816-
817-
if self.table.capacity() != min_capacity {
818-
let old_table = replace(&mut self.table, RawTable::new(min_capacity));
807+
let new_raw_cap = self.resize_policy.raw_capacity(self.len());
808+
if self.raw_capacity() != new_raw_cap {
809+
let old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
819810
let old_size = old_table.size();
820811

821812
// Shrink the table. Naive algorithm for resizing:
@@ -2122,7 +2113,7 @@ mod test_map {
21222113
use rand::{thread_rng, Rng};
21232114

21242115
#[test]
2125-
fn test_create_capacities() {
2116+
fn test_zero_capacities() {
21262117
type HM = HashMap<i32, i32>;
21272118

21282119
let m = HM::new();
@@ -2133,6 +2124,24 @@ mod test_map {
21332124

21342125
let m = HM::with_hasher(RandomState::new());
21352126
assert_eq!(m.capacity(), 0);
2127+
2128+
let m = HM::with_capacity(0);
2129+
assert_eq!(m.capacity(), 0);
2130+
2131+
let m = HM::with_capacity_and_hasher(0, RandomState::new());
2132+
assert_eq!(m.capacity(), 0);
2133+
2134+
let mut m = HM::new();
2135+
m.insert(1, 1);
2136+
m.insert(2, 2);
2137+
m.remove(&1);
2138+
m.remove(&2);
2139+
m.shrink_to_fit();
2140+
assert_eq!(m.capacity(), 0);
2141+
2142+
let mut m = HM::new();
2143+
m.reserve(0);
2144+
assert_eq!(m.capacity(), 0);
21362145
}
21372146

21382147
#[test]
@@ -2592,8 +2601,8 @@ mod test_map {
25922601
assert!(m.is_empty());
25932602

25942603
let mut i = 0;
2595-
let old_cap = m.table.capacity();
2596-
while old_cap == m.table.capacity() {
2604+
let old_raw_cap = m.raw_capacity();
2605+
while old_raw_cap == m.raw_capacity() {
25972606
m.insert(i, i);
25982607
i += 1;
25992608
}
@@ -2607,55 +2616,55 @@ mod test_map {
26072616
let mut m = HashMap::new();
26082617

26092618
assert_eq!(m.len(), 0);
2610-
assert_eq!(m.table.capacity(), 0);
2619+
assert_eq!(m.raw_capacity(), 0);
26112620
assert!(m.is_empty());
26122621

26132622
m.insert(0, 0);
26142623
m.remove(&0);
26152624
assert!(m.is_empty());
2616-
let initial_cap = m.table.capacity();
2617-
m.reserve(initial_cap);
2618-
let cap = m.table.capacity();
2625+
let initial_raw_cap = m.raw_capacity();
2626+
m.reserve(initial_raw_cap);
2627+
let raw_cap = m.raw_capacity();
26192628

2620-
assert_eq!(cap, initial_cap * 2);
2629+
assert_eq!(raw_cap, initial_raw_cap * 2);
26212630

26222631
let mut i = 0;
2623-
for _ in 0..cap * 3 / 4 {
2632+
for _ in 0..raw_cap * 3 / 4 {
26242633
m.insert(i, i);
26252634
i += 1;
26262635
}
26272636
// three quarters full
26282637

26292638
assert_eq!(m.len(), i);
2630-
assert_eq!(m.table.capacity(), cap);
2639+
assert_eq!(m.raw_capacity(), raw_cap);
26312640

2632-
for _ in 0..cap / 4 {
2641+
for _ in 0..raw_cap / 4 {
26332642
m.insert(i, i);
26342643
i += 1;
26352644
}
26362645
// half full
26372646

2638-
let new_cap = m.table.capacity();
2639-
assert_eq!(new_cap, cap * 2);
2647+
let new_raw_cap = m.raw_capacity();
2648+
assert_eq!(new_raw_cap, raw_cap * 2);
26402649

2641-
for _ in 0..cap / 2 - 1 {
2650+
for _ in 0..raw_cap / 2 - 1 {
26422651
i -= 1;
26432652
m.remove(&i);
2644-
assert_eq!(m.table.capacity(), new_cap);
2653+
assert_eq!(m.raw_capacity(), new_raw_cap);
26452654
}
26462655
// A little more than one quarter full.
26472656
m.shrink_to_fit();
2648-
assert_eq!(m.table.capacity(), cap);
2657+
assert_eq!(m.raw_capacity(), raw_cap);
26492658
// again, a little more than half full
2650-
for _ in 0..cap / 2 - 1 {
2659+
for _ in 0..raw_cap / 2 - 1 {
26512660
i -= 1;
26522661
m.remove(&i);
26532662
}
26542663
m.shrink_to_fit();
26552664

26562665
assert_eq!(m.len(), i);
26572666
assert!(!m.is_empty());
2658-
assert_eq!(m.table.capacity(), initial_cap);
2667+
assert_eq!(m.raw_capacity(), initial_raw_cap);
26592668
}
26602669

26612670
#[test]

src/libstd/collections/hash/set.rs

+28-5
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,10 @@ impl<T: Hash + Eq> HashSet<T, RandomState> {
133133
HashSet { map: HashMap::new() }
134134
}
135135

136-
/// Creates an empty HashSet with space for at least `n` elements in
137-
/// the hash table.
136+
/// Creates an empty `HashSet` with the specified capacity.
137+
///
138+
/// The hash set will be able to hold at least `capacity` elements without
139+
/// reallocating. If `capacity` is 0, the hash set will not allocate.
138140
///
139141
/// # Examples
140142
///
@@ -178,8 +180,11 @@ impl<T, S> HashSet<T, S>
178180
HashSet { map: HashMap::with_hasher(hasher) }
179181
}
180182

181-
/// Creates an empty HashSet with space for at least `capacity`
182-
/// elements in the hash table, using `hasher` to hash the keys.
183+
/// Creates an empty HashSet with the specified capacity, using
184+
/// `hasher` to hash the keys.
185+
///
186+
/// The hash set will be able to hold at least `capacity` elements without
187+
/// reallocating. If `capacity` is 0, the hash set will not allocate.
183188
///
184189
/// Warning: `hasher` is normally randomly generated, and
185190
/// is designed to allow `HashSet`s to be resistant to attacks that
@@ -1082,7 +1087,7 @@ mod test_set {
10821087
use super::super::map::RandomState;
10831088

10841089
#[test]
1085-
fn test_create_capacities() {
1090+
fn test_zero_capacities() {
10861091
type HS = HashSet<i32>;
10871092

10881093
let s = HS::new();
@@ -1093,6 +1098,24 @@ mod test_set {
10931098

10941099
let s = HS::with_hasher(RandomState::new());
10951100
assert_eq!(s.capacity(), 0);
1101+
1102+
let s = HS::with_capacity(0);
1103+
assert_eq!(s.capacity(), 0);
1104+
1105+
let s = HS::with_capacity_and_hasher(0, RandomState::new());
1106+
assert_eq!(s.capacity(), 0);
1107+
1108+
let mut s = HS::new();
1109+
s.insert(1);
1110+
s.insert(2);
1111+
s.remove(&1);
1112+
s.remove(&2);
1113+
s.shrink_to_fit();
1114+
assert_eq!(s.capacity(), 0);
1115+
1116+
let mut s = HS::new();
1117+
s.reserve(0);
1118+
assert_eq!(s.capacity(), 0);
10961119
}
10971120

10981121
#[test]

0 commit comments

Comments
 (0)