Skip to content

Commit cb1531f

Browse files
danielsntedinski
authored andcommitted
Use interned strings to reduce memory usage (rust-lang#617)
1 parent 44e948c commit cb1531f

33 files changed

+726
-400
lines changed

Cargo.lock

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ dependencies = [
3434
"rustc-std-workspace-core",
3535
]
3636

37+
[[package]]
38+
name = "ahash"
39+
version = "0.7.4"
40+
source = "registry+https://github.com/rust-lang/crates.io-index"
41+
checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98"
42+
dependencies = [
43+
"getrandom 0.2.0",
44+
"once_cell",
45+
"version_check",
46+
]
47+
3748
[[package]]
3849
name = "aho-corasick"
3950
version = "0.7.18"
@@ -481,6 +492,7 @@ version = "0.0.0"
481492
dependencies = [
482493
"bitflags",
483494
"cstr",
495+
"lazy_static",
484496
"libc",
485497
"measureme 9.1.2",
486498
"num",
@@ -505,6 +517,7 @@ dependencies = [
505517
"serde_test",
506518
"smallvec",
507519
"snap",
520+
"string-interner",
508521
"tracing",
509522
"vector-map",
510523
]
@@ -1599,6 +1612,7 @@ version = "0.11.0"
15991612
source = "registry+https://github.com/rust-lang/crates.io-index"
16001613
checksum = "362385356d610bd1e5a408ddf8d022041774b683f345a1d2cfcb4f60f8ae2db5"
16011614
dependencies = [
1615+
"ahash",
16021616
"compiler_builtins",
16031617
"rustc-std-workspace-alloc",
16041618
"rustc-std-workspace-core",
@@ -5199,6 +5213,17 @@ dependencies = [
51995213
"rustc-std-workspace-core",
52005214
]
52015215

5216+
[[package]]
5217+
name = "string-interner"
5218+
version = "0.14.0"
5219+
source = "registry+https://github.com/rust-lang/crates.io-index"
5220+
checksum = "91e2531d8525b29b514d25e275a43581320d587b86db302b9a7e464bac579648"
5221+
dependencies = [
5222+
"cfg-if 1.0.0",
5223+
"hashbrown",
5224+
"serde",
5225+
]
5226+
52025227
[[package]]
52035228
name = "string_cache"
52045229
version = "0.8.0"

compiler/cbmc/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ doctest = false
1212

1313
[dependencies]
1414
bitflags = "1.0"
15+
lazy_static = "1.4.0"
1516
cstr = "0.2"
1617
libc = "0.2"
1718
measureme = "9.1.0"
1819
num = "0.4.0"
1920
serde = {version = "1", features = ["derive"]}
2021
snap = "1"
22+
string-interner = "0.14.0"
2123
tracing = "0.1"
2224
vector-map = "1.0.1"
2325
rustc_middle = { path = "../rustc_middle" }

compiler/cbmc/src/cbmc_string.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0 OR MIT
3+
4+
use lazy_static::lazy_static;
5+
use std::sync::Mutex;
6+
use string_interner::symbol::SymbolU32;
7+
use string_interner::StringInterner;
8+
9+
/// This class implements an interner for Strings.
10+
/// CBMC objects to have a large number of strings which refer to names: symbols, files, etc.
11+
/// These tend to be reused many times, which causes signifcant memory usage.
12+
/// If we intern the strings, each unique string is only allocated once, saving memory.
13+
/// On the stdlib test, interning led to a 15% savings in peak memory usage.
14+
/// Since they're referred to by index, InternedStrings become `Copy`, which simplifies APIs.
15+
/// The downside is that interned strings live the lifetime of the execution.
16+
/// So you should only intern strings that will be used in long-lived data-structures, not temps.
17+
///
18+
/// We use a single global string interner, which is protected by a Mutex (i.e. threadsafe).
19+
/// To create an interned string, either do
20+
/// `let i : InternedString = s.into();` or
21+
/// `let i = s.intern();`
22+
#[derive(Debug, Clone, Hash, Copy, PartialEq, Eq, PartialOrd, Ord)]
23+
pub struct InternedString(SymbolU32);
24+
25+
// Use a `Mutex` to make this thread safe.
26+
lazy_static! {
27+
static ref INTERNER: Mutex<StringInterner> = Mutex::new(StringInterner::default());
28+
}
29+
30+
impl InternedString {
31+
pub fn is_empty(&self) -> bool {
32+
self.map(|s| s.is_empty())
33+
}
34+
35+
pub fn len(&self) -> usize {
36+
self.map(|s| s.len())
37+
}
38+
39+
/// Apply the function `f` to the interned string, represented as an &str.
40+
/// Needed because exporting the &str backing the InternedString is blocked by lifetime rules.
41+
/// Instead, this allows users to operate on the &str when needed.
42+
pub fn map<T, F: FnOnce(&str) -> T>(&self, f: F) -> T {
43+
f(INTERNER.lock().unwrap().resolve(self.0).unwrap())
44+
}
45+
46+
pub fn starts_with(&self, pattern: &str) -> bool {
47+
self.map(|s| s.starts_with(pattern))
48+
}
49+
}
50+
51+
impl std::fmt::Display for InternedString {
52+
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
53+
write!(fmt, "{}", INTERNER.lock().unwrap().resolve(self.0).unwrap())
54+
}
55+
}
56+
57+
impl<T> From<T> for InternedString
58+
where
59+
T: AsRef<str>,
60+
{
61+
fn from(s: T) -> InternedString {
62+
InternedString(INTERNER.lock().unwrap().get_or_intern(s))
63+
}
64+
}
65+
66+
impl<T> PartialEq<T> for InternedString
67+
where
68+
T: AsRef<str>,
69+
{
70+
fn eq(&self, other: &T) -> bool {
71+
INTERNER.lock().unwrap().resolve(self.0).unwrap() == other.as_ref()
72+
}
73+
}
74+
75+
pub trait InternString {
76+
fn intern(self) -> InternedString;
77+
}
78+
79+
impl<T> InternString for T
80+
where
81+
T: Into<InternedString>,
82+
{
83+
fn intern(self) -> InternedString {
84+
self.into()
85+
}
86+
}
87+
pub trait InternStringOption {
88+
fn intern(self) -> Option<InternedString>;
89+
}
90+
91+
impl<T> InternStringOption for Option<T>
92+
where
93+
T: Into<InternedString>,
94+
{
95+
fn intern(self) -> Option<InternedString> {
96+
self.map(|s| s.into())
97+
}
98+
}
99+
100+
#[cfg(test)]
101+
mod tests {
102+
use crate::cbmc_string::InternedString;
103+
104+
#[test]
105+
fn test_string_interner() {
106+
let a: InternedString = "A".into();
107+
let b: InternedString = "B".into();
108+
let aa: InternedString = "A".into();
109+
110+
assert_eq!(a, aa);
111+
assert_ne!(a, b);
112+
assert_ne!(aa, b);
113+
114+
assert_eq!(a, "A");
115+
assert_eq!(b, "B");
116+
assert_eq!(aa, "A");
117+
}
118+
}

compiler/cbmc/src/env.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ pub fn additional_env_symbols() -> Vec<Symbol> {
7070
int_constant("__CPROVER_malloc_failure_mode_return_null", 1),
7171
Symbol::typedef("__CPROVER_size_t", "__CPROVER_size_t", Type::size_t(), Location::none()),
7272
Symbol::static_variable(
73-
"__CPROVER_memory".to_string(),
74-
"__CPROVER_memory".to_string(),
73+
"__CPROVER_memory",
74+
"__CPROVER_memory",
7575
Type::unsigned_int(8).infinite_array_of(),
7676
Location::none(),
7777
)

0 commit comments

Comments
 (0)