Skip to content

Commit 1d15173

Browse files
committed
fix!: Avoid lifetime in Outcome.
Otherwise it's not possible to keep allocations for matches, while also adjusting pattern lists due to stack changes. On the bright side, this adds some more deduplication to the implementation, at the cost of dealing with some hashmaps that are updated when matching.
1 parent 1eac372 commit 1d15173

File tree

6 files changed

+163
-55
lines changed

6 files changed

+163
-55
lines changed

gix-attributes/src/search/attributes.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ impl Search {
8181
&'a self,
8282
relative_path: impl Into<&'b BStr>,
8383
case: gix_glob::pattern::Case,
84-
out: &mut Outcome<'a>,
84+
out: &mut Outcome,
8585
) -> bool {
8686
let relative_path = relative_path.into();
8787
let basename_pos = relative_path.rfind(b"/").map(|p| p + 1);
@@ -174,12 +174,12 @@ fn macro_mode() -> gix_glob::pattern::Mode {
174174
/// `is_dir` is true if `relative_path` is a directory.
175175
/// Return `true` if at least one pattern matched.
176176
#[allow(unused_variables)]
177-
fn pattern_matching_relative_path<'a>(
178-
list: &'a gix_glob::search::pattern::List<Attributes>,
177+
fn pattern_matching_relative_path(
178+
list: &gix_glob::search::pattern::List<Attributes>,
179179
relative_path: &BStr,
180180
basename_pos: Option<usize>,
181181
case: gix_glob::pattern::Case,
182-
out: &mut Outcome<'a>,
182+
out: &mut Outcome,
183183
) -> bool {
184184
let (relative_path, basename_start_pos) =
185185
match list.strip_base_handle_recompute_basename_pos(relative_path, basename_pos, case) {
@@ -207,7 +207,7 @@ fn pattern_matching_relative_path<'a>(
207207
if out.has_unspecified_attributes(attrs.iter().map(|attr| attr.id))
208208
&& pattern.matches_repo_relative_path(relative_path, basename_start_pos, None, case)
209209
{
210-
let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_deref(), *sequence_number);
210+
let all_filled = out.fill_attributes(attrs.iter(), pattern, list.source.as_ref(), *sequence_number);
211211
if all_filled {
212212
break 'outer;
213213
}

gix-attributes/src/search/mod.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ use std::collections::HashMap;
33
use kstring::KString;
44
use smallvec::SmallVec;
55

6-
use crate::Assignment;
6+
use crate::{Assignment, AssignmentRef};
77

88
mod attributes;
99
mod outcome;
10+
mod refmap;
11+
pub(crate) use refmap::RefMap;
1012

1113
/// A typically sized list of attributes.
1214
pub type Assignments = SmallVec<[TrackedAssignment; AVERAGE_NUM_ATTRS]>;
@@ -49,7 +51,7 @@ pub struct Match<'a> {
4951
/// The glob pattern itself, like `/target/*`.
5052
pub pattern: &'a gix_glob::Pattern,
5153
/// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern.
52-
pub assignment: Assignment,
54+
pub assignment: AssignmentRef<'a>,
5355
/// Additional information about the kind of match.
5456
pub kind: MatchKind,
5557
/// Information about the location of the match.
@@ -88,24 +90,30 @@ pub enum MatchKind {
8890

8991
/// The result of a search, containing all matching attributes.
9092
#[derive(Default)]
91-
pub struct Outcome<'pattern> {
93+
pub struct Outcome {
9294
/// The list of all available attributes, by ascending order. Each slots index corresponds to an attribute with that order, i.e.
9395
/// `arr[attr.id] = <attr info>`.
9496
///
9597
/// This list needs to be up-to-date with the search group so all possible attribute names are known.
96-
matches_by_id: Vec<Slot<'pattern>>,
98+
matches_by_id: Vec<Slot>,
9799
/// A stack of attributes to use for processing attributes of matched patterns and for resolving their macros.
98100
attrs_stack: SmallVec<[(AttributeId, Assignment, Option<AttributeId>); 8]>,
99101
/// A set of attributes we should limit ourselves to, or empty if we should fill in all attributes, made of
100102
selected: SmallVec<[(KString, Option<AttributeId>); AVERAGE_NUM_ATTRS]>,
103+
/// storage for all patterns we have matched so far (in order to avoid referencing them, we copy them, but only once).
104+
patterns: RefMap<gix_glob::Pattern>,
105+
/// storage for all assignments we have matched so far (in order to avoid referencing them, we copy them, but only once).
106+
assignments: RefMap<Assignment>,
107+
/// storage for all source paths we have matched so far (in order to avoid referencing them, we copy them, but only once).
108+
source_paths: RefMap<std::path::PathBuf>,
101109
/// The amount of attributes that still need to be set, or `None` if this outcome is consumed which means it
102110
/// needs to be re-initialized.
103111
remaining: Option<usize>,
104112
}
105113

106114
#[derive(Default, Clone)]
107-
struct Slot<'pattern> {
108-
r#match: Option<Match<'pattern>>,
115+
struct Slot {
116+
r#match: Option<outcome::Match>,
109117
/// A list of all assignments, being an empty list for non-macro attributes, or all assignments (with order) for macros.
110118
/// It's used to resolve macros.
111119
macro_attributes: Assignments,

gix-attributes/src/search/outcome.rs

Lines changed: 85 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
1-
use std::{borrow::Cow, path::Path};
2-
31
use bstr::{BString, ByteSlice};
42
use gix_glob::Pattern;
53
use kstring::{KString, KStringRef};
64

5+
use crate::search::refmap::RefMapKey;
76
use crate::{
87
search::{
9-
Assignments, AttributeId, Attributes, Match, MatchKind, MatchLocation, Metadata, MetadataCollection, Outcome,
10-
TrackedAssignment, Value,
8+
Assignments, AttributeId, Attributes, MatchKind, Metadata, MetadataCollection, Outcome, TrackedAssignment,
9+
Value,
1110
},
12-
Assignment, NameRef, State,
11+
AssignmentRef, NameRef, StateRef,
1312
};
1413

1514
/// Initialization
16-
impl<'pattern> Outcome<'pattern> {
15+
impl Outcome {
1716
/// Initialize this instance to collect outcomes for all names in `collection`, which represents all possible attributes
1817
/// or macros we may visit, and [`reset`][Self::reset()] it unconditionally.
1918
///
@@ -74,7 +73,7 @@ impl<'pattern> Outcome<'pattern> {
7473
}
7574

7675
/// Access
77-
impl<'pattern> Outcome<'pattern> {
76+
impl Outcome {
7877
/// Return an iterator over all filled attributes we were initialized with.
7978
///
8079
/// ### Note
@@ -88,42 +87,44 @@ impl<'pattern> Outcome<'pattern> {
8887
/// the same as what `git` provides.
8988
/// Ours is in order of declaration, whereas `git` seems to list macros first somehow. Since the values are the same, this
9089
/// shouldn't be an issue.
91-
pub fn iter<'a>(&'a self) -> impl Iterator<Item = &'a Match<'pattern>> + 'a {
92-
self.matches_by_id.iter().filter_map(|item| item.r#match.as_ref())
90+
pub fn iter(&self) -> impl Iterator<Item = crate::search::Match<'_>> {
91+
self.matches_by_id
92+
.iter()
93+
.filter_map(|item| item.r#match.as_ref().map(|m| m.to_outer(self)))
9394
}
9495

9596
/// Iterate over all matches of the attribute selection in their original order.
96-
pub fn iter_selected<'a>(&'a self) -> impl Iterator<Item = Cow<'a, Match<'pattern>>> + 'a {
97+
///
98+
/// This only yields values if this instance was initialized with [`Outcome::initialize_with_selection()`].
99+
pub fn iter_selected(&self) -> impl Iterator<Item = crate::search::Match<'_>> {
97100
static DUMMY: Pattern = Pattern {
98101
text: BString::new(Vec::new()),
99102
mode: gix_glob::pattern::Mode::empty(),
100103
first_wildcard_pos: None,
101104
};
102105
self.selected.iter().map(|(name, id)| {
103-
id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref())
104-
.map(Cow::Borrowed)
105-
.unwrap_or_else(|| {
106-
Cow::Owned(Match {
107-
pattern: &DUMMY,
108-
assignment: Assignment {
109-
name: NameRef::try_from(name.as_bytes().as_bstr())
110-
.unwrap_or_else(|_| NameRef("invalid".into()))
111-
.to_owned(),
112-
state: State::Unspecified,
113-
},
114-
kind: MatchKind::Attribute { macro_id: None },
115-
location: MatchLocation {
116-
source: None,
117-
sequence_number: 0,
118-
},
119-
})
106+
id.and_then(|id| self.matches_by_id[id.0].r#match.as_ref().map(|m| m.to_outer(self)))
107+
.unwrap_or_else(|| crate::search::Match {
108+
pattern: &DUMMY,
109+
assignment: AssignmentRef {
110+
name: NameRef::try_from(name.as_bytes().as_bstr())
111+
.unwrap_or_else(|_| NameRef("invalid".into())),
112+
state: StateRef::Unspecified,
113+
},
114+
kind: MatchKind::Attribute { macro_id: None },
115+
location: crate::search::MatchLocation {
116+
source: None,
117+
sequence_number: 0,
118+
},
120119
})
121120
})
122121
}
123122

124123
/// Obtain a match by the order of its attribute, if the order exists in our initialized attribute list and there was a match.
125-
pub fn match_by_id(&self, id: AttributeId) -> Option<&Match<'pattern>> {
126-
self.matches_by_id.get(id.0).and_then(|m| m.r#match.as_ref())
124+
pub fn match_by_id(&self, id: AttributeId) -> Option<crate::search::Match<'_>> {
125+
self.matches_by_id
126+
.get(id.0)
127+
.and_then(|m| m.r#match.as_ref().map(|m| m.to_outer(self)))
127128
}
128129

129130
/// Return `true` if there is nothing more to be done as all attributes were filled.
@@ -133,16 +134,16 @@ impl<'pattern> Outcome<'pattern> {
133134
}
134135

135136
/// Mutation
136-
impl<'pattern> Outcome<'pattern> {
137+
impl Outcome {
137138
/// Fill all `attrs` and resolve them recursively if they are macros. Return `true` if there is no attribute left to be resolved and
138139
/// we are totally done.
139140
/// `pattern` is what matched a patch and is passed for contextual information,
140141
/// providing `sequence_number` and `source` as well.
141142
pub(crate) fn fill_attributes<'a>(
142143
&mut self,
143144
attrs: impl Iterator<Item = &'a TrackedAssignment>,
144-
pattern: &'pattern gix_glob::Pattern,
145-
source: Option<&'pattern Path>,
145+
pattern: &gix_glob::Pattern,
146+
source: Option<&std::path::PathBuf>,
146147
sequence_number: usize,
147148
) -> bool {
148149
self.attrs_stack.extend(attrs.filter_map(|attr| {
@@ -160,8 +161,8 @@ impl<'pattern> Outcome<'pattern> {
160161
let is_macro = !slot.macro_attributes.is_empty();
161162

162163
slot.r#match = Some(Match {
163-
pattern,
164-
assignment: assignment.to_owned(),
164+
pattern: self.patterns.insert(pattern),
165+
assignment: self.assignments.insert_owned(assignment),
165166
kind: if is_macro {
166167
MatchKind::Macro {
167168
parent_macro_id: parent_order,
@@ -170,7 +171,7 @@ impl<'pattern> Outcome<'pattern> {
170171
MatchKind::Attribute { macro_id: parent_order }
171172
},
172173
location: MatchLocation {
173-
source,
174+
source: source.map(|path| self.source_paths.insert(path)),
174175
sequence_number,
175176
},
176177
});
@@ -193,7 +194,7 @@ impl<'pattern> Outcome<'pattern> {
193194
}
194195
}
195196

196-
impl<'attr> Outcome<'attr> {
197+
impl Outcome {
197198
/// Given a list of `attrs` by order, return true if at least one of them is not set
198199
pub(crate) fn has_unspecified_attributes(&self, mut attrs: impl Iterator<Item = AttributeId>) -> bool {
199200
attrs.any(|order| self.matches_by_id[order.0].r#match.is_none())
@@ -314,3 +315,51 @@ impl MatchKind {
314315
}
315316
}
316317
}
318+
319+
/// A version of `Match` without references.
320+
#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
321+
pub struct Match {
322+
/// The glob pattern itself, like `/target/*`.
323+
pub pattern: RefMapKey,
324+
/// The key=value pair of the attribute that matched at the pattern. There can be multiple matches per pattern.
325+
pub assignment: RefMapKey,
326+
/// Additional information about the kind of match.
327+
pub kind: MatchKind,
328+
/// Information about the location of the match.
329+
pub location: MatchLocation,
330+
}
331+
332+
impl Match {
333+
fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::Match<'a> {
334+
crate::search::Match {
335+
pattern: out.patterns.resolve(self.pattern).expect("pattern still present"),
336+
assignment: out
337+
.assignments
338+
.resolve(self.assignment)
339+
.expect("assignment present")
340+
.as_ref(),
341+
kind: self.kind,
342+
location: self.location.to_outer(out),
343+
}
344+
}
345+
}
346+
347+
/// A version of `MatchLocation` without references.
348+
#[derive(Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
349+
pub struct MatchLocation {
350+
/// The path to the source from which the pattern was loaded, or `None` if it was specified by other means.
351+
pub source: Option<RefMapKey>,
352+
/// The line at which the pattern was found in its `source` file, or the occurrence in which it was provided.
353+
pub sequence_number: usize,
354+
}
355+
356+
impl MatchLocation {
357+
fn to_outer<'a>(&self, out: &'a Outcome) -> crate::search::MatchLocation<'a> {
358+
crate::search::MatchLocation {
359+
source: self
360+
.source
361+
.and_then(|source| out.source_paths.resolve(source).map(|p| p.as_path())),
362+
sequence_number: self.sequence_number,
363+
}
364+
}
365+
}

gix-attributes/src/search/refmap.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
//! A utility to store objects by identity, which deduplicates them while avoiding lifetimes.
2+
//!
3+
//! We chose to use hashing/identity over pointers as it's possible that different objects end up in the same memory location,
4+
//! which would create obscure bugs. The same could happen with hash collisions, but they these are designed to be less likely.
5+
use std::collections::btree_map::Entry;
6+
use std::collections::hash_map::DefaultHasher;
7+
use std::collections::BTreeMap;
8+
use std::hash::{Hash, Hasher};
9+
10+
pub(crate) type RefMapKey = u64;
11+
pub(crate) struct RefMap<T>(BTreeMap<RefMapKey, T>);
12+
13+
impl<T> Default for RefMap<T> {
14+
fn default() -> Self {
15+
RefMap(Default::default())
16+
}
17+
}
18+
19+
impl<T> RefMap<T>
20+
where
21+
T: Hash + Clone,
22+
{
23+
pub(crate) fn insert(&mut self, value: &T) -> RefMapKey {
24+
let mut s = DefaultHasher::new();
25+
value.hash(&mut s);
26+
let key = s.finish();
27+
match self.0.entry(key) {
28+
Entry::Vacant(e) => {
29+
e.insert(value.clone());
30+
key
31+
}
32+
Entry::Occupied(_) => key,
33+
}
34+
}
35+
36+
pub(crate) fn insert_owned(&mut self, value: T) -> RefMapKey {
37+
let mut s = DefaultHasher::new();
38+
value.hash(&mut s);
39+
let key = s.finish();
40+
match self.0.entry(key) {
41+
Entry::Vacant(e) => {
42+
e.insert(value);
43+
key
44+
}
45+
Entry::Occupied(_) => key,
46+
}
47+
}
48+
49+
pub(crate) fn resolve(&self, key: RefMapKey) -> Option<&T> {
50+
self.0.get(&key)
51+
}
52+
}

gix-attributes/src/state.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ impl Value {
6464
}
6565

6666
/// Access
67-
impl State {
67+
impl StateRef<'_> {
6868
/// Return `true` if the associated attribute was set to be unspecified using the `!attr` prefix or it wasn't mentioned.
6969
pub fn is_unspecified(&self) -> bool {
70-
matches!(self, State::Unspecified)
70+
matches!(self, StateRef::Unspecified)
7171
}
7272
}
7373

gix-attributes/tests/search/mod.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ fn baseline() -> crate::Result {
103103
assert_references(&actual);
104104
let actual: Vec<_> = actual
105105
.iter()
106-
.filter_map(|m| (!m.assignment.state.is_unspecified()).then(|| m.assignment.as_ref()))
106+
.filter_map(|m| (!m.assignment.state.is_unspecified()).then_some(m.assignment))
107107
.collect();
108108
assert_eq!(actual, expected, "we have the same matches: {rela_path:?}");
109109
assert_ne!(has_match, actual.is_empty());
@@ -196,7 +196,7 @@ fn all_attributes_are_listed_in_declaration_order() -> crate::Result {
196196
out.reset();
197197
group.pattern_matching_relative_path(rela_path, Case::Sensitive, &mut out);
198198
assert_references(&out);
199-
let actual: Vec<_> = out.iter().map(|m| m.assignment.as_ref()).collect();
199+
let actual: Vec<_> = out.iter().map(|m| m.assignment).collect();
200200
assert_eq!(
201201
by_name(actual),
202202
by_name(expected),
@@ -226,10 +226,9 @@ fn given_attributes_are_made_available_in_given_order() -> crate::Result {
226226
out.reset();
227227
group.pattern_matching_relative_path(rela_path, Case::Sensitive, &mut out);
228228
assert_references(&out);
229-
let actual: Vec<_> = out.iter_selected().map(|m| m.into_owned().assignment).collect();
229+
let actual: Vec<_> = out.iter_selected().map(|m| m.assignment).collect();
230230
assert_eq!(
231-
actual.iter().map(|a| a.as_ref()).collect::<Vec<_>>(),
232-
expected,
231+
actual, expected,
233232
"{rela_path}: the order of everything matches perfectly"
234233
);
235234
}
@@ -241,7 +240,7 @@ fn given_attributes_are_made_available_in_given_order() -> crate::Result {
241240
Ok(())
242241
}
243242

244-
fn by_name(assignments: Vec<AssignmentRef<'_>>) -> BTreeMap<NameRef<'_>, StateRef<'_>> {
243+
fn by_name(assignments: Vec<AssignmentRef>) -> BTreeMap<NameRef, StateRef> {
245244
assignments.into_iter().map(|a| (a.name, a.state)).collect()
246245
}
247246

0 commit comments

Comments
 (0)