@@ -7,6 +7,7 @@ use crate::{
7
7
prefilter:: Prefilter ,
8
8
primitives:: { PatternID , StateID } ,
9
9
search:: { Anchored , HalfMatch , Input , MatchError } ,
10
+ start,
10
11
} ,
11
12
} ;
12
13
@@ -226,21 +227,50 @@ pub unsafe trait Automaton {
226
227
/// ```
227
228
fn next_eoi_state ( & self , current : StateID ) -> StateID ;
228
229
229
- /// Return the ID of the start state for this lazy DFA when executing a
230
- /// forward search .
230
+ /// Return the ID of the start state for this DFA for the given starting
231
+ /// configuration .
231
232
///
232
233
/// Unlike typical DFA implementations, the start state for DFAs in this
233
234
/// crate is dependent on a few different factors:
234
235
///
235
236
/// * The [`Anchored`] mode of the search. Unanchored, anchored and
236
237
/// anchored searches for a specific [`PatternID`] all use different start
237
238
/// states.
238
- /// * The position at which the search begins, via [`Input::start`]. This
239
- /// and the byte immediately preceding the start of the search (if one
240
- /// exists) influence which look-behind assertions are true at the start
241
- /// of the search. This in turn influences which start state is selected.
242
- /// * Whether the search is a forward or reverse search. This routine can
243
- /// only be used for forward searches.
239
+ /// * Whether a "look-behind" byte exists. For example, the `^` anchor
240
+ /// matches if and only if there is no look-behind byte.
241
+ /// * The specific value of that look-behind byte. For example, a `(?m:^)`
242
+ /// assertion only matches when there is either no look-behind byte, or
243
+ /// when the look-behind byte is a line terminator.
244
+ ///
245
+ /// The [starting configuration](start::Config) provides the above
246
+ /// information.
247
+ ///
248
+ /// This routine can be used for either forward or reverse searches.
249
+ /// Although, as a convenience, if you have an [`Input`], then it may
250
+ /// be more succinct to use [`Automaton::start_state_forward`] or
251
+ /// [`Automaton::start_state_reverse`]. Note, for example, that the
252
+ /// convenience routines return a [`MatchError`] on failure where as this
253
+ /// routine returns a [`StartError`].
254
+ ///
255
+ /// # Errors
256
+ ///
257
+ /// This may return a [`StartError`] if the search needs to give up when
258
+ /// determining the start state (for example, if it sees a "quit" byte).
259
+ /// This can also return an error if the given configuration contains an
260
+ /// unsupported [`Anchored`] configuration.
261
+ fn start_state (
262
+ & self ,
263
+ config : & start:: Config ,
264
+ ) -> Result < StateID , StartError > ;
265
+
266
+ /// Return the ID of the start state for this DFA when executing a forward
267
+ /// search.
268
+ ///
269
+ /// This is a convenience routine for calling [`Automaton::start_state`]
270
+ /// that converts the given [`Input`] to a [start
271
+ /// configuration](start::Config). Additionally, if an error occurs, it is
272
+ /// converted from a [`StartError`] to a [`MatchError`] using the offset
273
+ /// information in the given [`Input`].
244
274
///
245
275
/// # Errors
246
276
///
@@ -251,23 +281,30 @@ pub unsafe trait Automaton {
251
281
fn start_state_forward (
252
282
& self ,
253
283
input : & Input < ' _ > ,
254
- ) -> Result < StateID , MatchError > ;
284
+ ) -> Result < StateID , MatchError > {
285
+ let config = start:: Config :: from_input_forward ( input) ;
286
+ self . start_state ( & config) . map_err ( |err| match err {
287
+ StartError :: Quit { byte } => {
288
+ let offset = input
289
+ . start ( )
290
+ . checked_sub ( 1 )
291
+ . expect ( "no quit in start without look-behind" ) ;
292
+ MatchError :: quit ( byte, offset)
293
+ }
294
+ StartError :: UnsupportedAnchored { mode } => {
295
+ MatchError :: unsupported_anchored ( mode)
296
+ }
297
+ } )
298
+ }
255
299
256
- /// Return the ID of the start state for this lazy DFA when executing a
257
- /// reverse search.
300
+ /// Return the ID of the start state for this DFA when executing a reverse
301
+ /// search.
258
302
///
259
- /// Unlike typical DFA implementations, the start state for DFAs in this
260
- /// crate is dependent on a few different factors:
261
- ///
262
- /// * The [`Anchored`] mode of the search. Unanchored, anchored and
263
- /// anchored searches for a specific [`PatternID`] all use different start
264
- /// states.
265
- /// * The position at which the search begins, via [`Input::start`]. This
266
- /// and the byte immediately preceding the start of the search (if one
267
- /// exists) influence which look-behind assertions are true at the start
268
- /// of the search. This in turn influences which start state is selected.
269
- /// * Whether the search is a forward or reverse search. This routine can
270
- /// only be used for reverse searches.
303
+ /// This is a convenience routine for calling [`Automaton::start_state`]
304
+ /// that converts the given [`Input`] to a [start
305
+ /// configuration](start::Config). Additionally, if an error occurs, it is
306
+ /// converted from a [`StartError`] to a [`MatchError`] using the offset
307
+ /// information in the given [`Input`].
271
308
///
272
309
/// # Errors
273
310
///
@@ -278,7 +315,18 @@ pub unsafe trait Automaton {
278
315
fn start_state_reverse (
279
316
& self ,
280
317
input : & Input < ' _ > ,
281
- ) -> Result < StateID , MatchError > ;
318
+ ) -> Result < StateID , MatchError > {
319
+ let config = start:: Config :: from_input_reverse ( input) ;
320
+ self . start_state ( & config) . map_err ( |err| match err {
321
+ StartError :: Quit { byte } => {
322
+ let offset = input. end ( ) ;
323
+ MatchError :: quit ( byte, offset)
324
+ }
325
+ StartError :: UnsupportedAnchored { mode } => {
326
+ MatchError :: unsupported_anchored ( mode)
327
+ }
328
+ } )
329
+ }
282
330
283
331
/// If this DFA has a universal starting state for the given anchor mode
284
332
/// and the DFA supports universal starting states, then this returns that
@@ -1798,6 +1846,14 @@ unsafe impl<'a, A: Automaton + ?Sized> Automaton for &'a A {
1798
1846
( * * self ) . next_eoi_state ( current)
1799
1847
}
1800
1848
1849
+ #[ inline]
1850
+ fn start_state (
1851
+ & self ,
1852
+ config : & start:: Config ,
1853
+ ) -> Result < StateID , StartError > {
1854
+ ( * * self ) . start_state ( config)
1855
+ }
1856
+
1801
1857
#[ inline]
1802
1858
fn start_state_forward (
1803
1859
& self ,
@@ -2015,6 +2071,90 @@ impl OverlappingState {
2015
2071
}
2016
2072
}
2017
2073
2074
+ /// An error that can occur when computing the start state for a search.
2075
+ ///
2076
+ /// Computing a start state can fail for a few reasons, either based on
2077
+ /// incorrect configuration or even based on whether the look-behind byte
2078
+ /// triggers a quit state. Typically one does not need to handle this error
2079
+ /// if you're using [`Automaton::start_state_forward`] (or its reverse
2080
+ /// counterpart), as that routine automatically converts `StartError` to a
2081
+ /// [`MatchError`] for you.
2082
+ ///
2083
+ /// This error may be returned by the [`Automaton::start_state`] routine.
2084
+ ///
2085
+ /// This error implements the `std::error::Error` trait when the `std` feature
2086
+ /// is enabled.
2087
+ ///
2088
+ /// This error is marked as non-exhaustive. New variants may be added in a
2089
+ /// semver compatible release.
2090
+ #[ non_exhaustive]
2091
+ #[ derive( Clone , Debug ) ]
2092
+ pub enum StartError {
2093
+ /// An error that occurs when a starting configuration's look-behind byte
2094
+ /// is in this DFA's quit set.
2095
+ Quit {
2096
+ /// The quit byte that was found.
2097
+ byte : u8 ,
2098
+ } ,
2099
+ /// An error that occurs when the caller requests an anchored mode that
2100
+ /// isn't supported by the DFA.
2101
+ UnsupportedAnchored {
2102
+ /// The anchored mode given that is unsupported.
2103
+ mode : Anchored ,
2104
+ } ,
2105
+ }
2106
+
2107
+ impl StartError {
2108
+ pub ( crate ) fn quit ( byte : u8 ) -> StartError {
2109
+ StartError :: Quit { byte }
2110
+ }
2111
+
2112
+ pub ( crate ) fn unsupported_anchored ( mode : Anchored ) -> StartError {
2113
+ StartError :: UnsupportedAnchored { mode }
2114
+ }
2115
+ }
2116
+
2117
// The `std::error::Error` trait is only available with the `std` feature.
// The default trait methods suffice, so the impl body is empty.
#[cfg(feature = "std")]
impl std::error::Error for StartError {}
2119
+
2120
+ impl core:: fmt:: Display for StartError {
2121
+ fn fmt ( & self , f : & mut core:: fmt:: Formatter < ' _ > ) -> core:: fmt:: Result {
2122
+ match * self {
2123
+ StartError :: Quit { byte } => write ! (
2124
+ f,
2125
+ "error computing start state because the look-behind byte \
2126
+ {:?} triggered a quit state",
2127
+ crate :: util:: escape:: DebugByte ( byte) ,
2128
+ ) ,
2129
+ StartError :: UnsupportedAnchored { mode : Anchored :: Yes } => {
2130
+ write ! (
2131
+ f,
2132
+ "error computing start state because \
2133
+ anchored searches are not supported or enabled"
2134
+ )
2135
+ }
2136
+ StartError :: UnsupportedAnchored { mode : Anchored :: No } => {
2137
+ write ! (
2138
+ f,
2139
+ "error computing start state because \
2140
+ unanchored searches are not supported or enabled"
2141
+ )
2142
+ }
2143
+ StartError :: UnsupportedAnchored {
2144
+ mode : Anchored :: Pattern ( pid) ,
2145
+ } => {
2146
+ write ! (
2147
+ f,
2148
+ "error computing start state because \
2149
+ anchored searches for a specific pattern ({}) \
2150
+ are not supported or enabled",
2151
+ pid. as_usize( ) ,
2152
+ )
2153
+ }
2154
+ }
2155
+ }
2156
+ }
2157
+
2018
2158
/// Runs the given overlapping `search` function (forwards or backwards) until
2019
2159
/// a match is found whose offset does not split a codepoint.
2020
2160
///
0 commit comments