Skip to content

Commit 9fd7dce

Browse files
committed
[libc++][regex] Correctly adjust match prefix for zero-length matches.
1 parent dbc3e26 commit 9fd7dce

File tree

2 files changed

+67
-2
lines changed

2 files changed

+67
-2
lines changed

libcxx/include/regex

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4700,6 +4700,9 @@ private:
47004700

47014701
template <class, class>
47024702
friend class __lookahead;
4703+
4704+
template <class, class, class>
4705+
friend class regex_iterator;
47034706
};
47044707

47054708
template <class _BidirectionalIterator, class _Allocator>
@@ -5410,7 +5413,9 @@ template <class _BidirectionalIterator, class _CharT, class _Traits>
54105413
regex_iterator<_BidirectionalIterator, _CharT, _Traits>&
54115414
regex_iterator<_BidirectionalIterator, _CharT, _Traits>::operator++() {
54125415
__flags_ |= regex_constants::__no_update_pos;
5413-
_BidirectionalIterator __start = __match_[0].second;
5416+
_BidirectionalIterator __start = __match_[0].second;
5417+
_BidirectionalIterator __prefix_start = __start;
5418+
54145419
if (__match_[0].first == __match_[0].second) {
54155420
if (__start == __end_) {
54165421
__match_ = value_type();
@@ -5424,9 +5429,21 @@ regex_iterator<_BidirectionalIterator, _CharT, _Traits>::operator++() {
54245429
else
54255430
++__start;
54265431
}
5432+
54275433
__flags_ |= regex_constants::match_prev_avail;
5428-
if (!std::regex_search(__start, __end_, __match_, *__pregex_, __flags_))
5434+
if (!std::regex_search(__start, __end_, __match_, *__pregex_, __flags_)) {
54295435
__match_ = value_type();
5436+
5437+
} else {
5438+
// The Standard mandates ([re.regiter.incr]) that if `regex_search` returns true, "`match.prefix().first` shall be
5439+
// equal to the previous value of `match[0].second`... It is unspecified how the implementation makes these
5440+
// adjustments." The adjustment is necessary if we incremented `__start` above (the branch that deals with
5441+
// zero-length matches).
5442+
auto& __prefix = __match_.__prefix_;
5443+
__prefix.first = __prefix_start;
5444+
__prefix.matched = __prefix.first != __prefix.second;
5445+
}
5446+
54305447
return *this;
54315448
}
54325449

libcxx/test/std/re/re.iter/re.regiter/re.regiter.incr/post.pass.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,5 +114,53 @@ int main(int, char**)
114114
assert(i == e);
115115
}
116116

117+
{
118+
// Check that we correctly adjust the match prefix when dealing with zero-length matches -- this is explicitly
119+
// required by the Standard ([re.regiter.incr]: "In all cases in which the call to `regex_search` returns true,
120+
// `match.prefix().first` shall be equal to the previous value of `match[0].second`"). For a pattern that matches
121+
// empty sequences, there is an implicit zero-length match at every position in a string (including both ends) -- make sure the
122+
// prefix of each of these matches (except the first one) is the preceding character.
123+
124+
auto validate = [](const std::regex& empty_matching_pattern) {
125+
const char source[] = "abc";
126+
127+
std::cregex_iterator i(source, source + 3, empty_matching_pattern);
128+
assert(!i->prefix().matched);
129+
assert(i->prefix().length() == 0);
130+
assert(i->prefix().first == source);
131+
assert(i->prefix().second == source);
132+
133+
++i;
134+
assert(i->prefix().matched);
135+
assert(i->prefix().length() == 1);
136+
assert(i->prefix().first == source);
137+
assert(i->prefix().second == source + 1);
138+
assert(i->prefix().str() == "a");
139+
140+
++i;
141+
assert(i->prefix().matched);
142+
assert(i->prefix().length() == 1);
143+
assert(i->prefix().first == source + 1);
144+
assert(i->prefix().second == source + 2);
145+
assert(i->prefix().str() == "b");
146+
147+
++i;
148+
assert(i->prefix().matched);
149+
assert(i->prefix().length() == 1);
150+
assert(i->prefix().first == source + 2);
151+
assert(i->prefix().second == source + 3);
152+
assert(i->prefix().str() == "c");
153+
154+
++i;
155+
assert(i == std::cregex_iterator());
156+
};
157+
158+
// An empty pattern produces zero-length matches.
159+
validate(std::regex(""));
160+
// Any character repeated zero or more times can produce zero-length matches.
161+
validate(std::regex("X*"));
162+
validate(std::regex("X{0,3}"));
163+
}
164+
117165
return 0;
118166
}

0 commit comments

Comments
 (0)