Skip to content

Commit 70bd296

Browse files
committed
Optimize stripos/stristr
Closes GH-7847. Closes GH-7852. Previously, stripos/stristr would lowercase both the haystack and the needle in order to reuse strpos. The approach in this PR is similar to strpos: memchr is highly optimized, so we use it to search the haystack for the first character of the needle. If we find it, we compare the remaining characters of the needle manually. The new implementation seems to perform about half as well as strpos (as two memchr calls, one per letter case, are necessary to find the next candidate).
1 parent b27d2ff commit 70bd296

File tree

5 files changed

+75
-22
lines changed

5 files changed

+75
-22
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ PHP NEWS
2020
. net_get_interfaces() also reports wireless network interfaces on Windows.
2121
(Yurun)
2222
. Finished AVIF support in getimagesize(). (Yannis Guyon)
23+
. Fixed bug GH-7847 (stripos with large haystack has bad performance).
24+
(ilutov)
2325

2426
- Zip:
2527
. add ZipArchive::clearError() method

Zend/zend_operators.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,67 @@ static zend_always_inline void zend_unwrap_reference(zval *op) /* {{{ */
919919
}
920920
/* }}} */
921921

922+
/*
 * Compare the first `num` bytes of `ptr1` and `ptr2`, passing every byte
 * through zend_tolower_ascii() before comparing.
 *
 * Returns true when all `num` byte pairs compare equal (trivially true for
 * num == 0), false at the first mismatch. Both buffers must be readable for
 * at least `num` bytes.
 */
static zend_always_inline bool zend_strnieq(const char *ptr1, const char *ptr2, size_t num)
{
	for (size_t i = 0; i < num; i++) {
		if (zend_tolower_ascii(ptr1[i]) != zend_tolower_ascii(ptr2[i])) {
			return false;
		}
	}

	return true;
}
932+
933+
/*
 * Case-insensitive search for `needle` (of `needle_len` bytes) inside the
 * byte range [haystack, end). Case folding is done with
 * zend_tolower_ascii()/zend_toupper_ascii().
 *
 * Returns a pointer to the first match inside the haystack, `haystack`
 * itself for an empty needle, or NULL when there is no match or the needle
 * is longer than the haystack.
 *
 * Strategy: memchr() locates candidates for the first needle byte (once for
 * its lower-case form and once for its upper-case form); for each candidate
 * the last needle byte is checked first, then the bytes in between.
 */
static zend_always_inline const char *
zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const char *end)
{
	ZEND_ASSERT(end >= haystack);

	/* end >= haystack is asserted above, so the difference is non-negative. */
	if (UNEXPECTED(needle_len > (size_t)(end - haystack))) {
		return NULL;
	}

	if (UNEXPECTED(needle_len == 0)) {
		return haystack;
	}

	const char first_lower = zend_tolower_ascii(*needle);
	const char first_upper = zend_toupper_ascii(*needle);
	const char *p_lower = (const char *)memchr(haystack, first_lower, end - haystack);
	const char *p_upper = NULL;
	if (first_lower != first_upper) {
		/*
		 * If the needle length is 1, a lower-case hit is a guaranteed match,
		 * so an upper-case hit only matters when it lies *before* p_lower.
		 * The search starts at haystack, hence the window is
		 * p_lower - haystack bytes (not end - p_lower, which would skip
		 * earlier upper-case matches such as "xxxAa" searched for "a").
		 */
		size_t upper_search_length = needle_len == 1 && p_lower != NULL
			? (size_t)(p_lower - haystack)
			: (size_t)(end - haystack);
		p_upper = (const char *)memchr(haystack, first_upper, upper_search_length);
	}
	/* Earliest candidate of either case; NULL when neither was found. */
	const char *p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper;

	if (needle_len == 1) {
		return p;
	}

	const char needle_end_lower = zend_tolower_ascii(needle[needle_len - 1]);
	const char needle_end_upper = zend_toupper_ascii(needle[needle_len - 1]);
	/* From here on `end` is the last position at which a match can start. */
	end -= needle_len;

	while (p && p <= end) {
		/* Cheap rejection: compare the last needle byte before the middle. */
		if (needle_end_lower == p[needle_len - 1] || needle_end_upper == p[needle_len - 1]) {
			/* First and last bytes already match; compare the needle_len - 2 bytes in between. */
			if (zend_strnieq(needle + 1, p + 1, needle_len - 2)) {
				return p;
			}
		}
		/* Advance whichever cursor produced this candidate; range is (p, end]. */
		if (p_lower == p) {
			p_lower = (const char *)memchr(p_lower + 1, first_lower, end - p_lower);
		}
		if (p_upper == p) {
			p_upper = (const char *)memchr(p_upper + 1, first_upper, end - p_upper);
		}
		p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper;
	}

	return NULL;
}
982+
922983

923984
END_EXTERN_C()
924985

ext/standard/php_string.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ PHPAPI void php_stripcslashes(zend_string *str);
4848
PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t sufflen);
4949
PHPAPI size_t php_dirname(char *str, size_t len);
5050
PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len);
51+
PHPAPI const char *php_stristr_no_tolower(const char *s, const char *t, size_t s_len, size_t t_len);
5152
PHPAPI zend_string *php_str_to_str(const char *haystack, size_t length, const char *needle,
5253
size_t needle_len, const char *str, size_t str_len);
5354
PHPAPI zend_string *php_trim(zend_string *str, const char *what, size_t what_len, int mode);

ext/standard/string.c

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,11 @@ PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
16931693
}
16941694
/* }}} */
16951695

1696+
/*
 * Case-insensitive substring search over const inputs.
 *
 * Looks for `t` (t_len bytes) inside `s` (s_len bytes) by delegating to
 * php_memnistr() and returns whatever it returns: a pointer into `s` on a
 * match, NULL otherwise.
 */
PHPAPI const char *php_stristr_no_tolower(const char *s, const char *t, size_t s_len, size_t t_len)
{
	const char *haystack_end = s + s_len;

	return php_memnistr(s, t, t_len, haystack_end);
}
1700+
16961701
/* {{{ php_strspn */
16971702
PHPAPI size_t php_strspn(const char *s1, const char *s2, const char *s1_end, const char *s2_end)
16981703
{
@@ -1735,8 +1740,6 @@ PHP_FUNCTION(stristr)
17351740
zend_string *haystack, *needle;
17361741
const char *found = NULL;
17371742
size_t found_offset;
1738-
char *haystack_dup;
1739-
char *orig_needle;
17401743
bool part = 0;
17411744

17421745
ZEND_PARSE_PARAMETERS_START(2, 3)
@@ -1746,13 +1749,10 @@ PHP_FUNCTION(stristr)
17461749
Z_PARAM_BOOL(part)
17471750
ZEND_PARSE_PARAMETERS_END();
17481751

1749-
haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
1750-
orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
1751-
found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
1752-
efree(orig_needle);
1752+
found = php_stristr_no_tolower(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
17531753

17541754
if (found) {
1755-
found_offset = found - haystack_dup;
1755+
found_offset = found - ZSTR_VAL(haystack);
17561756
if (part) {
17571757
RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
17581758
} else {
@@ -1761,8 +1761,6 @@ PHP_FUNCTION(stristr)
17611761
} else {
17621762
RETVAL_FALSE;
17631763
}
1764-
1765-
efree(haystack_dup);
17661764
}
17671765
/* }}} */
17681766

@@ -1890,7 +1888,6 @@ PHP_FUNCTION(stripos)
18901888
const char *found = NULL;
18911889
zend_string *haystack, *needle;
18921890
zend_long offset = 0;
1893-
zend_string *needle_dup = NULL, *haystack_dup;
18941891

18951892
ZEND_PARSE_PARAMETERS_START(2, 3)
18961893
Z_PARAM_STR(haystack)
@@ -1907,23 +1904,14 @@ PHP_FUNCTION(stripos)
19071904
RETURN_THROWS();
19081905
}
19091906

1910-
if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
1911-
RETURN_FALSE;
1912-
}
1913-
1914-
haystack_dup = zend_string_tolower(haystack);
1915-
needle_dup = zend_string_tolower(needle);
1916-
found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
1917-
ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
1907+
found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
1908+
ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
19181909

19191910
if (found) {
1920-
RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
1911+
RETVAL_LONG(found - ZSTR_VAL(haystack));
19211912
} else {
19221913
RETVAL_FALSE;
19231914
}
1924-
1925-
zend_string_release_ex(haystack_dup, 0);
1926-
zend_string_release_ex(needle_dup, 0);
19271915
}
19281916
/* }}} */
19291917

main/php.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ END_EXTERN_C()
344344
#define phpin zendin
345345

346346
#define php_memnstr zend_memnstr
347+
#define php_memnistr zend_memnistr
347348

348349
/* functions */
349350
BEGIN_EXTERN_C()

0 commit comments

Comments
 (0)