php · iluuu1994 · Dec 29, 2021 · Jan 27, 2022 · TysonAndre · Jan 7, 2022
diff --git a/NEWS b/NEWS
@@ -20,6 +20,8 @@ PHP                                                                        NEWS
   . net_get_interfaces() also reports wireless network interfaces on Windows.
     (Yurun)
   . Finished AVIF support in getimagesize(). (Yannis Guyon)
+  . Fixed bug GH-7847 (stripos with large haystack has bad performance).
+    (ilutov)
 
 - Zip:
   . add ZipArchive::clearError() method

diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
@@ -17,6 +17,9 @@ PHP 8.2 INTERNALS UPGRADE NOTES
   zend_binary_str(n)casecmp() as one would expect. Call the appropriate
   wrapped function directly instead.
 * Removed the (ZEND_)WRONG_PARAM_COUNT_WITH_RETVAL() macros.
+* php_stristr() no longer lowercases the haystack and needle as a side effect.
+  Call zend_str_tolower() yourself if necessary. You no longer need to copy
+  the haystack and needle before passing them to php_stristr().
 
 ========================
 2. Build system changes

diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h
@@ -919,6 +919,67 @@ static zend_always_inline void zend_unwrap_reference(zval *op) /* {{{ */
 }
 /* }}} */
 
+static zend_always_inline bool zend_strnieq(const char *ptr1, const char *ptr2, size_t num)
+{
+	const char *end = ptr1 + num;
+	while (ptr1 < end) {
+		if (zend_tolower_ascii(*ptr1++) != zend_tolower_ascii(*ptr2++)) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static zend_always_inline const char *
+zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const char *end)
+{
+	ZEND_ASSERT(end >= haystack);
+
+	if (UNEXPECTED(needle_len > (end - haystack))) {
+		return NULL;
+	}
+
+	if (UNEXPECTED(needle_len == 0)) {
+		return haystack;
+	}
+
+	const char first_lower = zend_tolower_ascii(*needle);
+	const char first_upper = zend_toupper_ascii(*needle);
+	const char *p_lower = (const char *)memchr(haystack, first_lower, end - haystack);
+	const char *p_upper = NULL;
+	if (first_lower != first_upper) { 
+		// If the needle length is 1 we don't need to look beyond p_lower as it is a guaranteed match
+		size_t upper_search_length = end - (needle_len == 1 && p_lower != NULL ? p_lower : haystack);
+		p_upper = (const char *)memchr(haystack, first_upper, upper_search_length);
+	}
+	const char *p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper;
+
+	if (needle_len == 1) {
+		return p;
+	}
+
+	const char needle_end_lower = zend_tolower_ascii(needle[needle_len - 1]);
+	const char needle_end_upper = zend_toupper_ascii(needle[needle_len - 1]);
+	end -= needle_len;
+
+	while (p && p <= end) {
+		if (needle_end_lower == p[needle_len - 1] || needle_end_upper == p[needle_len - 1]) {
+			if (zend_strnieq(needle + 1, p + 1, needle_len - 2)) {
+				return p;
+			}
+		}
+		if (p_lower == p) {
+			p_lower = (const char *)memchr(p_lower + 1, first_lower, end - p_lower);
+		}
+		if (p_upper == p) {
+			p_upper = (const char *)memchr(p_upper + 1, first_upper, end - p_upper);
+		}
+		p = !p_upper || (p_lower && p_lower < p_upper) ? p_lower : p_upper;
+	}
+
+	return NULL;
+}
+
 
 END_EXTERN_C()
 

@@ -378,9 +378,9 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
 				const char buf[] = "Content-Type:";
 				if (Z_TYPE_P(header) == IS_STRING &&
 						!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
-					char *needle = estrdup("charset=");
+					char needle[] = "charset=";
 					char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
-					char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
+					char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), strlen(needle));
 
 					if (encoding) {
 						char *end;
@@ -408,7 +408,6 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
 						}
 					}
 					efree(haystack);
-					efree(needle);
 					break; /* found content-type */
 				}
 			} ZEND_HASH_FOREACH_END();

diff --git a/ext/phar/phar.c b/ext/phar/phar.c
@@ -2531,7 +2531,6 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
 {
 	char halt_stub[] = "__HALT_COMPILER();";
 	zend_string *newstub;
-	char *tmp;
 	phar_entry_info *entry, *newentry;
 	size_t halt_offset;
 	int restore_alias_len, global_flags = 0, closeoldfile;
@@ -2635,9 +2634,7 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
 		} else {
 			free_user_stub = 0;
 		}
-		tmp = estrndup(user_stub, len);
-		if ((pos = php_stristr(tmp, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
-			efree(tmp);
+		if ((pos = php_stristr(user_stub, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
 			if (closeoldfile) {
 				php_stream_close(oldfile);
 			}
@@ -2650,8 +2647,7 @@ int phar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int conv
 			}
 			return EOF;
 		}
-		pos = user_stub + (pos - tmp);
-		efree(tmp);
+		pos = user_stub + (pos - user_stub);
 		len = pos - user_stub + 18;
 		if ((size_t)len != php_stream_write(newfile, user_stub, len)
 		||			  5 != php_stream_write(newfile, " ?>\r\n", 5)) {

diff --git a/ext/phar/tar.c b/ext/phar/tar.c
@@ -967,7 +967,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 	int closeoldfile, free_user_stub;
 	size_t signature_length;
 	struct _phar_pass_tar_info pass;
-	char *buf, *signature, *tmp, sigbuf[8];
+	char *buf, *signature, sigbuf[8];
 	char halt_stub[] = "__HALT_COMPILER();";
 
 	entry.flags = PHAR_ENT_PERM_DEF_FILE;
@@ -1063,9 +1063,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 			free_user_stub = 0;
 		}
 
-		tmp = estrndup(user_stub, len);
-		if ((pos = php_stristr(tmp, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
-			efree(tmp);
+		if ((pos = php_stristr(user_stub, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
 			if (error) {
 				spprintf(error, 0, "illegal stub for tar-based phar \"%s\"", phar->fname);
 			}
@@ -1074,8 +1072,7 @@ int phar_tar_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 			}
 			return EOF;
 		}
-		pos = user_stub + (pos - tmp);
-		efree(tmp);
+		pos = user_stub + (pos - user_stub);
 
 		len = pos - user_stub + 18;
 		entry.fp = php_stream_fopen_tmpfile();

diff --git a/ext/phar/zip.c b/ext/phar/zip.c
@@ -1202,7 +1202,6 @@ int phar_zip_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 	char *pos;
 	static const char newstub[] = "<?php // zip-based phar archive stub file\n__HALT_COMPILER();";
 	char halt_stub[] = "__HALT_COMPILER();";
-	char *tmp;
 
 	php_stream *stubfile, *oldfile;
 	int free_user_stub, closeoldfile = 0;
@@ -1305,9 +1304,7 @@ int phar_zip_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 			free_user_stub = 0;
 		}
 
-		tmp = estrndup(user_stub, len);
-		if ((pos = php_stristr(tmp, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
-			efree(tmp);
+		if ((pos = php_stristr(user_stub, halt_stub, len, sizeof(halt_stub) - 1)) == NULL) {
 			if (error) {
 				spprintf(error, 0, "illegal stub for zip-based phar \"%s\"", phar->fname);
 			}
@@ -1316,8 +1313,7 @@ int phar_zip_flush(phar_archive_data *phar, char *user_stub, zend_long len, int
 			}
 			return EOF;
 		}
-		pos = user_stub + (pos - tmp);
-		efree(tmp);
+		pos = user_stub + (pos - user_stub);
 
 		len = pos - user_stub + 18;
 		entry.fp = php_stream_fopen_tmpfile();

@@ -1687,9 +1687,7 @@ PHP_FUNCTION(pathinfo)
    case insensitive strstr */
 PHPAPI char *php_stristr(char *s, char *t, size_t s_len, size_t t_len)
 {
-	zend_str_tolower(s, s_len);
-	zend_str_tolower(t, t_len);
-	return (char*)php_memnstr(s, t, t_len, s + s_len);
+	return (char*)php_memnistr(s, t, t_len, s + s_len);
 }
 /* }}} */
 
@@ -1735,8 +1733,6 @@ PHP_FUNCTION(stristr)
 	zend_string *haystack, *needle;
 	const char *found = NULL;
 	size_t  found_offset;
-	char *haystack_dup;
-	char *orig_needle;
 	bool part = 0;
 
 	ZEND_PARSE_PARAMETERS_START(2, 3)
@@ -1746,13 +1742,10 @@ PHP_FUNCTION(stristr)
 		Z_PARAM_BOOL(part)
 	ZEND_PARSE_PARAMETERS_END();
 
-	haystack_dup = estrndup(ZSTR_VAL(haystack), ZSTR_LEN(haystack));
-	orig_needle = estrndup(ZSTR_VAL(needle), ZSTR_LEN(needle));
-	found = php_stristr(haystack_dup, orig_needle, ZSTR_LEN(haystack), ZSTR_LEN(needle));
-	efree(orig_needle);
+	found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
 
 	if (found) {
-		found_offset = found - haystack_dup;
+		found_offset = found - ZSTR_VAL(haystack);
 		if (part) {
 			RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
 		} else {
@@ -1761,8 +1754,6 @@ PHP_FUNCTION(stristr)
 	} else {
 		RETVAL_FALSE;
 	}
-
-	efree(haystack_dup);
 }
 /* }}} */
 
@@ -1890,7 +1881,6 @@ PHP_FUNCTION(stripos)
 	const char *found = NULL;
 	zend_string *haystack, *needle;
 	zend_long offset = 0;
-	zend_string *needle_dup = NULL, *haystack_dup;
 
 	ZEND_PARSE_PARAMETERS_START(2, 3)
 		Z_PARAM_STR(haystack)
@@ -1907,23 +1897,14 @@ PHP_FUNCTION(stripos)
 		RETURN_THROWS();
 	}
 
-	if (ZSTR_LEN(needle) > ZSTR_LEN(haystack)) {
-		RETURN_FALSE;
-	}
-
-	haystack_dup = zend_string_tolower(haystack);
-	needle_dup = zend_string_tolower(needle);
-	found = (char*)php_memnstr(ZSTR_VAL(haystack_dup) + offset,
-			ZSTR_VAL(needle_dup), ZSTR_LEN(needle_dup), ZSTR_VAL(haystack_dup) + ZSTR_LEN(haystack));
+	found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
+			ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
 
 	if (found) {
-		RETVAL_LONG(found - ZSTR_VAL(haystack_dup));
+		RETVAL_LONG(found - ZSTR_VAL(haystack));
 	} else {
 		RETVAL_FALSE;
 	}
-
-	zend_string_release_ex(haystack_dup, 0);
-	zend_string_release_ex(needle_dup, 0);
 }
 /* }}} */
 

@@ -344,6 +344,7 @@ END_EXTERN_C()
 #define phpin zendin
 
 #define php_memnstr zend_memnstr
+#define php_memnistr zend_memnistr
 
 /* functions */
 BEGIN_EXTERN_C()