php · alexdowad · Jan 20, 2023 · Jan 22, 2023 · youkidearitai · Jan 22, 2023
@@ -1534,7 +1534,7 @@ PHP_FUNCTION(mb_output_handler)
 
 	const mbfl_encoding *encoding = MBSTRG(current_http_output_encoding);
 	if (encoding == &mbfl_encoding_pass) {
-		RETURN_STR(zend_string_copy(str));
+		RETURN_STR_COPY(str);
 	}
 
 	if (arg_status & PHP_OUTPUT_HANDLER_START) {
@@ -1574,7 +1574,7 @@ PHP_FUNCTION(mb_output_handler)
 	}
 
 	if (!MBSTRG(outconv_enabled)) {
-		RETURN_STR(zend_string_copy(str));
+		RETURN_STR_COPY(str);
 	}
 
 	mb_convert_buf buf;
@@ -5066,12 +5066,10 @@ PHP_FUNCTION(mb_chr)
 /* {{{ */
 PHP_FUNCTION(mb_scrub)
 {
-	char* str;
-	size_t str_len;
-	zend_string *enc_name = NULL;
+	zend_string *str, *enc_name = NULL;
 
 	ZEND_PARSE_PARAMETERS_START(1, 2)
-		Z_PARAM_STRING(str, str_len)
+		Z_PARAM_STR(str)
 		Z_PARAM_OPTIONAL
 		Z_PARAM_STR_OR_NULL(enc_name)
 	ZEND_PARSE_PARAMETERS_END();
@@ -5081,7 +5079,12 @@ PHP_FUNCTION(mb_scrub)
 		RETURN_THROWS();
 	}
 
-	RETURN_STR(php_mb_convert_encoding_ex(str, str_len, enc, enc));
+	if (enc == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) {
+		/* A valid UTF-8 string will not be changed by mb_scrub, so just increment the refcount and return it */
+		RETURN_STR_COPY(str);
+	}
+
+	RETURN_STR(php_mb_convert_encoding_ex(ZSTR_VAL(str), ZSTR_LEN(str), enc, enc));
 }
 /* }}} */
 

@@ -8,7 +8,15 @@ var_dump(
     "?" === mb_scrub("\x80"),
     "?" === mb_scrub("\x80", 'UTF-8')
 );
+
+$utf8str = "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞";
+// Check $utf8str so that it is marked as 'valid UTF-8'.
+// This enables the optimized implementation of mb_scrub.
+if (!mb_check_encoding($utf8str, 'UTF-8'))
+    die("Test string should be valid UTF-8");
+var_dump(mb_scrub($utf8str));
 ?>
 --EXPECT--
 bool(true)
 bool(true)
+string(122) "abc 日本語 Οὐχὶ ταὐτὰ παρίσταταί μοι γιγνώσκειν ⡍⠔⠙⠖ ⡊ ⠙⠕⠝⠰⠞"