Skip to content

Imply UTF8 validity in explode function #10805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions ext/standard/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,8 @@ PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return
const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
zval tmp;
zend_string *tmp2;
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(delim, str);

if (p2 == NULL) {
ZVAL_STR_COPY(&tmp, str);
Expand All @@ -831,15 +833,19 @@ PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return
ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
do {
ZEND_HASH_FILL_GROW();
ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
tmp2 = zend_string_init_fast(p1, p2 - p1);
GC_ADD_FLAGS(tmp2, flags);
ZEND_HASH_FILL_SET_STR(tmp2);
ZEND_HASH_FILL_NEXT();
p1 = p2 + ZSTR_LEN(delim);
p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
} while (p2 != NULL && --limit > 1);

if (p1 <= endp) {
ZEND_HASH_FILL_GROW();
ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
tmp2 = zend_string_init_fast(p1, endp - p1);
GC_ADD_FLAGS(tmp2, flags);
ZEND_HASH_FILL_SET_STR(tmp2);
ZEND_HASH_FILL_NEXT();
}
} ZEND_HASH_FILL_END();
Expand All @@ -855,6 +861,7 @@ PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *st
const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
zval tmp;
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(delim, str);

if (p2 == NULL) {
/*
Expand All @@ -880,6 +887,7 @@ PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *st
/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
GC_ADD_FLAGS(Z_STR(tmp), flags);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
efree((void *)positions);
Expand Down
50 changes: 50 additions & 0 deletions ext/zend_test/tests/strings_marked_as_utf8.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,34 @@ var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));
$string_concat = implode('', [1, 1.0, 'a']);
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));

echo "explode:\n";
$string = 'můj žlutý kůň';
$firstByte = substr('ů', 0, 1); // a byte that occurs in $string but is not valid UTF-8 on its own
// Prints one line describing the given strings: for each element, "true" or
// "false" according to zend_test_is_string_marked_as_valid_utf8(), separated
// by single spaces. Prints "empty" when the array has no elements.
$dumpUtf8ValidityArrFx = function (array $strings): void {
    $labels = [];
    foreach ($strings as $value) {
        $labels[] = zend_test_is_string_marked_as_valid_utf8($value) ? 'true' : 'false';
    }

    // An empty input array joins to '', which is falsy and selects the placeholder.
    $line = implode(' ', $labels);
    echo $line ?: 'empty', "\n";
};
$dumpUtf8ValidityArrFx(explode("\xff", ''));
$dumpUtf8ValidityArrFx(explode('ů', $string));
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff"));
$dumpUtf8ValidityArrFx(explode('ů', $string, 1));
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", 1));
$dumpUtf8ValidityArrFx(explode($firstByte, $string));
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff"));
$dumpUtf8ValidityArrFx(explode("\xff", $string));
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff"));
$dumpUtf8ValidityArrFx(explode('ů', $string, -1));
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", -1));
$dumpUtf8ValidityArrFx(explode($firstByte, $string, -1));
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff", -1));
$dumpUtf8ValidityArrFx(explode("\xff", $string, -1));
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff", -1));
$dumpUtf8ValidityArrFx(explode('ů', $string, -2));
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", -2));
$dumpUtf8ValidityArrFx(explode($firstByte, $string, -2));
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff", -2));
$dumpUtf8ValidityArrFx(explode("\xff", $string, -2));
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff", -2));

?>
--EXPECT--
Empty strings:
Expand Down Expand Up @@ -199,3 +227,25 @@ bool(true)
bool(true)
bool(true)
bool(true)
explode:
true
true true true
true false false
true
false
true false false false false
true false false false false
false
false true
true true
false false
false false false false
false false false false
empty
false
true
false
false false false
false false false
empty
empty