Skip to content

Commit 7c70cd0

Browse files
committed
Imply UTF8 validity in explode function
1 parent b39ff33 commit 7c70cd0

File tree

2 files changed

+60
-2
lines changed

2 files changed

+60
-2
lines changed

ext/standard/string.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,8 @@ PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return
822822
const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
823823
const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
824824
zval tmp;
825+
zend_string *tmp2;
826+
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(delim, str);
825827

826828
if (p2 == NULL) {
827829
ZVAL_STR_COPY(&tmp, str);
@@ -831,15 +833,19 @@ PHPAPI void php_explode(const zend_string *delim, zend_string *str, zval *return
831833
ZEND_HASH_FILL_PACKED(Z_ARRVAL_P(return_value)) {
832834
do {
833835
ZEND_HASH_FILL_GROW();
834-
ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, p2 - p1));
836+
tmp2 = zend_string_init_fast(p1, p2 - p1);
837+
GC_ADD_FLAGS(tmp2, flags);
838+
ZEND_HASH_FILL_SET_STR(tmp2);
835839
ZEND_HASH_FILL_NEXT();
836840
p1 = p2 + ZSTR_LEN(delim);
837841
p2 = php_memnstr(p1, ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
838842
} while (p2 != NULL && --limit > 1);
839843

840844
if (p1 <= endp) {
841845
ZEND_HASH_FILL_GROW();
842-
ZEND_HASH_FILL_SET_STR(zend_string_init_fast(p1, endp - p1));
846+
tmp2 = zend_string_init_fast(p1, endp - p1);
847+
GC_ADD_FLAGS(tmp2, flags);
848+
ZEND_HASH_FILL_SET_STR(tmp2);
843849
ZEND_HASH_FILL_NEXT();
844850
}
845851
} ZEND_HASH_FILL_END();
@@ -855,6 +861,7 @@ PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *st
855861
const char *endp = ZSTR_VAL(str) + ZSTR_LEN(str);
856862
const char *p2 = php_memnstr(ZSTR_VAL(str), ZSTR_VAL(delim), ZSTR_LEN(delim), endp);
857863
zval tmp;
864+
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(delim, str);
858865

859866
if (p2 == NULL) {
860867
/*
@@ -880,6 +887,7 @@ PHPAPI void php_explode_negative_limit(const zend_string *delim, zend_string *st
880887
/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
881888
for (i = 0; i < to_return; i++) { /* this checks also for to_return > 0 */
882889
ZVAL_STRINGL(&tmp, positions[i], (positions[i+1] - ZSTR_LEN(delim)) - positions[i]);
890+
GC_ADD_FLAGS(Z_STR(tmp), flags);
883891
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
884892
}
885893
efree((void *)positions);

ext/zend_test/tests/strings_marked_as_utf8.phpt

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,34 @@ var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));
142142
$string_concat = implode('', [1, 1.0, 'a']);
143143
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));
144144

145+
echo "explode:\n";
146+
$string = 'můj žlutý kůň';
147+
$firstByte = substr('ů', 0, 1); // a byte that occurs in $string but is not valid UTF-8 on its own
148+
$dumpUtf8ValidityArrFx = function (array $strings): void {
149+
echo (implode(' ', array_map(fn ($v) => zend_test_is_string_marked_as_valid_utf8($v) ? 'true' : 'false', $strings)) ?: 'empty') . "\n";
150+
};
151+
$dumpUtf8ValidityArrFx(explode("\xff", ''));
152+
$dumpUtf8ValidityArrFx(explode('ů', $string));
153+
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff"));
154+
$dumpUtf8ValidityArrFx(explode('ů', $string, 1));
155+
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", 1));
156+
$dumpUtf8ValidityArrFx(explode($firstByte, $string));
157+
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff"));
158+
$dumpUtf8ValidityArrFx(explode("\xff", $string));
159+
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff"));
160+
$dumpUtf8ValidityArrFx(explode('ů', $string, -1));
161+
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", -1));
162+
$dumpUtf8ValidityArrFx(explode($firstByte, $string, -1));
163+
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff", -1));
164+
$dumpUtf8ValidityArrFx(explode("\xff", $string, -1));
165+
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff", -1));
166+
$dumpUtf8ValidityArrFx(explode('ů', $string, -2));
167+
$dumpUtf8ValidityArrFx(explode('ů', $string . "\xff", -2));
168+
$dumpUtf8ValidityArrFx(explode($firstByte, $string, -2));
169+
$dumpUtf8ValidityArrFx(explode($firstByte, $string . "\xff", -2));
170+
$dumpUtf8ValidityArrFx(explode("\xff", $string, -2));
171+
$dumpUtf8ValidityArrFx(explode("\xff", $string . "\xff", -2));
172+
145173
?>
146174
--EXPECT--
147175
Empty strings:
@@ -199,3 +227,25 @@ bool(true)
199227
bool(true)
200228
bool(true)
201229
bool(true)
230+
explode:
231+
false
232+
true true true
233+
false false false
234+
true
235+
false
236+
false false false false false
237+
false false false false false
238+
false
239+
false false
240+
true true
241+
false false
242+
false false false false
243+
false false false false
244+
empty
245+
false
246+
true
247+
false
248+
false false false
249+
false false false
250+
empty
251+
empty

0 commit comments

Comments
 (0)