Skip to content

Commit 13b791c

Browse files
committed
Normalize substr() behavior
Make the behavior of substr(), mb_substr(), iconv_substr() and grapheme_substr() consistent when it comes to the handling of out of bounds offsets. substr() will now always clamp out of bounds offsets to the string boundary. Cases that previously returned false will now return an empty string. This means that substr() itself *always* returns a string now (like mb_substr() already did before.) Closes GH-6182.
1 parent 17a789e commit 13b791c

14 files changed

+188
-164
lines changed

ext/iconv/iconv.c

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -638,27 +638,22 @@ static php_iconv_err_t _php_iconv_substr(smart_str *pretval,
638638
return err;
639639
}
640640

641-
if (len < 0) {
642-
if ((len += (total_len - offset)) < 0) {
643-
return PHP_ICONV_ERR_SUCCESS;
644-
}
645-
}
646-
647641
if (offset < 0) {
648642
if ((offset += total_len) < 0) {
649-
return PHP_ICONV_ERR_SUCCESS;
643+
offset = 0;
650644
}
645+
} else if ((size_t)offset > total_len) {
646+
offset = total_len;
651647
}
652648

653-
if((size_t)len > total_len) {
649+
if (len < 0) {
650+
if ((len += (total_len - offset)) < 0) {
651+
len = 0;
652+
}
653+
} else if ((size_t)len > total_len) {
654654
len = total_len;
655655
}
656656

657-
658-
if ((size_t)offset > total_len) {
659-
return PHP_ICONV_ERR_SUCCESS;
660-
}
661-
662657
if ((size_t)(offset + len) > total_len ) {
663658
/* trying to compute the length */
664659
len = total_len - offset;

ext/iconv/tests/iconv_substr.phpt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ var_dump(iconv("ISO-2022-JP", "EUC-JP", iconv_substr(iconv("EUC-JP", "ISO-2022-J
4545
666768696a6b6c
4646
a6a4a8a4aaa4ab
4747
a4aba4ada4afa4b1a4b3a4b5a4b7
48-
bool(false)
49-
bool(false)
48+
string(0) ""
49+
string(0) ""
5050
string(14) "This is a test"
5151
string(14) "This is a test"
5252
string(3) "est"
@@ -55,8 +55,8 @@ string(3) "est"
5555
string(3) "est"
5656
string(5) "This "
5757
string(5) "This "
58-
bool(false)
59-
bool(false)
60-
bool(false)
61-
bool(false)
58+
string(0) ""
59+
string(0) ""
60+
string(0) ""
61+
string(0) ""
6262
string(10) "ちは ISO-2"
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
--TEST--
2+
iconv_substr() with out of bounds offset
3+
--SKIPIF--
4+
<?php extension_loaded('iconv') or die('skip iconv extension is not available'); ?>
5+
--FILE--
6+
<?php
7+
8+
var_dump(iconv_substr("foo", 3));
9+
var_dump(iconv_substr("foo", -3));
10+
var_dump(iconv_substr("foo", 4));
11+
var_dump(iconv_substr("foo", -4));
12+
var_dump(iconv_substr("äöü", 3));
13+
var_dump(iconv_substr("äöü", -3));
14+
var_dump(iconv_substr("äöü", 4));
15+
var_dump(iconv_substr("äöü", -4));
16+
var_dump(iconv_substr("foo", 0, 3));
17+
var_dump(iconv_substr("foo", 0, -3));
18+
var_dump(iconv_substr("foo", 0, 4));
19+
var_dump(iconv_substr("foo", 0, -4));
20+
var_dump(iconv_substr("äöü", 0, 3));
21+
var_dump(iconv_substr("äöü", 0, -3));
22+
var_dump(iconv_substr("äöü", 0, 4));
23+
var_dump(iconv_substr("äöü", 0, -4));
24+
var_dump(iconv_substr("äöü", -4, 1));
25+
var_dump(iconv_substr("äöü", -4, -1));
26+
var_dump(iconv_substr("äöü", 2, -2));
27+
28+
?>
29+
--EXPECT--
30+
string(0) ""
31+
string(3) "foo"
32+
string(0) ""
33+
string(3) "foo"
34+
string(0) ""
35+
string(6) "äöü"
36+
string(0) ""
37+
string(6) "äöü"
38+
string(3) "foo"
39+
string(0) ""
40+
string(3) "foo"
41+
string(0) ""
42+
string(6) "äöü"
43+
string(0) ""
44+
string(6) "äöü"
45+
string(0) ""
46+
string(2) "ä"
47+
string(4) "äö"
48+
string(0) ""

ext/intl/grapheme/grapheme_string.c

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -371,22 +371,20 @@ PHP_FUNCTION(grapheme_substr)
371371
RETURN_THROWS();
372372
}
373373

374-
if ( OUTSIDE_STRING(lstart, str_len)) {
375-
zend_argument_value_error(2, "must be contained in argument #1 ($string)");
374+
if (lstart < INT32_MIN || lstart > INT32_MAX) {
375+
zend_argument_value_error(2, "is too large");
376376
RETURN_THROWS();
377377
}
378378

379-
/* we checked that it will fit: */
380379
start = (int32_t) lstart;
381380

382-
if(no_length) {
381+
if (no_length) {
383382
length = str_len;
384383
}
385384

386-
if(length < INT32_MIN) {
387-
length = INT32_MIN;
388-
} else if(length > INT32_MAX) {
389-
length = INT32_MAX;
385+
if (length < INT32_MIN || length > INT32_MAX) {
386+
zend_argument_value_error(3, "is too large");
387+
RETURN_THROWS();
390388
}
391389

392390
/* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */
@@ -451,15 +449,17 @@ PHP_FUNCTION(grapheme_substr)
451449
start += iter_val;
452450
}
453451

454-
if ( 0 != start || sub_str_start_pos >= ustr_len ) {
455-
456-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: start not contained in string", 1 );
457-
458-
if (ustr) {
459-
efree(ustr);
452+
if (0 != start) {
453+
if (start > 0) {
454+
if (ustr) {
455+
efree(ustr);
456+
}
457+
ubrk_close(bi);
458+
RETURN_EMPTY_STRING();
460459
}
461-
ubrk_close(bi);
462-
RETURN_FALSE;
460+
461+
sub_str_start_pos = 0;
462+
ubrk_first(bi);
463463
}
464464

465465
/* OK to convert here since if str_len were big, convert above would fail */
@@ -526,20 +526,17 @@ PHP_FUNCTION(grapheme_substr)
526526
ubrk_close(bi);
527527

528528
if ( UBRK_DONE == sub_str_end_pos) {
529-
if(length < 0) {
530-
zend_argument_value_error(3, "must be contained in argument #1 ($string)");
529+
if (length < 0) {
531530
efree(ustr);
532-
RETURN_THROWS();
531+
RETURN_EMPTY_STRING();
533532
} else {
534533
sub_str_end_pos = ustr_len;
535534
}
536535
}
537536

538-
if(sub_str_start_pos > sub_str_end_pos) {
539-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_substr: length is beyond start", 1 );
540-
537+
if (sub_str_start_pos > sub_str_end_pos) {
541538
efree(ustr);
542-
RETURN_FALSE;
539+
RETURN_EMPTY_STRING();
543540
}
544541

545542
status = U_ZERO_ERROR;

ext/intl/grapheme/grapheme_util.c

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,6 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
5757
return;
5858
}
5959

60-
if ((l < 0 && -l > str_len2)) {
61-
return;
62-
} else if (l > 0 && l > str_len2) {
63-
l = str_len2;
64-
}
65-
66-
if (f > str_len2 || (f < 0 && -f > str_len2)) {
67-
return;
68-
}
69-
70-
if (l < 0 && str_len2 < f - l) {
71-
return;
72-
}
73-
7460
/* if "from" position is negative, count start position from the end
7561
* of the string
7662
*/
@@ -79,8 +65,9 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
7965
if (f < 0) {
8066
f = 0;
8167
}
82-
}
83-
68+
} else if (f > str_len2) {
69+
f = str_len2;
70+
}
8471

8572
/* if "length" position is negative, set it to the length
8673
* needed to stop that many chars from the end of the string
@@ -90,20 +77,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
9077
if (l < 0) {
9178
l = 0;
9279
}
93-
}
94-
95-
if (f >= str_len2) {
96-
return;
97-
}
98-
99-
if ((f + l) > str_len2) {
100-
l = str_len - f;
101-
}
80+
} else if (l > str_len2 - f) {
81+
l = str_len2 - f;
82+
}
10283

10384
*sub_str = str + f;
10485
*sub_str_len = l;
105-
106-
return;
10786
}
10887
/* }}} */
10988

ext/intl/tests/bug62759.phpt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ var_dump(intl_get_error_message());
1414
var_dump(grapheme_substr('déjà', -1, 0));
1515
?>
1616
--EXPECT--
17-
bool(false)
1817
string(0) ""
19-
bool(false)
20-
string(61) "grapheme_substr: invalid parameters: U_ILLEGAL_ARGUMENT_ERROR"
2118
string(0) ""
22-
bool(false)
23-
string(65) "grapheme_substr: length is beyond start: U_ILLEGAL_ARGUMENT_ERROR"
19+
string(0) ""
20+
string(12) "U_ZERO_ERROR"
21+
string(0) ""
22+
string(0) ""
23+
string(12) "U_ZERO_ERROR"
2424
string(0) ""

0 commit comments

Comments
 (0)