Skip to content

Commit cdfd72c

Browse files
committed
PHP 8.0: Make stream wrapper and windows drive checks locale-independent
- Avoid registering/detecting stream wrappers in locale-dependent ways.
- Avoid locale dependence for Windows drive letter names in zend_virtual_cwd.
- Make parse_url stop depending on locale. Related to https://bugs.php.net/bug.php?id=52923: iscntrl is locale-dependent, which seems to corrupt certain bytes.
- Make FILTER_VALIDATE_DOMAIN with flag FILTER_FLAG_HOSTNAME locale-independent.

Somewhat related to https://wiki.php.net/rfc/strtolower-ascii, but I don't think most of these should have been locale-dependent in the first place - the code may not have considered locales. E.g. on Linux, `setlocale(LC_ALL, 'de_DE');` (if the locale is installed and the call succeeds) will classify some byte values in the range 128-255 as alpha/cntrl, where the C locale classifies none. To avoid this locale dependence in older PHP versions, applications can call `setlocale(LC_CTYPE, 'C')`.
1 parent 096a01c commit cdfd72c

File tree

10 files changed

+105
-12
lines changed

10 files changed

+105
-12
lines changed

Zend/zend_compile.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1941,7 +1941,7 @@ ZEND_API size_t zend_dirname(char *path, size_t len)
19411941
/* Note that on Win32 CWD is per drive (heritage from CP/M).
19421942
* This means dirname("c:foo") maps to "c:." or "c:" - which means CWD on C: drive.
19431943
*/
1944-
if ((2 <= len) && isalpha((int)((unsigned char *)path)[0]) && (':' == path[1])) {
1944+
if ((2 <= len) && zend_isalpha_ascii((int)((unsigned char *)path)[0]) && (':' == path[1])) {
19451945
/* Skip over the drive spec (if any) so as not to change */
19461946
path += 2;
19471947
len_adjust += 2;

Zend/zend_operators.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,47 @@ static const unsigned char tolower_map[256] = {
6666

6767
#define zend_tolower_ascii(c) (tolower_map[(unsigned char)(c)])
6868

69+
/* ctype's isalpha varies based on locale, which is not what we want for many use cases.
70+
* This is what it'd be in the "C" locale. */
71+
ZEND_API const bool zend_isalpha_map[256] = {
72+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
73+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
74+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
75+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
76+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
77+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
78+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
79+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
80+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
83+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
84+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
85+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
86+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
87+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
88+
};
89+
90+
/* ctype's isalnum is isalpha + isdigit(0-9) */
91+
ZEND_API const bool zend_isalnum_map[256] = {
92+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
93+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
94+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
95+
1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
96+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
97+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
98+
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
99+
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
100+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
101+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
102+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
103+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
104+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
105+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
106+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
107+
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
108+
};
109+
69110
/**
70111
* Functions using locale lowercase:
71112
zend_binary_strncasecmp_l

Zend/zend_operators.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,13 @@ ZEND_API int ZEND_FASTCALL string_compare_function(zval *op1, zval *op2);
414414
ZEND_API int ZEND_FASTCALL string_case_compare_function(zval *op1, zval *op2);
415415
ZEND_API int ZEND_FASTCALL string_locale_compare_function(zval *op1, zval *op2);
416416

417+
/* NOTE: The locale-independent alternatives to ctype(isalpha/isalnum) were added to fix bugs in php 7.3 patch releases, and should not be used externally until php 8.2 */
418+
ZEND_API extern const bool zend_isalpha_map[256];
419+
ZEND_API extern const bool zend_isalnum_map[256];
420+
421+
#define zend_isalpha_ascii(c) (zend_isalpha_map[(unsigned char)(c)])
422+
#define zend_isalnum_ascii(c) (zend_isalnum_map[(unsigned char)(c)])
423+
417424
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length);
418425
ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *source, size_t length);
419426
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length);

Zend/zend_virtual_cwd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ typedef unsigned short mode_t;
8282
#define IS_UNC_PATH(path, len) \
8383
(len >= 2 && IS_SLASH(path[0]) && IS_SLASH(path[1]))
8484
#define IS_ABSOLUTE_PATH(path, len) \
85-
(len >= 2 && (/* is local */isalpha(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
85+
(len >= 2 && (/* is local */zend_isalpha_ascii(path[0]) && path[1] == ':' || /* is UNC */IS_SLASH(path[0]) && IS_SLASH(path[1])))
8686

8787
#else
8888
#ifdef HAVE_DIRENT_H

ext/filter/logical_filters.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,21 +520,21 @@ static int _php_filter_validate_domain(char * domain, int len, zend_long flags)
520520
}
521521

522522
/* First char must be alphanumeric */
523-
if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
523+
if(*s == '.' || (hostname && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
524524
return 0;
525525
}
526526

527527
while (s < e) {
528528
if (*s == '.') {
529529
/* The first and the last character of a label must be alphanumeric */
530-
if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
530+
if (*(s + 1) == '.' || (hostname && (!zend_isalnum_ascii((int)*(unsigned char *)(s - 1)) || !zend_isalnum_ascii((int)*(unsigned char *)(s + 1))))) {
531531
return 0;
532532
}
533533

534534
/* Reset label length counter */
535535
i = 1;
536536
} else {
537-
if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
537+
if (i > 63 || (hostname && *s != '-' && !zend_isalnum_ascii((int)*(unsigned char *)s))) {
538538
return 0;
539539
}
540540

@@ -561,7 +561,7 @@ static int is_userinfo_valid(zend_string *str)
561561
const char *valid = "-._~!$&'()*+,;=:";
562562
const char *p = ZSTR_VAL(str);
563563
while (p - ZSTR_VAL(str) < ZSTR_LEN(str)) {
564-
if (isalpha(*p) || isdigit(*p) || strchr(valid, *p)) {
564+
if (zend_isalnum_ascii(*p) || strchr(valid, *p)) {
565565
p++;
566566
} else if (*p == '%' && p - ZSTR_VAL(str) <= ZSTR_LEN(str) - 3 && isdigit(*(p+1)) && isxdigit(*(p+2))) {
567567
p += 3;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
--TEST--
2+
FILTER_VALIDATE_DOMAIN FILTER_FLAG_HOSTNAME should not be locale dependent
3+
--EXTENSIONS--
4+
filter
5+
--SKIPIF--
6+
<?php // try to activate a single-byte german locale
7+
if (!setlocale(LC_ALL, "de_DE")) {
8+
print "skip Can't find german locale";
9+
}
10+
?>
11+
--FILE--
12+
<?php
13+
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
14+
setlocale(LC_ALL, "de_DE");
15+
var_dump(filter_var('٪', FILTER_VALIDATE_DOMAIN, FILTER_FLAG_HOSTNAME));
16+
?>
17+
--EXPECT--
18+
bool(false)
19+
bool(false)
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
--TEST--
2+
Stream wrappers should not be locale dependent
3+
--SKIPIF--
4+
<?php // try to activate a single-byte german locale
5+
if (!setlocale(LC_ALL, "de_DE")) {
6+
print "skip Can't find german locale";
7+
}
8+
?>
9+
--INI--
10+
allow_url_fopen=1
11+
display_errors=stderr
12+
--FILE--
13+
<?php
14+
setlocale(LC_ALL, "de_DE");
15+
class testwrapper {
16+
}
17+
18+
var_dump(stream_wrapper_register("test٪", 'testwrapper', STREAM_IS_URL));
19+
20+
echo 'stream_open: ';
21+
fopen("test٪://test", 'r');
22+
?>
23+
--EXPECTF--
24+
Warning: stream_wrapper_register(): Invalid protocol scheme specified. Unable to register wrapper class testwrapper to test٪:// in %s on line 6
25+
bool(false)
26+
stream_open: Warning: fopen(test٪://test): Failed to open stream: No such file or directory in %s on line 9

main/fopen_wrappers.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
481481
}
482482

483483
/* Don't resolve paths which contain protocol (except of file://) */
484-
for (p = filename; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
484+
for (p = filename; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
485485
if ((*p == ':') && (p - filename > 1) && (p[1] == '/') && (p[2] == '/')) {
486486
wrapper = php_stream_locate_url_wrapper(filename, &actual_path, STREAM_OPEN_FOR_INCLUDE);
487487
if (wrapper == &php_plain_files_wrapper) {
@@ -517,7 +517,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
517517
/* Check for stream wrapper */
518518
int is_stream_wrapper = 0;
519519

520-
for (p = ptr; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
520+
for (p = ptr; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
521521
if ((*p == ':') && (p - ptr > 1) && (p[1] == '/') && (p[2] == '/')) {
522522
/* .:// or ..:// is not a stream wrapper */
523523
if (p[-1] != '.' || p[-2] != '.' || p - 2 != ptr) {
@@ -586,7 +586,7 @@ PHPAPI zend_string *php_resolve_path(const char *filename, size_t filename_lengt
586586
actual_path = trypath;
587587

588588
/* Check for stream wrapper */
589-
for (p = trypath; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
589+
for (p = trypath; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++);
590590
if ((*p == ':') && (p - trypath > 1) && (p[1] == '/') && (p[2] == '/')) {
591591
wrapper = php_stream_locate_url_wrapper(trypath, &actual_path, STREAM_OPEN_FOR_INCLUDE);
592592
if (!wrapper) {

main/streams/streams.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,7 +1724,7 @@ static inline int php_stream_wrapper_scheme_validate(const char *protocol, unsig
17241724
unsigned int i;
17251725

17261726
for(i = 0; i < protocol_len; i++) {
1727-
if (!isalnum((int)protocol[i]) &&
1727+
if (!zend_isalnum_ascii((int)protocol[i]) &&
17281728
protocol[i] != '+' &&
17291729
protocol[i] != '-' &&
17301730
protocol[i] != '.') {
@@ -1804,7 +1804,7 @@ PHPAPI php_stream_wrapper *php_stream_locate_url_wrapper(const char *path, const
18041804
return (php_stream_wrapper*)((options & STREAM_LOCATE_WRAPPERS_ONLY) ? NULL : &php_plain_files_wrapper);
18051805
}
18061806

1807-
for (p = path; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
1807+
for (p = path; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
18081808
n++;
18091809
}
18101810

main/streams/transports.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in
9393
}
9494
}
9595

96-
for (p = name; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
96+
for (p = name; zend_isalnum_ascii((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) {
9797
n++;
9898
}
9999

0 commit comments

Comments
 (0)