Skip to content

Commit 21418b5

Browse files
committed
ext/intl: SpoofChecker::setAllowedChars support.
Limits the range of acceptable Unicode characters, specified either individually or via a pattern.
1 parent b537f01 commit 21418b5

File tree

6 files changed

+180
-16
lines changed

6 files changed

+180
-16
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ PHP NEWS
2020
. /dev/poll events.mechanism for Solaris/Illumos setting had been retired.
2121
(David Carlier)
2222

23+
- Intl:
24+
. Added SpoofChecker::setAllowedChars to set the ranges of allowed Unicode characters.
25+
(David Carlier)
26+
2327
- Opcache:
2428
. Fixed bug GH-13775 (Memory leak possibly related to opcache SHM placement).
2529
(Arnaud)

UPGRADING

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,8 @@ PHP 8.4 UPGRADE NOTES
609609
Selectors.
610610
. Added IntlDateFormatter::parseToCalendar which behaves like
611611
IntlDateFormatter::parse except the time zone is updated.
612+
. Added SpoofChecker::setAllowedChars to limit the range of unicode
613+
chars.
612614

613615
- MBString:
614616
. Added mb_trim, mb_ltrim and mb_rtrim functions.

ext/intl/spoofchecker/spoofchecker.stub.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ class Spoofchecker
4040
public const int HIDDEN_OVERLAY = UNKNOWN;
4141
#endif
4242

43+
/** @cvalue USET_IGNORE_SPACE */
44+
public const int IGNORE_SPACE = UNKNOWN;
45+
/** @cvalue USET_CASE_INSENSITIVE */
46+
public const int CASE_INSENSITIVE = UNKNOWN;
47+
/** @cvalue USET_ADD_CASE_MAPPINGS */
48+
public const int ADD_CASE_MAPPINGS = UNKNOWN;
49+
#if U_ICU_VERSION_MAJOR_NUM >= 73
50+
/** @cvalue USET_SIMPLE_CASE_INSENSITIVE */
51+
public const int SIMPLE_CASE_INSENSITIVE = UNKNOWN;
52+
#endif
53+
4354
public function __construct() {}
4455

4556
/**
@@ -64,4 +75,5 @@ public function setChecks(int $checks): void {}
6475
/** @tentative-return-type */
6576
public function setRestrictionLevel(int $level): void {}
6677
#endif
78+
public function setAllowedChars(string $pattern, int $patternOptions = 0): void {}
6779
}

ext/intl/spoofchecker/spoofchecker_arginfo.h

Lines changed: 34 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/intl/spoofchecker/spoofchecker_main.c

Lines changed: 84 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,29 @@
1717
#endif
1818

1919
#include "php_intl.h"
20+
#include "intl_convert.h"
2021
#include "spoofchecker_class.h"
2122

2223
/* {{{ Checks if a given text contains any suspicious characters */
2324
PHP_METHOD(Spoofchecker, isSuspicious)
2425
{
2526
int32_t ret, errmask;
26-
char *text;
27-
size_t text_len;
27+
zend_string *text;
2828
zval *error_code = NULL;
2929
SPOOFCHECKER_METHOD_INIT_VARS;
3030

3131
ZEND_PARSE_PARAMETERS_START(1, 2)
32-
Z_PARAM_STRING(text, text_len)
32+
Z_PARAM_STR(text)
3333
Z_PARAM_OPTIONAL
3434
Z_PARAM_ZVAL(error_code)
3535
ZEND_PARSE_PARAMETERS_END();
3636

3737
SPOOFCHECKER_METHOD_FETCH_OBJECT;
3838

3939
#if U_ICU_VERSION_MAJOR_NUM >= 58
40-
ret = uspoof_check2UTF8(co->uspoof, text, text_len, co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co));
40+
ret = uspoof_check2UTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), co->uspoofres, SPOOFCHECKER_ERROR_CODE_P(co));
4141
#else
42-
ret = uspoof_checkUTF8(co->uspoof, text, text_len, NULL, SPOOFCHECKER_ERROR_CODE_P(co));
42+
ret = uspoof_checkUTF8(co->uspoof, ZSTR_VAL(text), ZSTR_LEN(text), NULL, SPOOFCHECKER_ERROR_CODE_P(co));
4343
#endif
4444

4545
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
@@ -65,23 +65,22 @@ PHP_METHOD(Spoofchecker, isSuspicious)
6565
PHP_METHOD(Spoofchecker, areConfusable)
6666
{
6767
int ret;
68-
char *s1, *s2;
69-
size_t s1_len, s2_len;
68+
zend_string *s1, *s2;
7069
zval *error_code = NULL;
7170
SPOOFCHECKER_METHOD_INIT_VARS;
7271

7372
ZEND_PARSE_PARAMETERS_START(2, 3)
74-
Z_PARAM_STRING(s1, s1_len)
75-
Z_PARAM_STRING(s2, s2_len)
73+
Z_PARAM_STR(s1)
74+
Z_PARAM_STR(s2)
7675
Z_PARAM_OPTIONAL
7776
Z_PARAM_ZVAL(error_code)
7877
ZEND_PARSE_PARAMETERS_END();
7978

8079
SPOOFCHECKER_METHOD_FETCH_OBJECT;
81-
if(s1_len > INT32_MAX || s2_len > INT32_MAX) {
80+
if(ZSTR_LEN(s1) > INT32_MAX || ZSTR_LEN(s2) > INT32_MAX) {
8281
SPOOFCHECKER_ERROR_CODE(co) = U_BUFFER_OVERFLOW_ERROR;
8382
} else {
84-
ret = uspoof_areConfusableUTF8(co->uspoof, s1, (int32_t)s1_len, s2, (int32_t)s2_len, SPOOFCHECKER_ERROR_CODE_P(co));
83+
ret = uspoof_areConfusableUTF8(co->uspoof, ZSTR_VAL(s1), (int32_t)ZSTR_LEN(s1), ZSTR_VAL(s2), (int32_t)ZSTR_LEN(s2), SPOOFCHECKER_ERROR_CODE_P(co));
8584
}
8685
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
8786
php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
@@ -98,17 +97,16 @@ PHP_METHOD(Spoofchecker, areConfusable)
9897
/* {{{ Locales to use when running checks */
9998
PHP_METHOD(Spoofchecker, setAllowedLocales)
10099
{
101-
char *locales;
102-
size_t locales_len;
100+
zend_string *locales;
103101
SPOOFCHECKER_METHOD_INIT_VARS;
104102

105103
ZEND_PARSE_PARAMETERS_START(1, 1)
106-
Z_PARAM_STRING(locales, locales_len)
104+
Z_PARAM_STR(locales)
107105
ZEND_PARSE_PARAMETERS_END();
108106

109107
SPOOFCHECKER_METHOD_FETCH_OBJECT;
110108

111-
uspoof_setAllowedLocales(co->uspoof, locales, SPOOFCHECKER_ERROR_CODE_P(co));
109+
uspoof_setAllowedLocales(co->uspoof, ZSTR_VAL(locales), SPOOFCHECKER_ERROR_CODE_P(co));
112110

113111
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
114112
php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
@@ -167,3 +165,74 @@ PHP_METHOD(Spoofchecker, setRestrictionLevel)
167165
}
168166
/* }}} */
169167
#endif
168+
169+
PHP_METHOD(Spoofchecker, setAllowedChars)
170+
{
171+
zend_string *pattern;
172+
UChar *upattern = NULL;
173+
int32_t upattern_len = 0;
174+
zend_long pattern_option = 0;
175+
SPOOFCHECKER_METHOD_INIT_VARS;
176+
177+
ZEND_PARSE_PARAMETERS_START(1, 2)
178+
Z_PARAM_STR(pattern)
179+
Z_PARAM_OPTIONAL
180+
Z_PARAM_LONG(pattern_option)
181+
ZEND_PARSE_PARAMETERS_END();
182+
SPOOFCHECKER_METHOD_FETCH_OBJECT;
183+
184+
if (ZSTR_LEN(pattern) > INT32_MAX) {
185+
zend_argument_value_error(1, "must be less than or equal to " ZEND_LONG_FMT " bytes long", INT32_MAX);
186+
RETURN_THROWS();
187+
}
188+
189+
/* uset_applyPattern requires to start with a regex range char */
190+
if (ZSTR_VAL(pattern)[0] != '[' || ZSTR_VAL(pattern)[ZSTR_LEN(pattern) -1] != ']') {
191+
zend_argument_value_error(1, "must be a valid regular expression character set pattern");
192+
RETURN_THROWS();
193+
}
194+
195+
intl_convert_utf8_to_utf16(&upattern, &upattern_len, ZSTR_VAL(pattern), ZSTR_LEN(pattern), SPOOFCHECKER_ERROR_CODE_P(co));
196+
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
197+
zend_argument_value_error(1, "string conversion to unicode encoding failed (%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
198+
RETURN_THROWS();
199+
}
200+
201+
USet *set = uset_openEmpty();
202+
203+
/* pattern is either USE_IGNORE_SPACE alone or in conjunction with the following flags (but mutually exclusive) */
204+
if (pattern_option &&
205+
pattern_option != USET_IGNORE_SPACE &&
206+
#if U_ICU_VERSION_MAJOR_NUM >= 73
207+
pattern_option != (USET_IGNORE_SPACE|USET_SIMPLE_CASE_INSENSITIVE) &&
208+
#endif
209+
pattern_option != (USET_IGNORE_SPACE|USET_CASE_INSENSITIVE) &&
210+
pattern_option != (USET_IGNORE_SPACE|USET_ADD_CASE_MAPPINGS)) {
211+
zend_argument_value_error(2, "must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::USET_CASE_INSENSITIVE or SpoofChecker::USET_ADD_CASE_MAPPINGS"
212+
#if U_ICU_VERSION_MAJOR_NUM >= 73
213+
" or SpoofChecker::USET_SIMPLE_CASE_INSENSITIVE"
214+
#endif
215+
"))"
216+
);
217+
uset_close(set);
218+
efree(upattern);
219+
RETURN_THROWS();
220+
}
221+
222+
uset_applyPattern(set, upattern, upattern_len, (uint32_t)pattern_option, SPOOFCHECKER_ERROR_CODE_P(co));
223+
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
224+
zend_argument_value_error(1, "must be a valid regular expression character set pattern (%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
225+
uset_close(set);
226+
efree(upattern);
227+
RETURN_THROWS();
228+
}
229+
230+
uset_compact(set);
231+
uspoof_setAllowedChars(co->uspoof, set, SPOOFCHECKER_ERROR_CODE_P(co));
232+
uset_close(set);
233+
efree(upattern);
234+
235+
if (U_FAILURE(SPOOFCHECKER_ERROR_CODE(co))) {
236+
php_error_docref(NULL, E_WARNING, "(%d) %s", SPOOFCHECKER_ERROR_CODE(co), u_errorName(SPOOFCHECKER_ERROR_CODE(co)));
237+
}
238+
}

ext/intl/tests/spoofchecker_008.phpt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
--TEST--
2+
spoofchecker with allowed chars settings
3+
--EXTENSIONS--
4+
intl
5+
--SKIPIF--
6+
<?php if(!class_exists("Spoofchecker")) print 'skip'; ?>
7+
--FILE--
8+
<?php
9+
10+
// Exercises Spoofchecker::setAllowedChars(): three accepted patterns followed
// by three calls that are each expected to throw a ValueError.
$s = new Spoofchecker();
11+
12+
// Only lowercase a-z allowed: the digits in "123" are reported as suspicious.
$s->setAllowedChars('[a-z]');
13+
var_dump($s->isSuspicious("123"));
14+
// Digits 1-3 allowed: the same input is no longer suspicious.
$s->setAllowedChars('[1-3]');
15+
var_dump($s->isSuspicious("123"));
16+
// With CASE_INSENSITIVE the [a-z] set also accepts "ABC".
$s->setAllowedChars('[a-z]', SpoofChecker::IGNORE_SPACE | SpoofChecker::CASE_INSENSITIVE);
17+
var_dump($s->isSuspicious("ABC"));
18+
19+
// 1024 is not an accepted option combination: expect the argument #2 error.
try {
20+
$s->setAllowedChars('[a-z]', 1024);
21+
} catch (\ValueError $e) {
22+
echo $e->getMessage() . PHP_EOL;
23+
}
24+
25+
// Missing opening bracket: expect the argument #1 pattern error.
try {
26+
$s->setAllowedChars("A-Z]");
27+
} catch (\ValueError $e) {
28+
echo $e->getMessage() . PHP_EOL;
29+
}
30+
31+
// Missing closing bracket: expect the same argument #1 pattern error.
try {
32+
$s->setAllowedChars("[A-Z");
33+
} catch (\ValueError $e) {
34+
echo $e->getMessage();
35+
}
36+
37+
?>
38+
--EXPECTF--
39+
bool(true)
40+
bool(false)
41+
bool(false)
42+
Spoofchecker::setAllowedChars(): Argument #2 ($patternOptions) must be a valid pattern option, 0 or (SpoofChecker::IGNORE_SPACE|(<none> or SpoofChecker::USET_CASE_INSENSITIVE%s))
43+
Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern
44+
Spoofchecker::setAllowedChars(): Argument #1 ($pattern) must be a valid regular expression character set pattern

0 commit comments

Comments
 (0)