Skip to content

Commit 17a0fbb

Browse files
feature #469 Mb trim functions (Fan2Shrek)
This PR was squashed before being merged into the 1.x branch. Discussion ---------- Mb trim functions Fixes #458 Implementation of `mb_trim`, `mb_ltrim` and `mb_rtrim` Tests from the php-src repository Commits ------- dd8aab5 Improve mb_*trim polyfills bf383b3 Mb trim functions
2 parents 697a1da + dd8aab5 commit 17a0fbb

File tree

12 files changed

+441
-25
lines changed

12 files changed

+441
-25
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ Polyfills are provided for:
7070
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
7171
- the `array_find`, `array_find_key`, `array_any` and `array_all` functions introduced in PHP 8.4;
7272
- the `Deprecated` attribute introduced in PHP 8.4;
73+
- the `mb_trim`, `mb_ltrim` and `mb_rtrim` functions introduced in PHP 8.4;
7374

7475
It is strongly recommended to upgrade your PHP version and/or install the missing
7576
extensions whenever possible. This polyfill should be used only when there is no

src/Mbstring/Mbstring.php

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@
5050
* - mb_substr_count - Count the number of substring occurrences
5151
* - mb_ucfirst - Make a string's first character uppercase
5252
* - mb_lcfirst - Make a string's first character lowercase
53+
* - mb_trim - Strip whitespace (or other characters) from the beginning and end of a string
54+
* - mb_ltrim - Strip whitespace (or other characters) from the beginning of a string
55+
* - mb_rtrim - Strip whitespace (or other characters) from the end of a string
5356
*
5457
* Not implemented:
5558
* - mb_convert_kana - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
@@ -83,7 +86,7 @@ final class Mbstring
8386
public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
8487
{
8588
if (\is_array($s)) {
86-
if (PHP_VERSION_ID < 70200) {
89+
if (\PHP_VERSION_ID < 70200) {
8790
trigger_error('mb_convert_encoding() expects parameter 1 to be string, array given', \E_USER_WARNING);
8891

8992
return null;
@@ -980,17 +983,75 @@ private static function getEncoding($encoding)
980983
return $encoding;
981984
}
982985

986+
/**
 * Strips whitespace (or other characters) from the beginning and end of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects the internal encoding
 */
public static function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Matches a run of the character class anchored at the start, or one anchored at the very end.
    $pattern = '{^[%s]+|[%1$s]+$}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
990+
991+
/**
 * Strips whitespace (or other characters) from the beginning of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects the internal encoding
 */
public static function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Matches a run of the character class anchored at the start of the subject.
    $pattern = '{^[%s]+}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
995+
996+
/**
 * Strips whitespace (or other characters) from the end of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects the internal encoding
 */
public static function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // The "u" modifier is required here, exactly as in mb_trim() and mb_ltrim(): without it
    // PCRE treats the character class as a set of single bytes and may strip a trailing byte
    // that is only part of a multibyte character (e.g. the 0xA9 byte of "©" when trimming "é"),
    // leaving invalid UTF-8 behind.
    return self::mb_internal_trim('{[%s]+$}Du', $string, $characters, $encoding, __FUNCTION__);
}
1000+
1001+
/**
 * Shared implementation behind mb_trim(), mb_ltrim() and mb_rtrim().
 *
 * The subject and the character list are brought to UTF-8, the character list is
 * injected into the given regex template, the match is removed, and the result is
 * converted back to the requested encoding when that encoding is not UTF-8.
 *
 * @param string      $regex      sprintf() template of the pattern; receives the character class content
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects the internal encoding
 * @param string      $function   Name of the public function, used in the error message
 *
 * @throws \ValueError When an explicitly given encoding is rejected by assertEncoding()
 */
private static function mb_internal_trim(string $regex, string $string, ?string $characters, ?string $encoding, string $function): string
{
    if (null !== $encoding) {
        self::assertEncoding($encoding, $function.'(): Argument #3 ($encoding) must be a valid encoding, "%s" given');
    } else {
        $encoding = self::mb_internal_encoding();
    }

    // An empty character list means there is nothing to strip.
    if ('' === $characters) {
        if (null === $encoding) {
            return $string;
        }

        return self::mb_convert_encoding($string, $encoding);
    }

    $hasCustomCharacters = null !== $characters;

    // Null marks "already UTF-8": no conversion back is needed at the end.
    $outputEncoding = 'UTF-8' === $encoding ? null : $encoding;

    if ('UTF-8' === $encoding) {
        // Drop invalid byte sequences so that the "u" patterns below do not fail.
        if (!preg_match('//u', $string)) {
            $string = @iconv('UTF-8', 'UTF-8//IGNORE', $string);
        }
        if ($hasCustomCharacters && !preg_match('//u', $characters)) {
            $characters = @iconv('UTF-8', 'UTF-8//IGNORE', $characters);
        }
    } else {
        $string = iconv($encoding, 'UTF-8//IGNORE', $string);

        if ($hasCustomCharacters) {
            $characters = iconv($encoding, 'UTF-8//IGNORE', $characters);
        }
    }

    if ($hasCustomCharacters) {
        $characterClass = preg_quote($characters);
    } else {
        // Default set of characters stripped when the caller passes null.
        $characterClass = "\\0 \f\n\r\t\v\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
    }

    $trimmed = preg_replace(sprintf($regex, $characterClass), '', $string);

    if (null === $outputEncoding) {
        return $trimmed;
    }

    return iconv('UTF-8', $outputEncoding.'//IGNORE', $trimmed);
}
1043+
9831044
private static function assertEncoding(string $encoding, string $errorFormat): void
9841045
{
9851046
try {
9861047
$validEncoding = @self::mb_check_encoding('', $encoding);
9871048
} catch (\ValueError $e) {
988-
throw new \ValueError(\sprintf($errorFormat, $encoding));
1049+
throw new \ValueError(sprintf($errorFormat, $encoding));
9891050
}
9901051

9911052
// BC for PHP 7.3 and lower
9921053
if (!$validEncoding) {
993-
throw new \ValueError(\sprintf($errorFormat, $encoding));
1054+
throw new \ValueError(sprintf($errorFormat, $encoding));
9941055
}
9951056
}
9961057
}

src/Mbstring/bootstrap.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,19 @@ function mb_ucfirst(string $string, ?string $encoding = null): string { return p
144144
function mb_lcfirst(string $string, ?string $encoding = null): string { return p\Mbstring::mb_lcfirst($string, $encoding); }
145145
}
146146

147+
// Global mb_trim(), mb_ltrim() and mb_rtrim() polyfills: each function is declared only when
// it does not exist yet and simply forwards its arguments to the matching p\Mbstring method.
if (!function_exists('mb_trim')) {
148+
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_trim($string, $characters, $encoding); }
149+
}
150+
151+
if (!function_exists('mb_ltrim')) {
152+
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_ltrim($string, $characters, $encoding); }
153+
}
154+
155+
if (!function_exists('mb_rtrim')) {
156+
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_rtrim($string, $characters, $encoding); }
157+
}
158+
159+
147160
if (extension_loaded('mbstring')) {
148161
return;
149162
}

src/Mbstring/bootstrap80.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,18 @@ function mb_ucfirst($string, ?string $encoding = null): string { return p\Mbstri
140140
function mb_lcfirst($string, ?string $encoding = null): string { return p\Mbstring::mb_lcfirst($string, $encoding); }
141141
}
142142

143+
// Global mb_trim(), mb_ltrim() and mb_rtrim() polyfills: each function is declared only when
// it does not exist yet and simply forwards its arguments to the matching p\Mbstring method.
if (!function_exists('mb_trim')) {
144+
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_trim($string, $characters, $encoding); }
145+
}
146+
147+
if (!function_exists('mb_ltrim')) {
148+
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_ltrim($string, $characters, $encoding); }
149+
}
150+
151+
if (!function_exists('mb_rtrim')) {
152+
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Mbstring::mb_rtrim($string, $characters, $encoding); }
153+
}
154+
143155
if (extension_loaded('mbstring')) {
144156
return;
145157
}

src/Php72/bootstrap.php

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,15 @@ function utf8_decode($string) { return p\Php72::utf8_decode($string); }
4646
if (!function_exists('spl_object_id')) {
4747
function spl_object_id($object) { return p\Php72::spl_object_id($object); }
4848
}
49-
if (!function_exists('mb_ord')) {
50-
function mb_ord($string, $encoding = null) { return p\Php72::mb_ord($string, $encoding); }
51-
}
52-
if (!function_exists('mb_chr')) {
53-
function mb_chr($codepoint, $encoding = null) { return p\Php72::mb_chr($codepoint, $encoding); }
54-
}
55-
if (!function_exists('mb_scrub')) {
56-
function mb_scrub($string, $encoding = null) { $encoding = null === $encoding ? mb_internal_encoding() : $encoding; return mb_convert_encoding($string, $encoding, $encoding); }
49+
50+
if (extension_loaded('mbstring')) {
51+
if (!function_exists('mb_ord')) {
52+
function mb_ord($string, $encoding = null) { return p\Php72::mb_ord($string, $encoding); }
53+
}
54+
if (!function_exists('mb_chr')) {
55+
function mb_chr($codepoint, $encoding = null) { return p\Php72::mb_chr($codepoint, $encoding); }
56+
}
57+
if (!function_exists('mb_scrub')) {
58+
function mb_scrub($string, $encoding = null) { $encoding = null === $encoding ? mb_internal_encoding() : $encoding; return mb_convert_encoding($string, $encoding, $encoding); }
59+
}
5760
}

src/Php74/bootstrap.php

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
if (!function_exists('get_mangled_object_vars')) {
1919
function get_mangled_object_vars($object) { return p\Php74::get_mangled_object_vars($object); }
2020
}
21-
if (!function_exists('mb_str_split') && function_exists('mb_substr')) {
22-
function mb_str_split($string, $length = 1, $encoding = null) { return p\Php74::mb_str_split($string, $length, $encoding); }
23-
}
2421
if (!function_exists('password_algos')) {
2522
function password_algos() { return p\Php74::password_algos(); }
2623
}
24+
if (extension_loaded('mbstring')) {
25+
if (!function_exists('mb_str_split')) {
26+
function mb_str_split($string, $length = 1, $encoding = null) { return p\Php74::mb_str_split($string, $length, $encoding); }
27+
}
28+
}

src/Php83/bootstrap.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@
1919
function json_validate(string $json, int $depth = 512, int $flags = 0): bool { return p\Php83::json_validate($json, $depth, $flags); }
2020
}
2121

22-
if (!function_exists('mb_str_pad') && function_exists('mb_substr')) {
23-
function mb_str_pad(string $string, int $length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string { return p\Php83::mb_str_pad($string, $length, $pad_string, $pad_type, $encoding); }
22+
if (extension_loaded('mbstring')) {
23+
if (!function_exists('mb_str_pad')) {
24+
function mb_str_pad(string $string, int $length, string $pad_string = ' ', int $pad_type = STR_PAD_RIGHT, ?string $encoding = null): string { return p\Php83::mb_str_pad($string, $length, $pad_string, $pad_type, $encoding); }
25+
}
2426
}
2527

2628
if (!function_exists('stream_context_set_options')) {

src/Php84/Php84.php

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
/**
1515
* @author Ayesh Karunaratne <[email protected]>
16+
* @author Pierre Ambroise <[email protected]>
1617
*
1718
* @internal
1819
*/
@@ -107,4 +108,65 @@ public static function array_all(array $array, callable $callback): bool
107108

108109
return true;
109110
}
111+
112+
/**
 * Strips whitespace (or other characters) from the beginning and end of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects mb_internal_encoding()
 */
public static function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Matches a run of the character class anchored at the start, or one anchored at the very end.
    $pattern = '{^[%s]+|[%1$s]+$}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
116+
117+
/**
 * Strips whitespace (or other characters) from the beginning of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects mb_internal_encoding()
 */
public static function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Matches a run of the character class anchored at the start of the subject.
    $pattern = '{^[%s]+}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
121+
122+
/**
 * Strips whitespace (or other characters) from the end of a string.
 *
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects mb_internal_encoding()
 */
public static function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string
{
    // Matches a run of the character class anchored at the very end of the subject.
    $pattern = '{[%s]+$}Du';

    return self::mb_internal_trim($pattern, $string, $characters, $encoding, __FUNCTION__);
}
126+
127+
/**
 * Shared implementation behind mb_trim(), mb_ltrim() and mb_rtrim().
 *
 * The subject and the character list are converted to UTF-8, the character list is
 * injected into the given regex template, the match is removed, and the result is
 * converted back to the requested encoding when that encoding is not UTF-8.
 *
 * @param string      $regex      sprintf() template of the pattern; receives the character class content
 * @param string      $string     The string to trim
 * @param string|null $characters Characters to strip; null selects the default whitespace set
 * @param string|null $encoding   Encoding of the arguments; null selects mb_internal_encoding()
 * @param string      $function   Name of the public function, used in the error message
 *
 * @throws \ValueError When the encoding is not accepted by mb_check_encoding()
 */
private static function mb_internal_trim(string $regex, string $string, ?string $characters, ?string $encoding, string $function): string
{
    $encoding = $encoding ?? mb_internal_encoding();

    try {
        $isValidEncoding = @mb_check_encoding('', $encoding);
    } catch (\ValueError $e) {
        $isValidEncoding = false;
    }

    // A false result (instead of an exception) is kept for BC with PHP 7.3 and lower.
    if (!$isValidEncoding) {
        throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', $function, $encoding));
    }

    // An empty character list means there is nothing to strip.
    if ('' === $characters) {
        if (null === $encoding) {
            return $string;
        }

        return mb_convert_encoding($string, $encoding);
    }

    // Normalise the accepted spellings of UTF-8 so the final comparison is a plain identity check.
    $isUtf8 = 'UTF-8' === $encoding || \in_array(strtolower($encoding), ['utf-8', 'utf8'], true);
    if ($isUtf8) {
        $encoding = 'UTF-8';
    }

    $string = mb_convert_encoding($string, 'UTF-8', $encoding);

    if (null === $characters) {
        // Default set of characters stripped when the caller passes null.
        $characterClass = "\\0 \f\n\r\t\v\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
    } else {
        $characterClass = preg_quote(mb_convert_encoding($characters, 'UTF-8', $encoding));
    }

    $trimmed = preg_replace(sprintf($regex, $characterClass), '', $string);

    if ($isUtf8) {
        return $trimmed;
    }

    return mb_convert_encoding($trimmed, $encoding, 'UTF-8');
}
110172
}

src/Php84/Resources/stubs/Deprecated.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ final class Deprecated
1515
{
1616
public readonly ?string $message;
1717
public readonly ?string $since;
18-
18+
1919
public function __construct(?string $message = null, ?string $since = null)
2020
{
2121
$this->message = $message;

src/Php84/bootstrap.php

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,6 @@
1515
return;
1616
}
1717

18-
if (!function_exists('mb_ucfirst')) {
19-
function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84::mb_ucfirst($string, $encoding); }
20-
}
21-
22-
if (!function_exists('mb_lcfirst')) {
23-
function mb_lcfirst($string, ?string $encoding = null): string { return p\Php84::mb_lcfirst($string, $encoding); }
24-
}
25-
2618
if (!function_exists('array_find')) {
2719
function array_find(array $array, callable $callback) { return p\Php84::array_find($array, $callback); }
2820
}
@@ -38,3 +30,25 @@ function array_any(array $array, callable $callback): bool { return p\Php84::arr
3830
if (!function_exists('array_all')) {
3931
function array_all(array $array, callable $callback): bool { return p\Php84::array_all($array, $callback); }
4032
}
33+
34+
// The mb_* polyfills below are declared only when the mbstring extension is loaded and the
// function does not exist yet; each one forwards its arguments to the matching p\Php84 method.
// NOTE(review): presumably guarded because the p\Php84 implementations call mbstring functions
// themselves — confirm against the package requirements.
if (extension_loaded('mbstring')) {
35+
if (!function_exists('mb_ucfirst')) {
36+
function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84::mb_ucfirst($string, $encoding); }
37+
}
38+
39+
if (!function_exists('mb_lcfirst')) {
40+
function mb_lcfirst($string, ?string $encoding = null): string { return p\Php84::mb_lcfirst($string, $encoding); }
41+
}
42+
43+
if (!function_exists('mb_trim')) {
44+
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_trim($string, $characters, $encoding); }
45+
}
46+
47+
if (!function_exists('mb_ltrim')) {
48+
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_ltrim($string, $characters, $encoding); }
49+
}
50+
51+
if (!function_exists('mb_rtrim')) {
52+
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string { return p\Php84::mb_rtrim($string, $characters, $encoding); }
53+
}
54+
}

0 commit comments

Comments
 (0)