Skip to content

Commit f74109d

Browse files
theodorejb authored and nikic committed
Implement numeric literal separators
RFC: https://wiki.php.net/rfc/numeric_literal_separator
1 parent ec77cca commit f74109d

11 files changed

+197
-28
lines changed

UPGRADING

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,15 @@ PHP 7.4 UPGRADE NOTES
186186

187187
RFC: https://wiki.php.net/rfc/spread_operator_for_array
188188

189+
. Added support for underscore separators in numeric literals. Some examples:
190+
191+
6.674_083e-11; // float
192+
299_792_458; // decimal
193+
0xCAFE_F00D; // hexadecimal
194+
0b0101_1111; // binary
195+
196+
RFC: https://wiki.php.net/rfc/numeric_literal_separator
197+
189198
. Support for WeakReferences has been added.
190199
RFC: https://wiki.php.net/rfc/weakrefs
191200

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
--TEST--
2+
Valid use of numeric literal separator
3+
--FILE--
4+
<?php
5+
var_dump(299_792_458 === 299792458);
6+
var_dump(135_00 === 13500);
7+
var_dump(96_485.332_12 === 96485.33212);
8+
var_dump(6.626_070_15e-34 === 6.62607015e-34);
9+
var_dump(6.674_083e-11 === 6.674083e-11);
10+
var_dump(0xCAFE_F00D === 0xCAFEF00D);
11+
var_dump(0x54_4A_42 === 0x544A42);
12+
var_dump(0b0101_1111 === 0b01011111);
13+
var_dump(0b01_0000_10 === 0b01000010);
14+
var_dump(0137_041 === 0137041);
15+
var_dump(0_124 === 0124);
16+
--EXPECT--
17+
bool(true)
18+
bool(true)
19+
bool(true)
20+
bool(true)
21+
bool(true)
22+
bool(true)
23+
bool(true)
24+
bool(true)
25+
bool(true)
26+
bool(true)
27+
bool(true)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: trailing underscore
3+
--FILE--
4+
<?php
5+
100_;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: adjacent underscores
3+
--FILE--
4+
<?php
5+
10__0;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore left of period
3+
--FILE--
4+
<?php
5+
100_.0;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore right of period
3+
--FILE--
4+
<?php
5+
100._0;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore next to 0x
3+
--FILE--
4+
<?php
5+
0x_0123;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore next to 0b
3+
--FILE--
4+
<?php
5+
0b_0101;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore left of e
3+
--FILE--
4+
<?php
5+
1_e2;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
--TEST--
2+
Invalid use: underscore right of e
3+
--FILE--
4+
<?php
5+
1e_2;
6+
--EXPECTF--
7+
Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d

Zend/zend_language_scanner.l

Lines changed: 105 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,21 @@ do { \
120120

121121
BEGIN_EXTERN_C()
122122

123+
/*
 * Remove every '_' from the NUL-terminated string `str`, in place.
 *
 * str: buffer to compact; it is written to, so it must be writable
 *      (the callers visible in this diff pass an estrndup() copy).
 * len: in/out length of `str`; decremented once for each '_' dropped.
 *
 * The compacted string is NUL-terminated again before returning.
 */
static void strip_underscores(char *str, int *len)
124+
{
125+
char *src = str, *dest = str;
126+
while (*src != '\0') {
127+
if (*src != '_') {
128+
*dest = *src;
129+
dest++;
130+
} else {
131+
--(*len);
132+
}
133+
src++;
134+
}
135+
*dest = '\0';
136+
}
137+
123138
static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
124139
{
125140
const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
@@ -1245,11 +1260,11 @@ restart:
12451260

12461261
/*!re2c
12471262
re2c:yyfill:check = 0;
1248-
LNUM [0-9]+
1249-
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
1263+
LNUM [0-9]+(_[0-9]+)*
1264+
DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
12501265
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1251-
HNUM "0x"[0-9a-fA-F]+
1252-
BNUM "0b"[01]+
1266+
HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
1267+
BNUM "0b"[01]+(_[01]+)*
12531268
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
12541269
WHITESPACE [ \n\r\t]+
12551270
TABS_AND_SPACES [ \t]*
@@ -1760,94 +1775,138 @@ NEWLINE ("\r"|"\n"|"\r\n")
17601775
}
17611776

17621777
<ST_IN_SCRIPTING>{BNUM} {
/* Binary literal ("0b" + digits, optionally '_'-separated): yields T_LNUMBER
 * when the significant digits fit in a zend_long, T_DNUMBER otherwise.
 * Underscores are stripped from a temporary estrndup() copy, which is freed
 * before the token is returned. */
1763-
char *bin = yytext + 2; /* Skip "0b" */
1764-
int len = yyleng - 2;
1765-
char *end;
1778+
/* The +/- 2 skips "0b" */
1779+
int len = yyleng - 2, contains_underscores;
1780+
char *end, *bin = yytext + 2;
17661781

17671782
/* Skip any leading 0s */
1768-
while (*bin == '0') {
1783+
/* The skip must be bounded by len: BNUM never ends in '_', so for input
 * such as "0b0_" the token is "0b0" and the next source byte is '_'. An
 * unbounded loop would step past the token and drive len negative, which
 * memchr()/estrndup() below would then treat as a huge size. */
while (len > 0 && (*bin == '0' || *bin == '_')) {
17691784
++bin;
17701785
--len;
17711786
}
17721787

1788+
contains_underscores = (memchr(bin, '_', len) != NULL);
1789+
1790+
if (contains_underscores) {
1791+
bin = estrndup(bin, len);
1792+
strip_underscores(bin, &len);
1793+
}
1794+
17731795
if (len < SIZEOF_ZEND_LONG * 8) {
17741796
if (len == 0) {
17751797
ZVAL_LONG(zendlval, 0);
17761798
} else {
17771799
errno = 0;
17781800
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
1779-
ZEND_ASSERT(!errno && end == yytext + yyleng);
1801+
ZEND_ASSERT(!errno && end == bin + len);
1802+
}
1803+
if (contains_underscores) {
1804+
efree(bin);
17801805
}
17811806
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
17821807
} else {
17831808
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
17841809
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1785-
ZEND_ASSERT(end == yytext + yyleng);
1810+
ZEND_ASSERT(end == bin + len);
1811+
if (contains_underscores) {
1812+
efree(bin);
1813+
}
17861814
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
17871815
}
17881816
}
17891817

17901818
<ST_IN_SCRIPTING>{LNUM} {
/* Decimal/octal integer literal, optionally '_'-separated. Yields T_LNUMBER,
 * or T_DNUMBER when the value overflows a zend_long. A literal that does not
 * parse to its full length (e.g. 019) raises "Invalid numeric literal".
 * When underscores are present the digits are parsed from an estrndup() copy,
 * which must be released exactly once on every exit path. */
1791-
char *end;
1792-
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
1819+
int len = yyleng, contains_underscores;
1820+
char *end, *lnum = yytext;
1821+
1822+
contains_underscores = (memchr(lnum, '_', len) != NULL);
1823+
1824+
if (contains_underscores) {
1825+
lnum = estrndup(lnum, len);
1826+
strip_underscores(lnum, &len);
1827+
}
1828+
1829+
if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
17931830
errno = 0;
17941831
/* base must be passed explicitly for correct parse error on Windows */
1795-
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
1832+
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
17961833
/* This isn't an assert, we need to ensure 019 isn't valid octal
17971834
* Because the lexing itself doesn't do that for us
17981835
*/
1799-
if (end != yytext + yyleng) {
1836+
if (end != lnum + len) {
18001837
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
18011838
ZVAL_UNDEF(zendlval);
1839+
if (contains_underscores) {
1840+
efree(lnum);
1841+
}
18021842
if (PARSER_MODE()) {
18031843
RETURN_TOKEN(T_ERROR);
18041844
}
18051845
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
18061846
}
18071847
} else {
18081848
errno = 0;
1809-
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
1849+
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
18101850
if (errno == ERANGE) { /* Overflow */
18111851
errno = 0;
1812-
if (yytext[0] == '0') { /* octal overflow */
1813-
ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
1852+
if (lnum[0] == '0') { /* octal overflow */
1853+
ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
18141854
} else {
1815-
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
1855+
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
18161856
}
18171857
/* Also not an assert for the same reason */
1818-
if (end != yytext + yyleng) {
1858+
if (end != lnum + len) {
18191859
zend_throw_exception(zend_ce_parse_error,
18201860
"Invalid numeric literal", 0);
18211861
ZVAL_UNDEF(zendlval);
1862+
/* Free here only on the early T_ERROR return. When PARSER_MODE() is
 * false, control falls out of this block to the efree() just below it;
 * freeing unconditionally here as well would release lnum twice. */
if (contains_underscores && PARSER_MODE()) {
1863+
efree(lnum);
1864+
}
18221865
if (PARSER_MODE()) {
18231866
RETURN_TOKEN(T_ERROR);
18241867
}
18251868
}
1869+
if (contains_underscores) {
1870+
efree(lnum);
1871+
}
18261872
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
18271873
}
18281874
/* Also not an assert for the same reason */
1829-
if (end != yytext + yyleng) {
1875+
if (end != lnum + len) {
18301876
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
18311877
ZVAL_UNDEF(zendlval);
1878+
if (contains_underscores) {
1879+
efree(lnum);
1880+
}
18321881
if (PARSER_MODE()) {
18331882
RETURN_TOKEN(T_ERROR);
18341883
}
18351884
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
18361885
}
18371886
}
18381887
ZEND_ASSERT(!errno);
1888+
if (contains_underscores) {
1889+
efree(lnum);
1890+
}
18391891
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
18401892
}
18411893

18421894
<ST_IN_SCRIPTING>{HNUM} {
/* Hexadecimal literal ("0x" + digits, optionally '_'-separated): yields
 * T_LNUMBER when the significant digits fit in a zend_long, T_DNUMBER
 * otherwise. Underscores are stripped from a temporary estrndup() copy,
 * which is freed before the token is returned. */
1843-
char *hex = yytext + 2; /* Skip "0x" */
1844-
int len = yyleng - 2;
1845-
char *end;
1895+
/* The +/- 2 skips "0x" */
1896+
int len = yyleng - 2, contains_underscores;
1897+
char *end, *hex = yytext + 2;
18461898

18471899
/* Skip any leading 0s */
1848-
while (*hex == '0') {
1849-
hex++;
1850-
len--;
1900+
/* The skip must be bounded by len: HNUM never ends in '_', so for input
 * such as "0x0_" the token is "0x0" and the next source byte is '_'. An
 * unbounded loop would step past the token and drive len negative, which
 * memchr()/estrndup() below would then treat as a huge size. */
while (len > 0 && (*hex == '0' || *hex == '_')) {
1901+
++hex;
1902+
--len;
1903+
}
1904+
1905+
contains_underscores = (memchr(hex, '_', len) != NULL);
1906+
1907+
if (contains_underscores) {
1908+
hex = estrndup(hex, len);
1909+
strip_underscores(hex, &len);
18511910
}
18521911

18531912
if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
@@ -1858,11 +1917,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
18581917
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
18591918
ZEND_ASSERT(!errno && end == hex + len);
18601919
}
1920+
if (contains_underscores) {
1921+
efree(hex);
1922+
}
18611923
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
18621924
} else {
18631925
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
18641926
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
18651927
ZEND_ASSERT(end == hex + len);
1928+
if (contains_underscores) {
1929+
efree(hex);
1930+
}
18661931
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
18671932
}
18681933
}
@@ -1894,10 +1959,22 @@ string:
18941959

18951960
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
/* Floating-point literal (with or without exponent), optionally
 * '_'-separated: always yields T_DNUMBER. */
18961961
const char *end;
1962+
int len = yyleng, contains_underscores;
1963+
char *dnum = yytext;
18971964

1898-
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
1965+
contains_underscores = (memchr(dnum, '_', len) != NULL);
1966+
1967+
/* Only copy when there is something to strip; otherwise yytext is parsed in place. */
if (contains_underscores) {
1968+
dnum = estrndup(dnum, len);
1969+
strip_underscores(dnum, &len);
1970+
}
1971+
1972+
ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
18991973
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1900-
ZEND_ASSERT(end == yytext + yyleng);
1974+
/* len was reduced by strip_underscores(), so this checks the whole stripped literal was consumed. */
ZEND_ASSERT(end == dnum + len);
1975+
if (contains_underscores) {
1976+
efree(dnum);
1977+
}
19011978
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
19021979
}
19031980

0 commit comments

Comments
 (0)