@@ -120,6 +120,21 @@ do { \
120
120
121
121
BEGIN_EXTERN_C ()
122
122
123
/*
 * Remove every '_' from the NUL-terminated string `str`, in place.
 *
 * str: writable, NUL-terminated buffer; it is compacted and re-terminated.
 * len: in/out length of `str`; decremented once for each '_' removed, so on
 *      return it equals the new string length if it held the old length on
 *      entry.
 */
static void strip_underscores(char *str, int *len)
{
	char *src = str, *dest = str;

	while (*src != '\0') {
		if (*src != '_') {
			/* dest never runs ahead of src, so copying forward is safe. */
			*dest = *src;
			dest++;
		} else {
			--(*len);
		}
		src++;
	}
	*dest = '\0';
}
137
+
123
138
static size_t encoding_filter_script_to_internal (unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
124
139
{
125
140
const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding ();
@@ -1245,11 +1260,11 @@ restart:
1245
1260
1246
1261
/*!re2c
1247
1262
re2c:yyfill:check = 0;
1248
- LNUM [0-9]+
1249
- DNUM ([0-9]* "."[0-9]+ )|([0-9]+ "."[0-9]* )
1263
+ LNUM [0-9]+(_[0-9]+)*
1264
+ DNUM ({LNUM}? "."{LNUM} )|({LNUM} "."{LNUM}? )
1250
1265
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
1251
- HNUM "0x"[0-9a-fA-F]+
1252
- BNUM "0b"[01]+
1266
+ HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
1267
+ BNUM "0b"[01]+(_[01]+)*
1253
1268
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
1254
1269
WHITESPACE [ \n\r\t]+
1255
1270
TABS_AND_SPACES [ \t]*
@@ -1760,94 +1775,138 @@ NEWLINE ("\r"|"\n"|"\r\n")
1760
1775
}
1761
1776
1762
1777
<ST_IN_SCRIPTING>{BNUM} {
	/*
	 * Binary integer literal ("0b" followed by binary digits, optionally
	 * separated by '_'). Yields T_LNUMBER when the significant digits fit
	 * in a zend_long, otherwise T_DNUMBER.
	 */
	/* The +/- 2 skips "0b" */
	int len = yyleng - 2, contains_underscores;
	char *end, *bin = yytext + 2;

	/*
	 * Skip any leading 0s and separators. The scan is bounded by len: the
	 * BNUM pattern never consumes a '_' that is not followed by a digit, so
	 * a '_' may sit directly after the token (presumably the next byte of
	 * the input buffer); skipping it would push len below zero and hand a
	 * negative length to memchr() below.
	 */
	while (len > 0 && (*bin == '0' || *bin == '_')) {
		++bin;
		--len;
	}

	contains_underscores = (memchr(bin, '_', len) != NULL);

	if (contains_underscores) {
		/* Work on a private copy so the separators can be removed. */
		bin = estrndup(bin, len);
		strip_underscores(bin, &len);
	}

	if (len < SIZEOF_ZEND_LONG * 8) {
		if (len == 0) {
			/* Only zeros (and separators) were present. */
			ZVAL_LONG(zendlval, 0);
		} else {
			errno = 0;
			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
			ZEND_ASSERT(!errno && end == bin + len);
		}
		if (contains_underscores) {
			efree(bin);
		}
		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
	} else {
		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
		ZEND_ASSERT(end == bin + len);
		if (contains_underscores) {
			efree(bin);
		}
		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
	}
}
1789
1817
1790
1818
<ST_IN_SCRIPTING>{LNUM} {
	/*
	 * Decimal or octal integer literal, optionally containing '_'
	 * separators. Yields T_LNUMBER, or T_DNUMBER when the value overflows
	 * a zend_long. When separators are present, lnum points at a private
	 * copy that must be released exactly once on every exit path.
	 */
	int len = yyleng, contains_underscores;
	char *end, *lnum = yytext;

	contains_underscores = (memchr(lnum, '_', len) != NULL);

	if (contains_underscores) {
		lnum = estrndup(lnum, len);
		strip_underscores(lnum, &len);
	}

	if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
		errno = 0;
		/* base must be passed explicitly for correct parse error on Windows */
		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
		/* This isn't an assert, we need to ensure 019 isn't valid octal
		 * Because the lexing itself doesn't do that for us
		 */
		if (end != lnum + len) {
			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
			ZVAL_UNDEF(zendlval);
			if (contains_underscores) {
				efree(lnum);
			}
			if (PARSER_MODE()) {
				RETURN_TOKEN(T_ERROR);
			}
			RETURN_TOKEN_WITH_VAL(T_LNUMBER);
		}
	} else {
		errno = 0;
		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
		if (errno == ERANGE) { /* Overflow */
			errno = 0;
			if (lnum[0] == '0') { /* octal overflow */
				ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
			} else {
				ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
			}
			/* Also not an assert for the same reason */
			if (end != lnum + len) {
				zend_throw_exception(zend_ce_parse_error,
					"Invalid numeric literal", 0);
				ZVAL_UNDEF(zendlval);
				if (contains_underscores) {
					efree(lnum);
				}
				if (PARSER_MODE()) {
					RETURN_TOKEN(T_ERROR);
				}
				/*
				 * Return here rather than falling through: the copy
				 * has already been released above, and the common
				 * exit below would free it a second time.
				 */
				RETURN_TOKEN_WITH_VAL(T_DNUMBER);
			}
			if (contains_underscores) {
				efree(lnum);
			}
			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
		}
		/* Also not an assert for the same reason */
		if (end != lnum + len) {
			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
			ZVAL_UNDEF(zendlval);
			if (contains_underscores) {
				efree(lnum);
			}
			if (PARSER_MODE()) {
				RETURN_TOKEN(T_ERROR);
			}
			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
		}
	}
	ZEND_ASSERT(!errno);
	if (contains_underscores) {
		efree(lnum);
	}
	RETURN_TOKEN_WITH_VAL(T_LNUMBER);
}
1841
1893
1842
1894
<ST_IN_SCRIPTING>{HNUM} {
1843
- char *hex = yytext + 2 ; /* Skip "0x" */
1844
- int len = yyleng - 2 ;
1845
- char *end;
1895
+ /* The +/- 2 skips "0x" */
1896
+ int len = yyleng - 2 , contains_underscores ;
1897
+ char *end, *hex = yytext + 2 ;
1846
1898
1847
1899
/* Skip any leading 0s */
1848
- while (*hex == ' 0' ) {
1849
- hex++;
1850
- len--;
1900
+ while (*hex == ' 0' || *hex == ' _' ) {
1901
+ ++hex;
1902
+ --len;
1903
+ }
1904
+
1905
+ contains_underscores = (memchr (hex, ' _' , len) != NULL );
1906
+
1907
+ if (contains_underscores) {
1908
+ hex = estrndup (hex, len);
1909
+ strip_underscores (hex, &len);
1851
1910
}
1852
1911
1853
1912
if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= ' 7' )) {
@@ -1858,11 +1917,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
1858
1917
ZVAL_LONG (zendlval, ZEND_STRTOL (hex, &end, 16 ));
1859
1918
ZEND_ASSERT (!errno && end == hex + len);
1860
1919
}
1920
+ if (contains_underscores) {
1921
+ efree (hex);
1922
+ }
1861
1923
RETURN_TOKEN_WITH_VAL (T_LNUMBER);
1862
1924
} else {
1863
1925
ZVAL_DOUBLE (zendlval, zend_hex_strtod (hex, (const char **)&end));
1864
1926
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
1865
1927
ZEND_ASSERT (end == hex + len);
1928
+ if (contains_underscores) {
1929
+ efree (hex);
1930
+ }
1866
1931
RETURN_TOKEN_WITH_VAL (T_DNUMBER);
1867
1932
}
1868
1933
}
@@ -1894,10 +1959,22 @@ string:
1894
1959
1895
1960
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
	/*
	 * Floating-point literal (plain or with exponent), optionally
	 * containing '_' separators. Always yields T_DNUMBER.
	 */
	const char *stop;
	char *digits = yytext;
	int digits_len = yyleng;
	int has_separators = (memchr(digits, '_', digits_len) != NULL);

	if (has_separators) {
		/* Convert from a private, separator-free copy of the token. */
		digits = estrndup(digits, digits_len);
		strip_underscores(digits, &digits_len);
	}

	ZVAL_DOUBLE(zendlval, zend_strtod(digits, &stop));
	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
	ZEND_ASSERT(stop == digits + digits_len);

	if (has_separators) {
		efree(digits);
	}
	RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
1903
1980
0 commit comments