Skip to content

Commit 4e99e81

Browse files
committed
special case literal char handling
We must be able to tell the difference between a one-char string and a one-char literal string. This commit moves one-char string handling to the lexer; as a consequence, it removes the ability of opcache to cache one-char strings marked literal. TODO: known strings?
1 parent 905fddc commit 4e99e81

21 files changed

+471
-17
lines changed

Zend/zend_compile.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,13 @@ static int lookup_cv(zend_string *name) /* {{{ */{
504504
static inline zend_string *zval_make_interned_string(zval *zv) /* {{{ */
505505
{
506506
ZEND_ASSERT(Z_TYPE_P(zv) == IS_STRING);
507+
508+
if (ZSTR_IS_LITERAL_CHAR(Z_STR_P(zv))) {
509+
Z_TYPE_FLAGS_P(zv) = 0;
510+
511+
return Z_STR_P(zv);
512+
}
513+
507514
Z_STR_P(zv) = zend_new_interned_string(Z_STR_P(zv));
508515
if (ZSTR_IS_INTERNED(Z_STR_P(zv))) {
509516
Z_TYPE_FLAGS_P(zv) = 0;

Zend/zend_language_scanner.l

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)
761761
}
762762

763763
zend_ast* zend_get_scanned_string_ast(zval *zendlval, uint32_t start_line) {
764-
if (Z_TYPE_P(zendlval) == IS_STRING && !ZSTR_IS_INTERNED(Z_STR_P(zendlval))) {
764+
if (Z_TYPE_P(zendlval) == IS_STRING && Z_STRLEN_P(zendlval) > 1) {
765765
zend_string *string =
766766
Z_STR_P(zendlval);
767767

@@ -904,7 +904,7 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter
904904
ZVAL_STRINGL(zendlval, s, sz); \
905905
efree(s); \
906906
} else if (yyleng == 1) { \
907-
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \
907+
ZVAL_INTERNED_STR(zendlval, ZSTR_LITERAL_CHAR((zend_uchar)*(yytext))); \
908908
} else { \
909909
ZVAL_STRINGL(zendlval, yytext, yyleng); \
910910
}
@@ -916,13 +916,13 @@ static zend_result zend_scan_escape_string(zval *zendlval, char *str, int len, c
916916

917917
if (len <= 1) {
918918
if (len < 1) {
919-
ZVAL_EMPTY_STRING(zendlval);
919+
ZVAL_INTERNED_STR(zendlval, ZSTR_EMPTY_LITERAL_ALLOC());
920920
} else {
921921
zend_uchar c = (zend_uchar)*str;
922922
if (c == '\n' || c == '\r') {
923923
CG(zend_lineno)++;
924924
}
925-
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
925+
ZVAL_INTERNED_STR(zendlval, ZSTR_LITERAL_CHAR(c));
926926
}
927927
goto skip_escape_conversion;
928928
}
@@ -2130,7 +2130,7 @@ string:
21302130

21312131
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM}|{ONUM} { /* Offset must be treated as a string */
21322132
if (yyleng == 1) {
2133-
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext)));
2133+
ZVAL_INTERNED_STR(zendlval, ZSTR_LITERAL_CHAR((zend_uchar)*(yytext)));
21342134
} else {
21352135
ZVAL_STRINGL(zendlval, yytext, yyleng);
21362136
}
@@ -2285,7 +2285,7 @@ inline_char_handler:
22852285
yyless(readsize);
22862286
}
22872287
} else if (yyleng == 1) {
2288-
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext));
2288+
ZVAL_INTERNED_STR(zendlval, ZSTR_LITERAL_CHAR((zend_uchar)*yytext));
22892289
} else {
22902290
ZVAL_STRINGL(zendlval, yytext, yyleng);
22912291
}
@@ -2460,13 +2460,13 @@ inline_char_handler:
24602460

24612461
if (yyleng-bprefix-2 <= 1) {
24622462
if (yyleng-bprefix-2 < 1) {
2463-
ZVAL_EMPTY_STRING(zendlval);
2463+
ZVAL_INTERNED_STR(zendlval, ZSTR_EMPTY_LITERAL_ALLOC());
24642464
} else {
24652465
zend_uchar c = (zend_uchar)*(yytext+bprefix+1);
24662466
if (c == '\n' || c == '\r') {
24672467
CG(zend_lineno)++;
24682468
}
2469-
ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c));
2469+
ZVAL_INTERNED_STR(zendlval, ZSTR_LITERAL_CHAR(c));
24702470
}
24712471
goto skip_escape_conversion;
24722472
}

Zend/zend_string.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ static zend_new_interned_string_func_t interned_string_request_handler = zend_ne
4141
static zend_string_init_interned_func_t interned_string_init_request_handler = zend_string_init_interned_request;
4242

4343
ZEND_API zend_string *zend_empty_string = NULL;
44+
ZEND_API zend_string *zend_empty_literal = NULL;
4445
ZEND_API zend_string *zend_one_char_string[256];
46+
ZEND_API zend_string *zend_one_char_literal[256];
4547
ZEND_API zend_string **zend_known_strings = NULL;
4648

4749
ZEND_API zend_ulong ZEND_FASTCALL zend_string_hash_func(zend_string *str)
@@ -109,6 +111,39 @@ ZEND_API void zend_interned_strings_init(void)
109111
str = zend_string_init(known_strings[i], strlen(known_strings[i]), 1);
110112
zend_known_strings[i] = zend_new_interned_string_permanent(str);
111113
}
114+
115+
/* literal chars */
116+
for (i = 0; i < 256; i++) {
117+
zend_string *string = pemalloc(_ZSTR_STRUCT_SIZE(1), 1);
118+
119+
if (!string) {
120+
break;
121+
}
122+
123+
ZSTR_LEN(string) = 1;
124+
ZSTR_VAL(string)[0] = i;
125+
ZSTR_VAL(string)[1] = 0;
126+
GC_SET_REFCOUNT(string, 2);
127+
GC_TYPE_INFO(string) =
128+
GC_STRING | IS_STR_LITERAL | (IS_STR_INTERNED|IS_STR_PERMANENT) << GC_FLAGS_SHIFT;
129+
zend_string_hash_func(string);
130+
zend_one_char_literal[i] = string;
131+
}
132+
133+
/* literal empty */
134+
zend_string *empty = pemalloc(_ZSTR_STRUCT_SIZE(0), 1);
135+
136+
if (!empty) {
137+
return;
138+
}
139+
140+
ZSTR_VAL(empty)[0] = 0;
141+
ZSTR_LEN(empty) = 0;
142+
GC_SET_REFCOUNT(empty, 2);
143+
GC_TYPE_INFO(empty) =
144+
GC_STRING | IS_STR_LITERAL | (IS_STR_INTERNED|IS_STR_PERMANENT) << GC_FLAGS_SHIFT;
145+
zend_string_hash_func(empty);
146+
zend_empty_literal = empty;
112147
}
113148

114149
ZEND_API void zend_interned_strings_dtor(void)

Zend/zend_string.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ ZEND_API extern zend_string *zend_empty_string;
5353
ZEND_API extern zend_string *zend_one_char_string[256];
5454
ZEND_API extern zend_string **zend_known_strings;
5555

56+
ZEND_API extern zend_string *zend_one_char_literal[256];
57+
ZEND_API extern zend_string *zend_empty_literal;
58+
5659
END_EXTERN_C()
5760

5861
/* Shortcuts */
@@ -76,7 +79,9 @@ END_EXTERN_C()
7679
#define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED)
7780

7881
#define ZSTR_EMPTY_ALLOC() zend_empty_string
82+
#define ZSTR_EMPTY_LITERAL_ALLOC() zend_empty_literal
7983
#define ZSTR_CHAR(c) zend_one_char_string[c]
84+
#define ZSTR_LITERAL_CHAR(c) zend_one_char_literal[c]
8085
#define ZSTR_KNOWN(idx) zend_known_strings[idx]
8186

8287
#define _ZSTR_HEADER_SIZE XtOffsetOf(zend_string, val)
@@ -331,6 +336,7 @@ static zend_always_inline void zend_string_release_ex(zend_string *s, bool persi
331336
}
332337

333338
#define ZSTR_IS_LITERAL(s) (GC_TYPE_INFO(s) & IS_STR_LITERAL)
339+
#define ZSTR_IS_LITERAL_CHAR(s) (ZSTR_IS_LITERAL(s) && (ZSTR_LEN(s) <= 1))
334340

335341
static zend_always_inline zend_string* zend_string_set_literal(zend_string *s) {
336342
if (UNEXPECTED(ZSTR_IS_LITERAL(s))) {

ext/opcache/ZendAccelerator.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -427,12 +427,12 @@ static void accel_interned_strings_save_state(void)
427427
}
428428

429429
static zend_always_inline zend_string *accel_find_interned_string(zend_string *str)
430-
{
430+
{
431431
zend_ulong h;
432432
uint32_t pos;
433433
zend_string *s;
434434

435-
if (IS_ACCEL_INTERNED(str)) {
435+
if (IS_ACCEL_INTERNED(str) || ZSTR_IS_LITERAL_CHAR(str)) {
436436
/* this is already an interned string */
437437
return str;
438438
}
@@ -471,7 +471,7 @@ zend_string* ZEND_FASTCALL accel_new_interned_string(zend_string *str)
471471
return str;
472472
}
473473

474-
if (IS_ACCEL_INTERNED(str)) {
474+
if (IS_ACCEL_INTERNED(str) || ZSTR_IS_LITERAL_CHAR(str)) {
475475
/* this is already an interned string */
476476
return str;
477477
}
@@ -1368,7 +1368,7 @@ static zend_string* accel_new_interned_key(zend_string *key)
13681368
{
13691369
zend_string *new_key;
13701370

1371-
if (zend_accel_in_shm(key)) {
1371+
if (zend_accel_in_shm(key) || ZSTR_IS_LITERAL_CHAR(key)) {
13721372
return key;
13731373
}
13741374
GC_ADDREF(key);
@@ -2414,7 +2414,7 @@ static zend_class_entry* zend_accel_inheritance_cache_add(zend_class_entry *ce,
24142414
entry->dependencies = (zend_class_dependency*)ZCG(mem);
24152415
ZEND_HASH_FOREACH_STR_KEY_PTR(dependencies, dep_name, dep_ce) {
24162416
#if ZEND_DEBUG
2417-
ZEND_ASSERT(zend_accel_in_shm(dep_name));
2417+
ZEND_ASSERT(zend_accel_in_shm(dep_name) || ZSTR_IS_LITERAL_CHAR(dep_name));
24182418
#endif
24192419
entry->dependencies[i].name = dep_name;
24202420
entry->dependencies[i].ce = dep_ce;

ext/opcache/jit/zend_jit_helpers.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1188,7 +1188,7 @@ static void ZEND_FASTCALL zend_jit_fast_assign_concat_helper(zval *op1, zval *op
11881188
zend_throw_error(NULL, "String size overflow");
11891189
return;
11901190
}
1191-
1191+
11921192
if (UNEXPECTED(ZSTR_IS_LITERAL(Z_STR_P(op1)) && ZSTR_IS_LITERAL(Z_STR_P(op2)))) {
11931193
literal = true;
11941194
}

ext/opcache/zend_file_cache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ static int zend_file_cache_flock(int fd, int type)
135135
} while (0)
136136
#define SERIALIZE_STR(ptr) do { \
137137
if (ptr) { \
138-
if (IS_ACCEL_INTERNED(ptr)) { \
138+
if (IS_ACCEL_INTERNED(ptr) || ZSTR_IS_LITERAL_CHAR(ptr)) { \
139139
(ptr) = zend_file_cache_serialize_interned((zend_string*)(ptr), info); \
140140
} else { \
141141
ZEND_ASSERT(IS_UNSERIALIZED(ptr)); \

ext/opcache/zend_persist.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
} \
7070
} while (0)
7171
#define zend_accel_store_interned_string(str) do { \
72-
if (!IS_ACCEL_INTERNED(str)) { \
72+
if (!IS_ACCEL_INTERNED(str) && !ZSTR_IS_LITERAL_CHAR(str)) { \
7373
zend_accel_store_string(str); \
7474
} \
7575
} while (0)

ext/opcache/zend_persist_calc.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
# define ADD_STRING(str) ADD_DUP_SIZE((str), _ZSTR_STRUCT_SIZE(ZSTR_LEN(str)))
3434

3535
# define ADD_INTERNED_STRING(str) do { \
36-
if (ZCG(current_persistent_script)->corrupted) { \
36+
if (ZSTR_IS_LITERAL_CHAR(str)) { \
37+
break; \
38+
} else if (ZCG(current_persistent_script)->corrupted) { \
3739
ADD_STRING(str); \
3840
} else if (!IS_ACCEL_INTERNED(str)) { \
3941
zend_string *tmp = accel_new_interned_string(str); \
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
--TEST--
2+
Literal Strings
3+
--ENV--
4+
TAINTED=strings
5+
--FILE--
6+
<?php
7+
var_dump(
8+
is_literal("strings"),
9+
is_literal($_ENV["TAINTED"]));
10+
?>
11+
--EXPECT--
12+
bool(true)
13+
bool(false)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
--TEST--
2+
Literal Variables Compile Time Concat
3+
--FILE--
4+
<?php
5+
var_dump(
6+
"literal" . "string",
7+
is_literal("literal" . "string"),
8+
24 . "" . 42,
9+
is_literal(24 . "" . 42));
10+
?>
11+
--EXPECT--
12+
string(13) "literalstring"
13+
bool(true)
14+
string(4) "2442"
15+
bool(false)
16+
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
--TEST--
2+
Literal Variables VM Concat
3+
--ENV--
4+
TAINTED=strings
5+
--FILE--
6+
<?php
7+
$literal = "literal";
8+
$string = "strings";
9+
10+
$twoFour = 24;
11+
$fourTwo = 42;
12+
13+
var_dump(
14+
$literal . " " . $string,
15+
is_literal($literal . $string),
16+
$twoFour . "" . $fourTwo,
17+
is_literal($twoFour . "" . $fourTwo),
18+
"not " . "literal " . $_ENV["TAINTED"],
19+
is_literal("not " . "literal " . $_ENV["TAINTED"]),
20+
$literal . $string,
21+
is_literal($literal . $string),
22+
'literal' . 'strings',
23+
is_literal('literal' . 'strings'),
24+
);
25+
26+
$literal .= $string;
27+
28+
var_dump(
29+
$literal,
30+
is_literal($literal));
31+
32+
$literal .= $_ENV["TAINTED"];
33+
34+
var_dump(
35+
$literal,
36+
is_literal($literal));
37+
?>
38+
--EXPECT--
39+
string(15) "literal strings"
40+
bool(true)
41+
string(4) "2442"
42+
bool(false)
43+
string(19) "not literal strings"
44+
bool(false)
45+
string(14) "literalstrings"
46+
bool(true)
47+
string(14) "literalstrings"
48+
bool(true)
49+
string(14) "literalstrings"
50+
bool(true)
51+
string(21) "literalstringsstrings"
52+
bool(false)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
--TEST--
2+
Literal Variables VM Rope
3+
--ENV--
4+
TAINTED=strings
5+
--FILE--
6+
<?php
7+
$literal = "literal";
8+
$string = "strings";
9+
10+
$twoFour = 24;
11+
$fourTwo = 42;
12+
13+
var_dump(
14+
"{$literal}{$string}",
15+
is_literal("{$literal}{$string}"),
16+
"{$twoFour}{$fourTwo}",
17+
is_literal("{$twoFour}{$fourTwo}"),
18+
"not {$literal} {$_ENV["TAINTED"]}",
19+
is_literal("not {$literal} {$_ENV["TAINTED"]}")
20+
);
21+
?>
22+
--EXPECT--
23+
string(14) "literalstrings"
24+
bool(true)
25+
string(4) "2442"
26+
bool(false)
27+
string(19) "not literal strings"
28+
bool(false)

0 commit comments

Comments
 (0)