Skip to content

Commit bb21d19

Browse files
committed
Update IR
IR commit: fe4ba285bc576d83bea4a8099fb7315b8bc8c7fb
1 parent 3fcf6ff commit bb21d19

File tree

8 files changed

+156
-65
lines changed

8 files changed

+156
-65
lines changed

ext/opcache/jit/ir/ir.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ void ir_strtab_free(ir_strtab *strtab);
541541
# define IR_DEBUG_GCM_SPLIT (1<<28)
542542
# define IR_DEBUG_SCHEDULE (1<<29)
543543
# define IR_DEBUG_RA (1<<30)
544+
# define IR_DEBUG_BB_SCHEDULE (1U<<31)
544545
#endif
545546

546547
typedef struct _ir_ctx ir_ctx;

ext/opcache/jit/ir/ir_aarch64.dasc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,7 +1010,7 @@ binop_fp:
10101010
return IR_RETURN_FP;
10111011
}
10121012
case IR_IF:
1013-
if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
1013+
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
10141014
op2_insn = &ctx->ir_base[insn->op2];
10151015
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
10161016
if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
@@ -1020,7 +1020,7 @@ binop_fp:
10201020
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
10211021
return IR_CMP_AND_BRANCH_FP;
10221022
}
1023-
} else if (op2_insn->op == IR_OVERFLOW) {
1023+
} else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) {
10241024
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
10251025
return IR_OVERFLOW_AND_BRANCH;
10261026
}
@@ -1033,7 +1033,7 @@ binop_fp:
10331033
}
10341034
case IR_GUARD:
10351035
case IR_GUARD_NOT:
1036-
if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
1036+
if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) {
10371037
op2_insn = &ctx->ir_base[insn->op2];
10381038
if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
10391039
// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
@@ -1047,7 +1047,7 @@ binop_fp:
10471047
ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
10481048
return IR_GUARD_CMP_FP;
10491049
}
1050-
} else if (op2_insn->op == IR_OVERFLOW) {
1050+
} else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) {
10511051
ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
10521052
return IR_GUARD_OVERFLOW;
10531053
}

ext/opcache/jit/ir/ir_cfg.c

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,13 +1151,11 @@ static void ir_insert_chain_before(ir_chain *chains, uint32_t c, uint32_t before
11511151
}
11521152

11531153
#ifndef IR_DEBUG_BB_SCHEDULE_GRAPH
1154-
# define IR_DEBUG_BB_SCHEDULE_GRAPH 0
1155-
#endif
1156-
#ifndef IR_DEBUG_BB_SCHEDULE_EDGES
1157-
# define IR_DEBUG_BB_SCHEDULE_EDGES 0
1158-
#endif
1159-
#ifndef IR_DEBUG_BB_SCHEDULE_CHAINS
1160-
# define IR_DEBUG_BB_SCHEDULE_CHAINS 0
1154+
# ifdef IR_DEBUG
1155+
# define IR_DEBUG_BB_SCHEDULE_GRAPH 1
1156+
# else
1157+
# define IR_DEBUG_BB_SCHEDULE_GRAPH 0
1158+
# endif
11611159
#endif
11621160

11631161
#if IR_DEBUG_BB_SCHEDULE_GRAPH
@@ -1210,20 +1208,17 @@ static void ir_dump_cfg_freq_graph(ir_ctx *ctx, float *bb_freq, uint32_t edges_c
12101208
}
12111209
#endif
12121210

1213-
#if IR_DEBUG_BB_SCHEDULE_EDGES
1211+
#ifdef IR_DEBUG
12141212
static void ir_dump_edges(ir_ctx *ctx, uint32_t edges_count, ir_edge_info *edges)
12151213
{
12161214
uint32_t i;
12171215

12181216
fprintf(stderr, "Edges:\n");
12191217
for (i = 0; i < edges_count; i++) {
1220-
fprintf(stderr, "\tBB%d -> BB%d [label=\"%0.3f\"]\n", edges[i].from, edges[i].to, edges[i].freq);
1218+
fprintf(stderr, "\tBB%d -> BB%d %0.3f\n", edges[i].from, edges[i].to, edges[i].freq);
12211219
}
1222-
fprintf(stderr, "}\n");
12231220
}
1224-
#endif
12251221

1226-
#if IR_DEBUG_BB_SCHEDULE_CHAINS
12271222
static void ir_dump_chains(ir_ctx *ctx, ir_chain *chains)
12281223
{
12291224
uint32_t b, tail, i;
@@ -1507,8 +1502,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
15071502
/* 2. Sort EDGEs according to their frequencies */
15081503
qsort(edges, edges_count, sizeof(ir_edge_info), ir_edge_info_cmp);
15091504

1510-
#if IR_DEBUG_BB_SCHEDULE_EDGES
1511-
ir_dump_edges(ctx, edges_count, edges);
1505+
#ifdef IR_DEBUG
1506+
if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1507+
ir_dump_edges(ctx, edges_count, edges);
1508+
}
15121509
#endif
15131510

15141511
/* 3. Process EDGEs in the decreasing frequency order and join the connected chains */
@@ -1555,13 +1552,17 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
15551552
}
15561553

15571554
#if IR_DEBUG_BB_SCHEDULE_GRAPH
1558-
ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains);
1555+
if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1556+
ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains);
1557+
}
15591558
#endif
15601559

15611560
ir_mem_free(bb_freq);
15621561

1563-
#if IR_DEBUG_BB_SCHEDULE_CHAINS
1564-
ir_dump_chains(ctx, chains);
1562+
#ifdef IR_DEBUG
1563+
if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1564+
ir_dump_chains(ctx, chains);
1565+
}
15651566
#endif
15661567

15671568
/* 4. Merge empty entry blocks */
@@ -1585,8 +1586,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
15851586
}
15861587
}
15871588

1588-
#if IR_DEBUG_BB_SCHEDULE_CHAINS
1589-
ir_dump_chains(ctx, chains);
1589+
#ifdef IR_DEBUG
1590+
if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1591+
ir_dump_chains(ctx, chains);
1592+
}
15901593
#endif
15911594
}
15921595

@@ -1619,8 +1622,10 @@ static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
16191622
}
16201623
}
16211624

1622-
#if IR_DEBUG_BB_SCHEDULE_CHAINS
1623-
ir_dump_chains(ctx, chains);
1625+
#ifdef IR_DEBUG
1626+
if (ctx->flags & IR_DEBUG_BB_SCHEDULE) {
1627+
ir_dump_chains(ctx, chains);
1628+
}
16241629
#endif
16251630

16261631
/* 7. Form a final BB order */

ext/opcache/jit/ir/ir_gcm.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
8484
return lca;
8585
}
8686

87+
#if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */
8788
if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
8889
ir_use_list *use_list = &ctx->use_lists[ref];
8990

@@ -96,6 +97,7 @@ static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
9697
}
9798
}
9899
}
100+
#endif
99101

100102
flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
101103
if ((flags & IR_BB_LOOP_WITH_ENTRY)
@@ -487,9 +489,19 @@ static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
487489
b = ir_gcm_select_best_block(ctx, ref, lca);
488490

489491
ctx->cfg_map[ref] = b;
490-
if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
491-
/* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */
492-
ctx->cfg_map[ref + 1] = b;
492+
493+
/* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */
494+
if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
495+
ir_use_list *use_list = &ctx->use_lists[ref];
496+
ir_ref n, *p, use;
497+
498+
for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
499+
use = *p;
500+
if (ctx->ir_base[use].op == IR_OVERFLOW) {
501+
ctx->cfg_map[use] = b;
502+
break;
503+
}
504+
}
493505
}
494506
}
495507
}

ext/opcache/jit/ir/ir_private.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,17 @@ IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n)
582582
q->set = ir_bitset_malloc(n);
583583
}
584584

585+
IR_ALWAYS_INLINE void ir_bitqueue_grow(ir_bitqueue *q, uint32_t n)
586+
{
587+
uint32_t len = ir_bitset_len(n);
588+
IR_ASSERT(len >= q->len);
589+
if (len > q->len) {
590+
q->set = ir_mem_realloc(q->set, len * (IR_BITSET_BITS / 8));
591+
memset(q->set + q->len, 0, (len - q->len) * (IR_BITSET_BITS / 8));
592+
q->len = len;
593+
}
594+
}
595+
585596
IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q)
586597
{
587598
ir_mem_free(q->set);

ext/opcache/jit/ir/ir_ra.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,7 +2115,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
21152115
ir_insn *insn;
21162116
uint32_t len;
21172117
ir_bitset todo, ready;
2118-
bool have_constants = 0;
2118+
bool have_constants_or_addresses = 0;
21192119

21202120
bb = &ctx->cfg_blocks[b];
21212121
if (!(bb->flags & IR_BB_DESSA_MOVES)) {
@@ -2141,8 +2141,8 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
21412141
insn = &ctx->ir_base[ref];
21422142
if (insn->op == IR_PHI) {
21432143
input = ir_insn_op(insn, k);
2144-
if (IR_IS_CONST_REF(input)) {
2145-
have_constants = 1;
2144+
if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2145+
have_constants_or_addresses = 1;
21462146
} else if (ctx->vregs[input] != ctx->vregs[ref]) {
21472147
s = ctx->vregs[input];
21482148
d = ctx->vregs[ref];
@@ -2204,13 +2204,13 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
22042204
ir_mem_free(todo);
22052205
ir_mem_free(loc);
22062206

2207-
if (have_constants) {
2207+
if (have_constants_or_addresses) {
22082208
for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
22092209
ref = *p;
22102210
insn = &ctx->ir_base[ref];
22112211
if (insn->op == IR_PHI) {
22122212
input = ir_insn_op(insn, k);
2213-
if (IR_IS_CONST_REF(input)) {
2213+
if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
22142214
emit_copy(ctx, insn->type, input, ref);
22152215
}
22162216
}

ext/opcache/jit/ir/ir_sccp.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,8 @@ static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bi
347347
for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
348348
ir_ref input = *p;
349349
*p = IR_UNUSED;
350-
if (input > 0 && _values[input].op == IR_BOTTOM) {
350+
/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
351+
if (input > 0 && _values[input].op > IR_COPY) {
351352
ir_use_list_remove_all(ctx, input, ref);
352353
if (ir_is_dead(ctx, input)) {
353354
/* schedule DCE */
@@ -396,13 +397,12 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r
396397
for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
397398
ir_ref input = *p;
398399
*p = IR_UNUSED;
399-
if (input > 0) {
400+
/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
401+
if (input > 0 && _values[input].op > IR_COPY) {
400402
ir_use_list_remove_all(ctx, input, ref);
401-
if (_values[input].op == IR_BOTTOM) {
402-
if (ir_is_dead(ctx, input)) {
403-
/* schedule DCE */
404-
ir_bitqueue_add(worklist, input);
405-
}
403+
if (ir_is_dead(ctx, input)) {
404+
/* schedule DCE */
405+
ir_bitqueue_add(worklist, input);
406406
}
407407
}
408408
}
@@ -429,8 +429,9 @@ static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_r
429429
}
430430
}
431431
#endif
432-
/* schedule folding */
433-
if (worklist && _values[use].op == IR_BOTTOM) {
432+
/* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
433+
if (worklist && _values[use].op > IR_COPY) {
434+
/* schedule folding */
434435
ir_bitqueue_add(worklist, use);
435436
}
436437
}
@@ -1067,7 +1068,7 @@ static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type typ
10671068
return ir_const(ctx, new_val, type);
10681069
}
10691070

1070-
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type)
1071+
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
10711072
{
10721073
uint32_t optx = IR_OPTX(op, type, 1);
10731074
ir_ref ref;
@@ -1079,6 +1080,7 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
10791080
if (!IR_IS_CONST_REF(src_ref)) {
10801081
ir_use_list_remove_one(ctx, src_ref, var_ref);
10811082
}
1083+
ir_bitqueue_add(worklist, ref);
10821084
return ref;
10831085
}
10841086
}
@@ -1091,6 +1093,8 @@ static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op,
10911093
if (!IR_IS_CONST_REF(src_ref)) {
10921094
ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
10931095
}
1096+
ir_bitqueue_grow(worklist, ref + 1);
1097+
ir_bitqueue_add(worklist, ref);
10941098
return ref;
10951099
}
10961100

@@ -1162,17 +1166,15 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi
11621166
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
11631167
ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
11641168
} else {
1165-
ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type);
1166-
ir_bitqueue_add(worklist, ctx->ir_base[use].op1);
1169+
ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
11671170
}
11681171
}
11691172
if (use_insn->op2 != ref) {
11701173
if (IR_IS_CONST_REF(use_insn->op2)
11711174
&& !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
11721175
ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
11731176
} else {
1174-
ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type);
1175-
ir_bitqueue_add(worklist, ctx->ir_base[use].op2);
1177+
ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
11761178
}
11771179
}
11781180
}
@@ -1185,8 +1187,7 @@ static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bi
11851187
&& !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
11861188
ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
11871189
} else {
1188-
ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type);
1189-
ir_bitqueue_add(worklist, ctx->ir_base[ref].op2);
1190+
ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type, worklist);
11901191
}
11911192

11921193
return 1;

0 commit comments

Comments
 (0)