@@ -279,6 +279,7 @@ const char *ir_reg_name(int8_t reg, ir_type type)
 	_(MUL_PWR2) \
 	_(DIV_PWR2) \
 	_(MOD_PWR2) \
+	_(SDIV_PWR2) \
 	_(OP_INT) \
 	_(OP_FP) \
 	_(BINOP_INT) \
@@ -320,7 +321,7 @@ const char *ir_rule_name[IR_LAST_OP] = {
 };
 
 /* register allocation */
-int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
+int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
 {
 	uint32_t rule = ir_rule(ctx, ref);
 	const ir_insn *insn;
@@ -415,6 +416,22 @@ int ir_get_target_constraints(const ir_ctx *ctx, ir_ref ref, ir_target_constrain
 				n++;
 			}
 			break;
+		case IR_SDIV_PWR2:
+			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
+			insn = &ctx->ir_base[ref];
+			n = 0;
+			if (IR_IS_CONST_REF(insn->op1)) {
+				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+				n++;
+			}
+			if (IR_IS_CONST_REF(insn->op2)) {
+				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
+				if (!aarch64_may_encode_imm12(offset)) {
+					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
+					n++;
+				}
+			}
+			break;
 		case IR_CTPOP:
 			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
 			insn = &ctx->ir_base[ref];
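The extra temporary for op2 is reserved only when the bias constant offset = 2^k - 1 cannot be folded into the ADD instruction's immediate field. AArch64 ADD/SUB immediates are unsigned 12-bit values, optionally shifted left by 12 bits; the sketch below shows what such an encodability test might look like (assumed semantics and a hypothetical helper name, not IR's actual aarch64_may_encode_imm12()):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: can "val" be encoded as an AArch64 ADD/SUB immediate?
 * Assumed rule: a non-negative 12-bit value, optionally shifted left by 12. */
static bool may_encode_addsub_imm(int64_t val)
{
	if (val < 0) {
		return false;
	}
	if ((val & ~(int64_t)0xfff) == 0) {
		return true;                       /* imm12, LSL #0  */
	}
	if ((val & ~((int64_t)0xfff << 12)) == 0) {
		return true;                       /* imm12, LSL #12 */
	}
	return false;
}

When the constant does not fit, the constraint above reserves a scratch register so ir_emit_sdiv_pwr2() can materialize it with ir_emit_load_imm_int() before the add.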
@@ -713,9 +730,12 @@ binop_fp:
 						// const
 					} else if (op2_insn->val.u64 == 1) {
 						return IR_COPY_INT;
-					} else if (IR_IS_TYPE_UNSIGNED(insn->type) && IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
-						// TODO: signed division by power of two ???
-						return IR_DIV_PWR2;
+					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
+						if (IR_IS_TYPE_UNSIGNED(insn->type)) {
+							return IR_DIV_PWR2;
+						} else {
+							return IR_SDIV_PWR2;
+						}
 					}
 				}
 				return IR_BINOP_INT;
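With this change the divisor only needs to be a power of two; the unsigned case keeps the existing lsr-based IR_DIV_PWR2 rule, while signed divisions are routed to the new IR_SDIV_PWR2 rule (a plain arithmetic shift would round toward negative infinity instead of truncating toward zero). IR_IS_POWER_OF_TWO presumably reduces to the usual single-bit test; a reference version for readers (hypothetical expansion, not IR's macro):

#include <stdint.h>

/* Usual power-of-two test: non-zero and only one bit set.
 * (Reference only; IR defines its own IR_IS_POWER_OF_TWO macro.) */
static int is_power_of_two(uint64_t x)
{
	return x != 0 && (x & (x - 1)) == 0;
}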
@@ -1298,10 +1318,7 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
 			ir_insn *insn = &ctx->ir_base[src];
 
 			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
-				const char *name = ir_get_str(ctx, insn->val.name);
-				void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
-					ctx->loader->resolve_sym_name(ctx->loader, name, insn->op == IR_FUNC) :
-					ir_resolve_sym_name(name);
+				void *addr = ir_sym_val(ctx, insn);
 				IR_ASSERT(addr);
 				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
 			} else if (insn->op == IR_STR) {
@@ -2050,19 +2067,75 @@ static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
 	if (insn->op == IR_MUL) {
 		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
 		if (shift == 1) {
-			| ASM_REG_REG_REG_OP add, insn->type, def_reg, op1_reg, op1_reg
+			| ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op1_reg
 		} else {
-			| ASM_REG_REG_IMM_OP lsl, insn->type, def_reg, op1_reg, shift
+			| ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
 		}
 	} else if (insn->op == IR_DIV) {
 		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
-		IR_ASSERT(IR_IS_TYPE_UNSIGNED(insn->type));
-		| ASM_REG_REG_IMM_OP lsr, insn->type, def_reg, op1_reg, shift
+		IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
+		| ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
 	} else {
 		IR_ASSERT(insn->op == IR_MOD);
-		IR_ASSERT(IR_IS_TYPE_UNSIGNED(insn->type));
+		IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
 		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
-		| ASM_REG_REG_IMM_OP and, insn->type, def_reg, op1_reg, mask
+		| ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, mask
+	}
+	if (IR_REG_SPILLED(ctx->regs[def][0])) {
+		ir_emit_store(ctx, type, def, def_reg);
+	}
+}
+
+static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
+{
+	ir_backend_data *data = ctx->data;
+	dasm_State **Dst = &data->dasm_state;
+	ir_type type = insn->type;
+	ir_ref op1 = insn->op1;
+	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
+	ir_reg op1_reg = ctx->regs[def][1];
+	ir_reg op2_reg = ctx->regs[def][2];
+	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
+	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
+
+	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
+	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
+	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && def_reg != op1_reg);
+
+	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
+		op1_reg = IR_REG_NUM(op1_reg);
+		ir_emit_load(ctx, type, op1_reg, op1);
+	}
+
+	if (op2_reg != IR_REG_NONE) {
+		ir_emit_load_imm_int(ctx, type, op2_reg, offset);
+	}
+
+	if (ir_type_size[type] == 8) {
+		| cmp Rx(op1_reg), #0
+		if (op2_reg != IR_REG_NONE) {
+			| add Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
+		} else {
+			| add Rx(def_reg), Rx(op1_reg), #offset
+		}
+		| csel Rx(def_reg), Rx(def_reg), Rx(op1_reg), lt
+		| asr Rx(def_reg), Rx(def_reg), #shift
+	} else {
+		| cmp Rw(op1_reg), #0
+		if (op2_reg != IR_REG_NONE) {
+			| add Rw(def_reg), Rw(op1_reg), Rw(op2_reg)
+		} else {
+			| add Rw(def_reg), Rw(op1_reg), #offset
+		}
+		| csel Rw(def_reg), Rw(def_reg), Rw(op1_reg), lt
+		if (ir_type_size[type] == 4) {
+			| asr Rw(def_reg), Rw(def_reg), #shift
+		} else if (ir_type_size[type] == 2) {
+			| ubfx Rw(def_reg), Rw(def_reg), #shift, #16
+		} else {
+			IR_ASSERT(ir_type_size[type] == 1);
+			| ubfx Rw(def_reg), Rw(def_reg), #shift, #8
+		}
 	}
 	if (IR_REG_SPILLED(ctx->regs[def][0])) {
 		ir_emit_store(ctx, type, def, def_reg);
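The emitted sequence implements the standard bias trick for truncating signed division by 2^shift: cmp/csel picks the biased dividend (op1 + offset, with offset = 2^shift - 1) only when op1 is negative, asr then shifts it, and for the 8/16-bit types ubfx performs the shift and truncates the result to the operand width in one instruction. A minimal C sketch of the same computation for the 64-bit case, assuming arithmetic right shift of negative values as on AArch64 (a reading aid, not the generated code):

#include <stdint.h>

/* Truncating signed division by d = 1 << shift, mirroring the
 * cmp/add/csel/asr sequence emitted above. */
static int64_t sdiv_pwr2_ref(int64_t op1, unsigned shift)
{
	int64_t offset = ((int64_t)1 << shift) - 1;    /* d - 1                      */
	int64_t biased = op1 + offset;                 /* add  def, op1, #offset     */
	int64_t sel    = (op1 < 0) ? biased : op1;     /* cmp op1, #0 ; csel ..., lt */
	return sel >> shift;                           /* asr  def, def, #shift      */
}

/* Example: op1 = -7, shift = 2 (divide by 4): -7 >> 2 is -2, but -7 / 4
 * must be -1; with the bias, (-7 + 3) >> 2 = -4 >> 2 = -1. */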
@@ -5549,6 +5622,9 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
 			case IR_MOD_PWR2:
 				ir_emit_mul_div_mod_pwr2(ctx, i, insn);
 				break;
+			case IR_SDIV_PWR2:
+				ir_emit_sdiv_pwr2(ctx, i, insn);
+				break;
 			case IR_SHIFT:
 				ir_emit_shift(ctx, i, insn);
 				break;