#include "yjit_codegen.h"

// Exit code returned by branch_stub_hit() when a stub cannot compile its
// target block. Generated once by gen_code_for_exit_from_stub() at init time.
static uint8_t *code_for_exit_from_stub = NULL;

// Get an operand for a slot relative to the cached stack pointer,
// at the given offset in bytes.
static x86opnd_t
ctx_sp_opnd(ctx_t *ctx, int32_t offset_bytes)
{
    int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
    return mem_opnd(64, REG_SP, offset);
}

// Push one new value on the temp stack with an explicit mapping.
// Returns a memory operand pointing to the new stack top.
static x86opnd_t
ctx_stack_push_mapping(ctx_t *ctx, temp_type_mapping_t mapping)
{
    // If type propagation is disabled, don't track any types
    if (rb_yjit_opts.no_type_prop) {
        mapping.type = TYPE_UNKNOWN;
    }

    // Keep track of the type and mapping of the pushed value
    if (ctx->stack_size < MAX_TEMP_TYPES) {
        ctx->temp_mapping[ctx->stack_size] = mapping.mapping;
        ctx->temp_types[ctx->stack_size] = mapping.type;

        RUBY_ASSERT(mapping.mapping.kind != TEMP_LOCAL || mapping.mapping.idx < MAX_LOCAL_TYPES);
        RUBY_ASSERT(mapping.mapping.kind != TEMP_STACK || mapping.mapping.idx == 0);
        RUBY_ASSERT(mapping.mapping.kind != TEMP_SELF || mapping.mapping.idx == 0);
    }

    ctx->stack_size += 1;
    ctx->sp_offset += 1;

    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
    return mem_opnd(64, REG_SP, offset);
}

// Push one new value on the temp stack.
// Returns a memory operand pointing to the new stack top.
static x86opnd_t
ctx_stack_push(ctx_t *ctx, val_type_t type)
{
    temp_type_mapping_t mapping = { .mapping = MAP_STACK, .type = type };
    return ctx_stack_push_mapping(ctx, mapping);
}

// Push the self value on the stack
static x86opnd_t
ctx_stack_push_self(ctx_t *ctx)
{
    temp_type_mapping_t mapping = { .mapping = MAP_SELF, .type = TYPE_UNKNOWN };
    return ctx_stack_push_mapping(ctx, mapping);
}

// Push a local variable on the stack, remembering which local the value maps to.
static x86opnd_t
ctx_stack_push_local(ctx_t *ctx, size_t local_idx)
{
    if (local_idx >= MAX_LOCAL_TYPES) {
        return ctx_stack_push(ctx, TYPE_UNKNOWN);
    }

    temp_type_mapping_t mapping = {
        .mapping = (temp_mapping_t){ .kind = TEMP_LOCAL, .idx = local_idx },
        .type = TYPE_UNKNOWN
    };

    return ctx_stack_push_mapping(ctx, mapping);
}

// Pop N values off the stack.
// Returns a memory operand pointing to the stack top before the pop.
static x86opnd_t
ctx_stack_pop(ctx_t *ctx, size_t n)
{
    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
    x86opnd_t top = mem_opnd(64, REG_SP, offset);

    // Clear the types of the popped values
    for (size_t i = 0; i < n; ++i)
    {
        size_t idx = ctx->stack_size - i - 1;
        if (idx < MAX_TEMP_TYPES) {
            ctx->temp_types[idx] = TYPE_UNKNOWN;
            ctx->temp_mapping[idx] = MAP_STACK;
        }
    }

    ctx->stack_size -= n;
    ctx->sp_offset -= n;

    return top;
}

// Get an operand pointing to a slot on the temp stack, where idx 0 is the top.
static x86opnd_t
ctx_stack_opnd(ctx_t *ctx, int32_t idx)
{
    // SP points just above the topmost value
    int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
    x86opnd_t opnd = mem_opnd(64, REG_SP, offset);

    return opnd;
}

// Get the type of an instruction operand (self or a stack slot).
static val_type_t
ctx_get_opnd_type(const ctx_t *ctx, insn_opnd_t opnd)
{
    if (opnd.is_self)
        return ctx->self_type;

    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If outside of the tracked range, we know nothing about this slot
    if (stack_idx >= MAX_TEMP_TYPES)
        return TYPE_UNKNOWN;

    temp_mapping_t mapping = ctx->temp_mapping[stack_idx];

    switch (mapping.kind) {
      case TEMP_SELF:
        return ctx->self_type;

      case TEMP_STACK:
        return ctx->temp_types[ctx->stack_size - 1 - opnd.idx];

      case TEMP_LOCAL:
        return ctx->local_types[mapping.idx];
    }

    rb_bug("unreachable");
}

// Overwrite dest with a type that must be compatible and at least as specific.
#define UPGRADE_TYPE(dest, src) do { \
    RUBY_ASSERT(type_diff((src), (dest)) != INT_MAX); \
    (dest) = (src); \
} while (false)

// Upgrade (or "learn") the type of an instruction operand.
// The new type must be compatible and at least as specific as the known type.
// If the operand maps to self or a local, the type propagates back to its source.
static void
ctx_upgrade_opnd_type(ctx_t *ctx, insn_opnd_t opnd, val_type_t type)
{
    // If type propagation is disabled, don't track any types
    if (rb_yjit_opts.no_type_prop)
        return;

    if (opnd.is_self) {
        UPGRADE_TYPE(ctx->self_type, type);
        return;
    }

    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If outside of the tracked range, do nothing
    if (stack_idx >= MAX_TEMP_TYPES)
        return;

    temp_mapping_t mapping = ctx->temp_mapping[stack_idx];

    switch (mapping.kind) {
      case TEMP_SELF:
        UPGRADE_TYPE(ctx->self_type, type);
        break;

      case TEMP_STACK:
        UPGRADE_TYPE(ctx->temp_types[stack_idx], type);
        break;

      case TEMP_LOCAL:
        UPGRADE_TYPE(ctx->local_types[mapping.idx], type);
        break;
    }
}

// Get both the type and the mapping (where the value originates) of an operand.
static temp_type_mapping_t
ctx_get_opnd_mapping(const ctx_t *ctx, insn_opnd_t opnd)
{
    temp_type_mapping_t type_mapping;
    type_mapping.type = ctx_get_opnd_type(ctx, opnd);

    if (opnd.is_self) {
        type_mapping.mapping = MAP_SELF;
        return type_mapping;
    }

    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    if (stack_idx < MAX_TEMP_TYPES) {
        type_mapping.mapping = ctx->temp_mapping[stack_idx];
    }
    else {
        // The source of this slot is not tracked, so treat it as a plain
        // stack temporary; its type must already be unknown.
        RUBY_ASSERT(type_mapping.type.type == ETYPE_UNKNOWN);
        type_mapping.mapping = MAP_STACK;
    }

    return type_mapping;
}

// Overwrite both the type and mapping of a stack operand.
static void
ctx_set_opnd_mapping(ctx_t *ctx, insn_opnd_t opnd, temp_type_mapping_t type_mapping)
{
    int stack_idx = ctx->stack_size - 1 - opnd.idx;

    // If type propagation is disabled, don't track any types
    if (rb_yjit_opts.no_type_prop)
        return;

    // If outside of the tracked range, do nothing
    if (stack_idx >= MAX_TEMP_TYPES)
        return;

    ctx->temp_mapping[stack_idx] = type_mapping.mapping;

    // The type is only used when the mapping is MAP_STACK
    ctx->temp_types[stack_idx] = type_mapping.type;
}

// Set the type of a local variable.
static void
ctx_set_local_type(ctx_t *ctx, size_t idx, val_type_t type)
{
    // If type propagation is disabled, don't track any types
    if (rb_yjit_opts.no_type_prop)
        return;

    if (idx >= MAX_LOCAL_TYPES)
        return;

    // If any stack slots currently map to this local, detach them:
    // copy the local's old type into the slot and remap it to the stack.
    for (int i = 0; i < MAX_TEMP_TYPES; i++) {
        temp_mapping_t *mapping = &ctx->temp_mapping[i];
        if (mapping->kind == TEMP_LOCAL && mapping->idx == idx) {
            ctx->temp_types[i] = ctx->local_types[mapping->idx];
            *mapping = MAP_STACK;
        }
    }

    ctx->local_types[idx] = type;
}

// Erase all local variable type information, e.g. because of a call we can't track.
static void
ctx_clear_local_types(ctx_t *ctx)
{
    // Stack slots that map to locals must be detached first: even if a
    // local's value changes, the copy already on the stack does not.
    for (int i = 0; i < MAX_TEMP_TYPES; i++) {
        temp_mapping_t *mapping = &ctx->temp_mapping[i];
        if (mapping->kind == TEMP_LOCAL) {
            ctx->temp_types[i] = ctx->local_types[mapping->idx];
            *mapping = MAP_STACK;
        }
        RUBY_ASSERT(mapping->kind == TEMP_STACK || mapping->kind == TEMP_SELF);
    }
    memset(&ctx->local_types, 0, sizeof(ctx->local_types));
}

// Compute the YJIT type of a Ruby value at runtime
static val_type_t
yjit_type_of_value(VALUE val)
{
    if (FIXNUM_P(val)) {
        return TYPE_FIXNUM;
    }
    else if (NIL_P(val)) {
        return TYPE_NIL;
    }
    else if (val == Qtrue) {
        return TYPE_TRUE;
    }
    else if (STATIC_SYM_P(val)) {
        return TYPE_STATIC_SYMBOL;
    }
    // ... remaining cases: false, flonums, and heap objects (arrays, hashes,
    // strings, generic heap values) ...
}

// Printable name of a tracked type (used for debugging and stats output)
static const char *
yjit_type_name(val_type_t type)
{
    // ...
    if (type.is_imm) {
        return "unknown immediate";
    }
    else if (type.is_heap) {
        return "unknown heap";
    }
    // ... names for the fully known types ...
}

// Compute the difference between two value types.
// Returns 0 if they are the same, a positive value if dst is compatible but
// less specific than src, and INT_MAX if they are incompatible.
static int
type_diff(val_type_t src, val_type_t dst)
{
    // Incompatible: dst makes a claim (heap, immediate, or a concrete type)
    // that src does not satisfy
    if (dst.is_heap && !src.is_heap)
        return INT_MAX;
    if (dst.is_imm && !src.is_imm)
        return INT_MAX;
    if (dst.type != ETYPE_UNKNOWN && dst.type != src.type)
        return INT_MAX;

    // Compatible, but dst is strictly less specific than src
    if (dst.is_heap != src.is_heap)
        return 1;
    if (dst.is_imm != src.is_imm)
        return 1;
    if (dst.type != src.type)
        return 1;

    return 0;
}
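
/*
 * Worked example (assuming the usual YJIT type constants): with
 * src = TYPE_FIXNUM,
 *
 *     type_diff(src, TYPE_FIXNUM)   == 0         same type
 *     type_diff(src, TYPE_UNKNOWN)  == 1         compatible, detail dropped
 *     type_diff(src, TYPE_NIL)      == INT_MAX   incompatible
 *
 * UPGRADE_TYPE() asserts on this value, and ctx_diff() below sums it across
 * self, the locals and the stack slots to score how well an already-compiled
 * block version matches an incoming context.
 */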

// Compute a difference score for two contexts.
// Returns 0 if identical, a positive score if dst is compatible but less
// specific than src, and INT_MAX if a block compiled for dst cannot serve src.
static int
ctx_diff(const ctx_t *src, const ctx_t *dst)
{
    // Can only look up the first version in a guard chain
    if (dst->chain_depth != 0)
        return INT_MAX;
    if (src->chain_depth != 0)
        return INT_MAX;

    // The stack layout must match exactly
    if (dst->stack_size != src->stack_size)
        return INT_MAX;
    if (dst->sp_offset != src->sp_offset)
        return INT_MAX;

    int diff = 0;

    // Compare the type of self
    int self_diff = type_diff(src->self_type, dst->self_type);
    if (self_diff == INT_MAX)
        return INT_MAX;
    diff += self_diff;

    // Compare each tracked local variable type
    for (size_t i = 0; i < MAX_LOCAL_TYPES; ++i)
    {
        val_type_t t_src = src->local_types[i];
        val_type_t t_dst = dst->local_types[i];
        int temp_diff = type_diff(t_src, t_dst);
        if (temp_diff == INT_MAX)
            return INT_MAX;
        diff += temp_diff;
    }

    // Compare each value on the temp stack, including where it came from
    for (size_t i = 0; i < src->stack_size; ++i)
    {
        temp_type_mapping_t m_src = ctx_get_opnd_mapping(src, OPND_STACK(i));
        temp_type_mapping_t m_dst = ctx_get_opnd_mapping(dst, OPND_STACK(i));

        if (m_dst.mapping.kind != m_src.mapping.kind) {
            if (m_dst.mapping.kind == TEMP_STACK) {
                // Dropping the source of a temp is allowed, at a small cost
                diff += 1;
            }
            else {
                return INT_MAX;
            }
        }
        else if (m_dst.mapping.idx != m_src.mapping.idx) {
            // Mapped to different locals
            return INT_MAX;
        }

        int temp_diff = type_diff(m_src.type, m_dst.type);
        if (temp_diff == INT_MAX)
            return INT_MAX;
        diff += temp_diff;
    }

    return diff;
}

// Get the array of block versions compiled for a given instruction index,
// or NULL if nothing has been compiled for this iseq yet.
static rb_yjit_block_array_t
yjit_get_version_array(const rb_iseq_t *iseq, unsigned idx)
{
    struct rb_iseq_constant_body *body = iseq->body;

    if (rb_darray_size(body->yjit_blocks) == 0) {
        return NULL;
    }

    RUBY_ASSERT((unsigned)rb_darray_size(body->yjit_blocks) == body->iseq_size);
    return rb_darray_get(body->yjit_blocks, idx);
}

// Count the number of block versions compiled for a given blockid
static size_t
get_num_versions(blockid_t blockid)
{
    return rb_darray_size(yjit_get_version_array(blockid.iseq, blockid.idx));
}

// Keep track of a fully constructed block version.
static void
add_block_version(block_t *block)
{
    const blockid_t blockid = block->blockid;
    const rb_iseq_t *iseq = blockid.iseq;
    struct rb_iseq_constant_body *body = iseq->body;

    // Function entry blocks must have a stack size of 0
    RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));

    // Lazily initialize the per-iseq version table, one slot per instruction
    if (rb_darray_size(body->yjit_blocks) == 0) {
        int32_t casted = (int32_t)body->iseq_size;
        if ((unsigned)casted != body->iseq_size) {
            // Don't add the block if the iseq is too large
            return;
        }
        if (!rb_darray_make(&body->yjit_blocks, casted)) {
            rb_bug("allocation failed");
        }

        // First block compiled for this iseq
        yjit_runtime_counters.compiled_iseq_count++;
    }

    RUBY_ASSERT((int32_t)blockid.idx < rb_darray_size(body->yjit_blocks));
    rb_yjit_block_array_t *block_array_ref = rb_darray_ref(body->yjit_blocks, blockid.idx);

    // Add the new block version to the list for this instruction index
    if (!rb_darray_append(block_array_ref, block)) {
        rb_bug("allocation failed");
    }

    // The iseq now references new Ruby objects: run write barriers for the
    // method lookup dependencies and for objects embedded in generated code.
    cme_dependency_t *cme_dep;
    rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
        RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->receiver_klass);
        RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->callee_cme);
    }

    uint32_t *offset_element;
    rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
        uint32_t offset_to_value = *offset_element;
        uint8_t *value_address = cb_get_ptr(cb, offset_to_value);

        VALUE object;
        memcpy(&object, value_address, SIZEOF_VALUE);
        RB_OBJ_WRITTEN(iseq, Qundef, object);
    }

    yjit_runtime_counters.compiled_block_count++;
}
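
/*
 * Note on the write barriers above: generated machine code and the dependency
 * list both embed raw VALUE references that the iseq now keeps alive. Because
 * CRuby's GC is generational, an old iseq that gains references to young
 * objects must report them with RB_OBJ_WRITTEN(), otherwise a minor GC could
 * collect objects that JIT-compiled code still points to.
 */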

static ptrdiff_t
branch_code_size(const branch_t *branch)
{
    return branch->end_addr - branch->start_addr;
}

// Regenerate the machine code for a branch in place, possibly changing its size
static void
regenerate_branch(codeblock_t *cb, branch_t *branch)
{
    if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
        // Regenerating this branch would modify frozen bytes. Do nothing.
        return;
    }

    const uint32_t old_write_pos = cb->write_pos;
    const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;

    // Rewind the write position to the branch and emit it again
    cb_set_write_ptr(cb, branch->start_addr);
    branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
    branch->end_addr = cb_get_write_ptr(cb);

    if (branch_terminates_block) {
        // Adjust the block size to the new end of the branch
        branch->block->end_addr = branch->end_addr;
    }

    // Restore the write position if we rewound it to regenerate the branch;
    // otherwise the branch sits at the end of cb and we keep the new position.
    if (old_write_pos > cb->write_pos) {
        cb_set_pos(cb, old_write_pos);
    }
}
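
/*
 * Branch shapes: a branch_t records whether one of its two targets is laid
 * out immediately after the branch (SHAPE_NEXT0 / SHAPE_NEXT1) or not
 * (SHAPE_DEFAULT). The gen_fn callback uses the shape to emit only the jumps
 * that are actually needed, which is why regenerate_branch() can shrink a
 * branch when a target becomes the fallthrough block, and why the
 * stub-patching code below asserts that rewrites never grow a branch past its
 * original footprint.
 */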

// Create a new outgoing branch entry for a block
static branch_t *
make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
{
    RUBY_ASSERT(block != NULL);

    branch_t *branch = calloc(1, sizeof(branch_t));

    // Save the branch parameters
    branch->block = block;
    branch->src_ctx = *src_ctx;
    branch->gen_fn = gen_fn;
    branch->shape = SHAPE_DEFAULT;

    // Add to the list of outgoing branches for the block
    rb_darray_append(&block->outgoing, branch);

    return branch;
}

// Retrieve the best existing block version for the given blockid and context.
static block_t *
find_block_version(blockid_t blockid, const ctx_t *ctx)
{
    rb_yjit_block_array_t versions = yjit_get_version_array(blockid.iseq, blockid.idx);

    // Best match found so far
    block_t *best_version = NULL;
    int best_diff = INT_MAX;

    // For each version compiled for this blockid, keep the closest context
    rb_darray_for(versions, idx) {
        block_t *version = rb_darray_get(versions, idx);
        int diff = ctx_diff(ctx, &version->ctx);

        if (diff < best_diff) {
            best_version = version;
            best_diff = diff;
        }
    }

    // With greedy versioning, don't settle for an imperfect match
    // while we are still below the version limit.
    if (rb_yjit_opts.greedy_versioning)
    {
        if ((uint32_t)rb_darray_size(versions) + 1 < rb_yjit_opts.max_versions && best_diff > 0) {
            return NULL;
        }
    }

    return best_version;
}
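
/*
 * This is the lookup half of basic block versioning: several versions of the
 * same (iseq, insn_idx) location can coexist, each specialized for a
 * different incoming context. find_block_version() picks the compatible
 * version with the smallest ctx_diff() score, and limit_block_versions()
 * below degrades to a fully generic context once rb_yjit_opts.max_versions
 * versions exist for one location, so version counts stay bounded.
 */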

// When the version limit for a blockid would be exceeded, substitute a generic
// context: no type information, but the same stack size and SP offset, so the
// new version can serve all future requests at this location.
static ctx_t
limit_block_versions(blockid_t blockid, const ctx_t *ctx)
{
    // Guard chains implement their own limit, do nothing
    if (ctx->chain_depth > 0)
        return *ctx;

    // If the block version we are about to add would hit the limit
    if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions) {
        ctx_t generic_ctx = DEFAULT_CTX;
        generic_ctx.stack_size = ctx->stack_size;
        generic_ctx.sp_offset = ctx->sp_offset;
        return generic_ctx;
    }

    return *ctx;
}

static void yjit_free_block(block_t *block);

// Compile a new block version immediately, plus any blocks that request to be
// placed right after it. Returns the first block of the batch, or NULL.
static block_t *
gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
{
    // Track all blocks compiled in this invocation. Batches tend to be small
    // because compilation is usually broken up by lazy stubs. The batch only
    // succeeds if every block in it compiles.
    enum { MAX_PER_BATCH = 64 };
    block_t *batch[MAX_PER_BATCH];
    int compiled_count = 0;
    bool batch_success = true;
    block_t *block;

    // Generate code for the first block
    block = gen_single_block(blockid, start_ctx, ec);
    if (block) {
        // Track the block
        add_block_version(block);

        batch[compiled_count] = block;
        compiled_count++;
    }
    batch_success = block;

    // For each successor block to compile
    while (batch_success) {
        // If the previous block has no outgoing branches, the batch is done
        if (rb_darray_size(block->outgoing) == 0) {
            break;
        }

        // The last outgoing branch of a block can request (via a NULL target
        // address) that its target be placed immediately after it.
        branch_t *last_branch = rb_darray_back(block->outgoing);

        // If the last branch already has its target addresses, stop
        if (last_branch->dst_addrs[0] || last_branch->dst_addrs[1]) {
            break;
        }

        if (last_branch->targets[0].iseq == NULL) {
            rb_bug("invalid target for last branch");
        }

        // Generate code for the requested block with the context from the branch
        blockid_t requested_id = last_branch->targets[0];
        const ctx_t *requested_ctx = &last_branch->target_ctxs[0];

        batch_success = compiled_count < MAX_PER_BATCH;
        if (batch_success) {
            block = gen_single_block(requested_id, requested_ctx, ec);
            batch_success = block;
        }

        if (!batch_success) {
            break;
        }

        // Connect the last branch and the new block
        last_branch->dst_addrs[0] = block->start_addr;
        rb_darray_append(&block->incoming, last_branch);
        last_branch->blocks[0] = block;

        // This block should immediately follow the last branch
        RUBY_ASSERT(block->start_addr == last_branch->end_addr);

        // Track the block
        add_block_version(block);

        batch[compiled_count] = block;
        compiled_count++;
    }

    if (batch_success) {
        // Success. Return the first block in the batch.
        return batch[0];
    }

    // The batch failed. Remove and free everything compiled so far.
    for (int block_idx = 0; block_idx < compiled_count; block_idx++) {
        block_t *const to_free = batch[block_idx];

        // Undo add_block_version()
        rb_yjit_block_array_t versions = yjit_get_version_array(to_free->blockid.iseq, to_free->blockid.idx);
        block_array_remove(versions, to_free);

        yjit_free_block(to_free);
    }

    yjit_runtime_counters.compilation_failure++;
    return NULL;
}
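
/*
 * Together with gen_direct_jump() further down, this implements lazy basic
 * block versioning: a block ends in a branch whose target 0 has no address
 * yet, which the loop above interprets as "compile that target next and place
 * it right here". Code is therefore only generated along paths that are about
 * to execute, and the context recorded on the branch carries type information
 * forward into the next block.
 */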

// Generate a block version that is an entry point inserted into an iseq
static uint8_t *
gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t *ec)
{
    // Only generate an entry point when the interpreter is at the start of the iseq
    if (iseq->body->iseq_encoded != ec->cfp->pc) {
        return NULL;
    }

    blockid_t blockid = { iseq, insn_idx };

    // Write the interpreter entry prologue, then compile the first block
    uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
    block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);

    cb_mark_all_executable(ocb);
    cb_mark_all_executable(cb);

    // If we couldn't generate any code, or the block is empty, fail the entry point
    if (!block || block->end_idx == insn_idx) {
        return NULL;
    }

    return code_ptr;
}

// Called by the generated code when a branch stub is executed.
// Compiles the branch target (if needed) and patches the branch.
// Returns the address the stubbed branch should jump to.
static uint8_t *
branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
{
    uint8_t *dst_addr = NULL;

    const ptrdiff_t branch_size_on_entry = branch_code_size(branch);

    RUBY_ASSERT(target_idx < 2);
    blockid_t target = branch->targets[target_idx];
    const ctx_t *target_ctx = &branch->target_ctxs[target_idx];

    // If this branch has already been patched, return the existing address.
    // The same stub can be hit more than once (e.g. from multiple ractors).
    if (branch->blocks[target_idx]) {
        dst_addr = branch->dst_addrs[target_idx];
    }
    else {
        // Generated code operates on the stack without updating cfp->sp.
        // Set sp and pc so the GC and the interpreter see a consistent frame
        // while we compile.
        VALUE *const original_interp_sp = ec->cfp->sp;
        ec->cfp->sp += target_ctx->sp_offset;
        ec->cfp->pc = yjit_iseq_pc_at_idx(target.iseq, target.idx);

        // Try to find an existing compiled version of this block
        block_t *p_block = find_block_version(target, target_ctx);

        // If this block hasn't yet been compiled
        if (!p_block) {
            const uint8_t branch_old_shape = branch->shape;
            bool branch_modified = false;

            // If the new block can be generated right after the branch
            if (cb_get_write_ptr(cb) == branch->end_addr) {
                // This branch must be the last thing in its block
                RUBY_ASSERT(branch->end_addr == branch->block->end_addr);

                // Change the branch shape so the target falls through,
                // and rewrite the branch in its (possibly smaller) form
                branch->shape = (uint8_t)target_idx;
                regenerate_branch(cb, branch);
                branch_modified = true;

                // Continue writing exactly at the branch end, dropping any
                // bytes freed by shrinking the branch
                cb_set_write_ptr(cb, branch->end_addr);
            }

            // Compile the new block version
            p_block = gen_block_version(target, target_ctx, ec);

            if (!p_block && branch_modified) {
                // Compilation failed but the branch was rewritten: restore it
                branch->shape = branch_old_shape;
                regenerate_branch(cb, branch);
            }
        }

        if (p_block) {
            // The branch shape must match the actual code layout
            RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));

            // Add this branch to the list of incoming branches for the target
            rb_darray_append(&p_block->incoming, branch);

            // Update the branch target address and mark it as patched
            dst_addr = p_block->start_addr;
            branch->dst_addrs[target_idx] = dst_addr;
            branch->blocks[target_idx] = p_block;

            // Rewrite the branch with the new jump target address
            regenerate_branch(cb, branch);

            // Restore the interpreter sp that the caller expects
            ec->cfp->sp = original_interp_sp;
        }
        else {
            // We could not compile the target block, so exit to the
            // interpreter at the stubbed location instead. sp and pc are
            // already set up for that above.
            dst_addr = code_for_exit_from_stub;
        }

        cb_mark_all_executable(ocb);
        cb_mark_all_executable(cb);
    }

    const ptrdiff_t new_branch_size = branch_code_size(branch);
    RUBY_ASSERT_ALWAYS(new_branch_size <= branch_size_on_entry &&
            "branch stubs should not enlarge branches");

    return dst_addr;
}

// Get the address of a compiled version of the branch target, or of a freshly
// generated stub that will trigger its compilation when first executed.
static uint8_t *
get_branch_target(blockid_t target, const ctx_t *ctx, branch_t *branch, uint32_t target_idx)
{
    block_t *p_block = find_block_version(target, ctx);

    // If the block already exists, jump straight to it
    if (p_block) {
        // Add an incoming branch for this version
        rb_darray_append(&p_block->incoming, branch);
        branch->blocks[target_idx] = p_block;

        return p_block->start_addr;
    }

    // Make sure there is enough memory in the outlined codeblock for a stub
    const long MAX_CODE_SIZE = 64;
    if (ocb->write_pos + MAX_CODE_SIZE >= cb->mem_size) {
        return NULL;
    }

    // Generate an outlined stub that calls
    // branch_stub_hit(branch, target_idx, ec) and jumps to its return value
    uint8_t *stub_addr = cb_get_ptr(ocb, ocb->write_pos);

    mov(ocb, C_ARG_REGS[2], REG_EC);
    mov(ocb, C_ARG_REGS[1], imm_opnd(target_idx));
    mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch));
    call_ptr(ocb, REG0, (void *)&branch_stub_hit);

    jmp_rm(ocb, RAX);

    RUBY_ASSERT(cb_get_ptr(ocb, ocb->write_pos) - stub_addr <= MAX_CODE_SIZE);

    return stub_addr;
}
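
/*
 * This pair of functions implements lazy compilation. A branch is first
 * emitted with one or both targets pointing at a stub in ocb, the outlined
 * codeblock, which keeps cold paths away from the hot code in cb. When the
 * stub is executed it calls branch_stub_hit(), which compiles the target for
 * the exact context recorded on the branch, patches the branch to point at
 * the new block, and returns the address the stub then jumps to. From the
 * generated code's point of view the branch behaves as if its target had
 * always been compiled, and the assert in branch_stub_hit() guarantees the
 * patched branch never grows past its original footprint.
 */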

// Emit a branch from the current block to up to two target blocks,
// compiling stubs for targets that don't exist yet.
static void
gen_branch(
    jitstate_t *jit,
    const ctx_t *src_ctx,
    blockid_t target0,
    const ctx_t *ctx0,
    blockid_t target1,
    const ctx_t *ctx1,
    branchgen_fn gen_fn
)
{
    RUBY_ASSERT(target0.iseq != NULL);

    branch_t *branch = make_branch_entry(jit->block, src_ctx, gen_fn);
    branch->targets[0] = target0;
    branch->targets[1] = target1;
    branch->target_ctxs[0] = *ctx0;
    branch->target_ctxs[1] = ctx1? *ctx1:DEFAULT_CTX;

    // Get the branch targets or stubs
    branch->dst_addrs[0] = get_branch_target(target0, ctx0, branch, 0);
    branch->dst_addrs[1] = ctx1? get_branch_target(target1, ctx1, branch, 1):NULL;

    // Call the branch generation function
    branch->start_addr = cb_get_write_ptr(cb);
    regenerate_branch(cb, branch);
}

// Branch generator for unconditional jumps: emits nothing when the target is
// the fallthrough block, and a single jmp otherwise.
static void
gen_jump_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
    switch (shape) {
      case SHAPE_NEXT0:
        break;
      case SHAPE_NEXT1:
        RUBY_ASSERT(false);
        break;
      case SHAPE_DEFAULT:
        jmp_ptr(cb, target0);
        break;
    }
}

// Emit a direct jump to another block version, or request that the next
// compiled block be placed immediately after this one.
static void
gen_direct_jump(jitstate_t *jit, const ctx_t *ctx, blockid_t target0)
{
    RUBY_ASSERT(target0.iseq != NULL);

    branch_t *branch = make_branch_entry(jit->block, ctx, gen_jump_branch);
    branch->targets[0] = target0;
    branch->target_ctxs[0] = *ctx;

    block_t *p_block = find_block_version(target0, ctx);

    // If a compatible version of the target already exists, jump to it
    if (p_block) {
        rb_darray_append(&p_block->incoming, branch);

        branch->dst_addrs[0] = p_block->start_addr;
        branch->blocks[0] = p_block;
        branch->shape = SHAPE_DEFAULT;

        // Call the branch generation function
        branch->start_addr = cb_get_write_ptr(cb);
        gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
        branch->end_addr = cb_get_write_ptr(cb);
    }
    else {
        // A NULL target address tells gen_block_version() to compile the
        // target next and place it right here, so no jump is emitted.
        branch->dst_addrs[0] = NULL;
        branch->shape = SHAPE_NEXT0;
        branch->start_addr = cb_get_write_ptr(cb);
        branch->end_addr = cb_get_write_ptr(cb);
    }
}
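
/*
 * Note that the fallthrough case still allocates a branch_t even though it
 * emits no machine code (start_addr == end_addr). The branch object is what
 * lets invalidate_block_version() later detach the adjacent block and
 * regenerate this site as a real jump to a new stub, which is the
 * "target_next" case handled during invalidation below.
 */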

// Stop compiling the current block at this instruction and emit a jump to a
// stub, so compilation resumes here lazily with runtime type information.
static void
defer_compilation(jitstate_t *jit, ctx_t *cur_ctx)
{
    // Deferring twice from the same point is not expected
    if (cur_ctx->chain_depth != 0) {
        rb_bug("double defer");
    }

    ctx_t next_ctx = *cur_ctx;

    if (next_ctx.chain_depth >= UINT8_MAX) {
        rb_bug("max block version chain depth reached");
    }

    next_ctx.chain_depth += 1;

    branch_t *branch = make_branch_entry(jit->block, cur_ctx, gen_jump_branch);

    // Request a stub for the same instruction, with the incremented chain depth
    branch->target_ctxs[0] = next_ctx;
    branch->targets[0] = (blockid_t){ jit->block->blockid.iseq, jit->insn_idx };
    branch->dst_addrs[0] = get_branch_target(branch->targets[0], &next_ctx, branch, 0);

    // Call the branch generation function
    branch->start_addr = cb_get_write_ptr(cb);
    gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
    branch->end_addr = cb_get_write_ptr(cb);
}
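
/*
 * chain_depth is what makes deferred compilation (and guard chains) work with
 * the versioning machinery: versions created this way target the same
 * (iseq, insn_idx) as their parent but carry a nonzero chain_depth, and
 * ctx_diff() refuses to match any context with chain_depth != 0, so they can
 * only be reached through the branch that created them. When the stub is hit,
 * compilation restarts at this instruction with whatever types have been
 * learned at run time.
 */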

// Free a block version and unlink it from the block graph
static void
yjit_free_block(block_t *block)
{
    yjit_unlink_method_lookup_dependency(block);
    yjit_block_assumptions_free(block);

    // Remove this block from the targets of its predecessors
    rb_darray_for(block->incoming, incoming_idx) {
        // Branch from a predecessor to us
        branch_t *pred_branch = rb_darray_get(block->incoming, incoming_idx);

        // If this is us, nullify the target block
        for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
            if (pred_branch->blocks[succ_idx] == block) {
                pred_branch->blocks[succ_idx] = NULL;
            }
        }
    }

    // For each outgoing branch
    rb_darray_for(block->outgoing, branch_idx) {
        branch_t *out_branch = rb_darray_get(block->outgoing, branch_idx);

        // For each successor block
        for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
            block_t *succ = out_branch->blocks[succ_idx];

            if (succ == NULL)
                continue;

            // Remove this branch from the successor's incoming list
            rb_darray_for(succ->incoming, incoming_idx) {
                branch_t *pred_branch = rb_darray_get(succ->incoming, incoming_idx);
                if (pred_branch == out_branch) {
                    rb_darray_remove_unordered(succ->incoming, incoming_idx);
                    break;
                }
            }
        }

        // Free the outgoing branch entry
        free(out_branch);
    }

    rb_darray_free(block->incoming);
    rb_darray_free(block->outgoing);
    rb_darray_free(block->gc_object_offsets);

    free(block);
}

// Remove a block version from a version array (order is not preserved)
static void
block_array_remove(rb_yjit_block_array_t block_array, block_t *block)
{
    block_t **element;
    rb_darray_foreach(block_array, idx, element) {
        if (*element == block) {
            rb_darray_remove_unordered(block_array, idx);
            return;
        }
    }
}

// Basic integrity checks on a program location
static void
verify_blockid(const blockid_t blockid)
{
    const rb_iseq_t *const iseq = blockid.iseq;
    RUBY_ASSERT_ALWAYS(blockid.idx < iseq->body->iseq_size);
}

// Invalidate one specific block version
static void
invalidate_block_version(block_t *block)
{
    ASSERT_vm_locking();

    verify_blockid(block->blockid);

    const rb_iseq_t *iseq = block->blockid.iseq;

    // Remove this block from the version array
    rb_yjit_block_array_t versions = yjit_get_version_array(iseq, block->blockid.idx);
    block_array_remove(versions, block);

    // Get a pointer to the generated code for this block
    uint8_t *code_ptr = block->start_addr;

    // Make the start of the block jump to its exit. This covers entries into
    // the block that can't be patched precisely below, and OOM situations.
    if (block->entry_exit == block->start_addr) {
        // Some blocks exit on entry; patching a jump-to-self would loop forever
    }
    else if (block->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
        // Patch in a jump to block->entry_exit
        uint32_t cur_pos = cb->write_pos;
        cb_set_write_ptr(cb, block->start_addr);
        jmp_ptr(cb, block->entry_exit);
        RUBY_ASSERT_ALWAYS(cb_get_ptr(cb, cb->write_pos) < block->end_addr &&
                "invalidation wrote past end of block");
        cb_set_pos(cb, cur_pos);
    }

    // Retarget each incoming branch to a fresh stub
    rb_darray_for(block->incoming, incoming_idx) {
        branch_t *branch = rb_darray_get(block->incoming, incoming_idx);
        uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
        RUBY_ASSERT(branch->dst_addrs[target_idx] == code_ptr);

        // Mark this target as being a stub again
        branch->blocks[target_idx] = NULL;

        // Don't patch the frozen code region
        if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
            continue;
        }

        // Create a stub for this branch target
        uint8_t *branch_target = get_branch_target(
            block->blockid,
            &block->ctx,
            branch,
            target_idx
        );

        if (!branch_target) {
            // We were unable to generate a stub (e.g. out of executable
            // memory). Use the block's exit instead so the branch still
            // gets patched to valid code.
            branch_target = block->entry_exit;
        }

        branch->dst_addrs[target_idx] = branch_target;

        // Check if the invalidated block immediately follows this branch
        bool target_next = (block->start_addr == branch->end_addr);

        if (target_next) {
            // The target is no longer adjacent: emit a full jump. This may
            // write into the start of the block being invalidated.
            branch->shape = SHAPE_DEFAULT;
        }

        // Rewrite the branch with the new jump target address
        regenerate_branch(cb, branch);

        if (target_next && branch->end_addr > block->end_addr) {
            fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%ld block_size=%ld\n",
                    branch->block->blockid.idx,
                    block->blockid.idx,
                    branch->end_addr - block->end_addr,
                    block->end_addr - block->start_addr);
            yjit_print_iseq(branch->block->blockid.iseq);
            rb_bug("yjit invalidate rewrote branch past end of invalidated block");
        }
    }

    // Clear the iseq's JIT entry point if we invalidated its entry block,
    // so the interpreter runs the iseq and it can be recompiled later.
    if (block->blockid.idx == 0) {
        iseq->body->jit_func = 0;
    }

    yjit_free_block(block);

    yjit_runtime_counters.invalidation_count++;

    cb_mark_all_executable(ocb);
    cb_mark_all_executable(cb);
}
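
/*
 * Invalidation is the safety valve for all of the speculation above: when an
 * assumption a block was compiled under stops holding (for example a method
 * redefinition tracked through the cme_dependencies registered in
 * add_block_version()), the block is removed from its version array, its
 * incoming branches are pointed at fresh stubs so execution lazily recompiles
 * against the new state, and the block body itself is patched to jump to its
 * exit for any path that can't be retargeted precisely.
 */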

// Initialize the core: pre-generate the shared exit code that branch stubs
// fall back to when a target block cannot be compiled.
static void
yjit_init_core(void)
{
    gen_code_for_exit_from_stub();
}