Ruby 3.1.3p185 (2022-11-24 revision 1a6b16756e0ba6b95ab71a441357ed5484e33498)
yjit_codegen.c
1// This file is a fragment of the yjit.o compilation unit. See yjit.c.
2#include "internal.h"
3#include "gc.h"
4#include "internal/compile.h"
5#include "internal/class.h"
6#include "internal/hash.h"
7#include "internal/object.h"
8#include "internal/sanitizers.h"
9#include "internal/string.h"
10#include "internal/struct.h"
11#include "internal/variable.h"
12#include "internal/re.h"
13#include "probes.h"
14#include "probes_helper.h"
15#include "yjit.h"
16#include "yjit_iface.h"
17#include "yjit_core.h"
18#include "yjit_codegen.h"
19#include "yjit_asm.h"
20
21// Map from YARV opcodes to code generation functions
22static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
23
24// Map from method entries to code generation functions
25static st_table *yjit_method_codegen_table = NULL;
26
27// Code for exiting back to the interpreter from the leave instruction
28static void *leave_exit_code;
29
30// Code for full logic of returning from C method and exiting to the interpreter
31static uint32_t outline_full_cfunc_return_pos;
32
33// For implementing global code invalidation
34struct codepage_patch {
35 uint32_t inline_patch_pos;
36 uint32_t outlined_target_pos;
37};
38
39typedef rb_darray(struct codepage_patch) patch_array_t;
40
41static patch_array_t global_inval_patches = NULL;
42
43// Print the current source location for debugging purposes
44RBIMPL_ATTR_MAYBE_UNUSED()
45static void
46jit_print_loc(jitstate_t *jit, const char *msg)
47{
48 char *ptr;
49 long len;
50 VALUE path = rb_iseq_path(jit->iseq);
51 RSTRING_GETMEM(path, ptr, len);
52 fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
53}
54
55// Dump an object for debugging purposes
56RBIMPL_ATTR_MAYBE_UNUSED()
57static void
58jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
59 push_regs(cb);
60 mov(cb, C_ARG_REGS[0], opnd);
61 call_ptr(cb, REG0, (void *)rb_obj_info_dump);
62 pop_regs(cb);
63}
64
65// Get the current instruction's opcode
66static int
67jit_get_opcode(jitstate_t *jit)
68{
69 return jit->opcode;
70}
71
72// Get the index of the next instruction
73static uint32_t
74jit_next_insn_idx(jitstate_t *jit)
75{
76 return jit->insn_idx + insn_len(jit_get_opcode(jit));
77}
78
79// Get an instruction argument by index
80static VALUE
81jit_get_arg(jitstate_t *jit, size_t arg_idx)
82{
83 RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
84 return *(jit->pc + arg_idx + 1);
85}
86
87// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
88static void
89jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
90{
91 RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
92
93 // Load the pointer constant into the specified register
94 mov(cb, reg, const_ptr_opnd((void*)ptr));
95
96 // The pointer immediate is encoded as the last part of the mov written out
97 uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
98
99 if (!SPECIAL_CONST_P(ptr)) {
100 if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
101 rb_bug("allocation failed");
102 }
103 }
104}
105
106// Check if we are compiling the instruction at the stub PC
107// Meaning we are compiling the instruction that will execute next
108static bool
109jit_at_current_insn(jitstate_t *jit)
110{
111 const VALUE *ec_pc = jit->ec->cfp->pc;
112 return (ec_pc == jit->pc);
113}
114
115// Peek at the nth topmost value on the Ruby stack.
116// Returns the topmost value when n == 0.
117static VALUE
118jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
119{
120 RUBY_ASSERT(jit_at_current_insn(jit));
121
122 // Note: this does not account for ctx->sp_offset because
123 // this is only available when hitting a stub, and while
124 // hitting a stub, cfp->sp needs to be up to date in case
125 // codegen functions trigger GC. See :stub-sp-flush:.
126 VALUE *sp = jit->ec->cfp->sp;
127
128 return *(sp - 1 - n);
129}
130
131static VALUE
132jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
133{
134 return jit->ec->cfp->self;
135}
136
137RBIMPL_ATTR_MAYBE_UNUSED()
138static VALUE
139jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
140{
141 RUBY_ASSERT(jit_at_current_insn(jit));
142
143 int32_t local_table_size = jit->iseq->body->local_table_size;
144 RUBY_ASSERT(n < (int)jit->iseq->body->local_table_size);
145
146 const VALUE *ep = jit->ec->cfp->ep;
147 return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
148}
149
150// Save the incremented PC on the CFP
151// This is necessary when callees can raise or allocate
152static void
153jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
154{
155 codeblock_t *cb = jit->cb;
156 mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
157 mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
158}
159
160// Save the current SP on the CFP
161// This realigns the interpreter SP with the JIT SP
162// Note: this will change the current value of REG_SP,
163// which could invalidate memory operands
164static void
165jit_save_sp(jitstate_t *jit, ctx_t *ctx)
166{
167 if (ctx->sp_offset != 0) {
168 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
169 codeblock_t *cb = jit->cb;
170 lea(cb, REG_SP, stack_pointer);
171 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
172 ctx->sp_offset = 0;
173 }
174}
175
176// jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
177// could:
178// - Perform GC allocation
179// - Take the VM lock through RB_VM_LOCK_ENTER()
180// - Perform Ruby method call
181static void
182jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
183{
184 jit->record_boundary_patch_point = true;
185 jit_save_pc(jit, scratch_reg);
186 jit_save_sp(jit, ctx);
187}
188
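// Illustrative sketch of the usual calling pattern for the helper above
// (hypothetical opcode and callee, not part of the original file): save PC/SP
// first because the callee may allocate or raise, then read the operands,
// call the C routine, and push the result.
#if 0
static codegen_status_t
gen_example_call(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
{
    // Must come before reading stack operands: jit_save_sp() can change
    // REG_SP, which the stack operands are relative to.
    jit_prepare_routine_call(jit, ctx, REG0);

    x86opnd_t arg_opnd = ctx_stack_pop(ctx, 1);
    mov(cb, C_ARG_REGS[0], arg_opnd);
    call_ptr(cb, REG0, (void *)example_c_routine); // hypothetical C function

    x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
    mov(cb, stack_ret, RAX);
    return YJIT_KEEP_COMPILING;
}
#endif
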
189// Record the current codeblock write position for rewriting into a jump into
190// the outlined block later. Used to implement global code invalidation.
191static void
192record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
193{
194 struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
195 if (!rb_darray_append(&global_inval_patches, patch_point)) rb_bug("allocation failed");
196}
197
198static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
199
200#if YJIT_STATS
201
202// Add a comment at the current position in the code block
203static void
204_add_comment(codeblock_t *cb, const char *comment_str)
205{
206 // We can't add comments to the outlined code block
207 if (cb == ocb)
208 return;
209
210 // Avoid adding duplicate comment strings (can happen due to deferred codegen)
211 size_t num_comments = rb_darray_size(yjit_code_comments);
212 if (num_comments > 0) {
213 struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
214 if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
215 return;
216 }
217 }
218
219 struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
220 rb_darray_append(&yjit_code_comments, new_comment);
221}
222
223// Comments for generated machine code
224#define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
225
226// Verify the ctx's types and mappings against the compile-time stack, self,
227// and locals.
228static void
229verify_ctx(jitstate_t *jit, ctx_t *ctx)
230{
231 // Only able to check types when at current insn
232 RUBY_ASSERT(jit_at_current_insn(jit));
233
234 VALUE self_val = jit_peek_at_self(jit, ctx);
235 if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
236 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
237 }
238
239 for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
240 temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
241 VALUE val = jit_peek_at_stack(jit, ctx, i);
242 val_type_t detected = yjit_type_of_value(val);
243
244 if (learned.mapping.kind == TEMP_SELF) {
245 if (self_val != val) {
246 rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
247 " stack: %s\n"
248 " self: %s",
249 rb_obj_info(val),
250 rb_obj_info(self_val));
251 }
252 }
253
254 if (learned.mapping.kind == TEMP_LOCAL) {
255 int local_idx = learned.mapping.idx;
256 VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
257 if (local_val != val) {
258 rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
259 " stack: %s\n"
260 " local %i: %s",
261 rb_obj_info(val),
262 local_idx,
263 rb_obj_info(local_val));
264 }
265 }
266
267 if (type_diff(detected, learned.type) == INT_MAX) {
268 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
269 }
270 }
271
272 int32_t local_table_size = jit->iseq->body->local_table_size;
273 for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
274 val_type_t learned = ctx->local_types[i];
275 VALUE val = jit_peek_at_local(jit, ctx, i);
276 val_type_t detected = yjit_type_of_value(val);
277
278 if (type_diff(detected, learned) == INT_MAX) {
279 rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
280 }
281 }
282}
283
284#else
285
286#define ADD_COMMENT(cb, comment) ((void)0)
287#define verify_ctx(jit, ctx) ((void)0)
288
289#endif // if YJIT_STATS
290
291#if YJIT_STATS
292
293// Increment a profiling counter with counter_name
294#define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
295static void
296_gen_counter_inc(codeblock_t *cb, int64_t *counter)
297{
298 if (!rb_yjit_opts.gen_stats) return;
299
300 // Use REG1 because there might be a return value in REG0
301 mov(cb, REG1, const_ptr_opnd(counter));
302 cb_write_lock_prefix(cb); // for ractors.
303 add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
304}
305
306// Increment a counter then take an existing side exit.
307#define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
308static uint8_t *
309_counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
310{
311 if (!rb_yjit_opts.gen_stats) return existing_side_exit;
312
313 uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
314 _gen_counter_inc(jit->ocb, counter);
315 jmp_ptr(jit->ocb, existing_side_exit);
316 return start;
317}
318
319#else
320
321#define GEN_COUNTER_INC(cb, counter_name) ((void)0)
322#define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
323
324#endif // if YJIT_STATS
325
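// Usage sketch for the stats helpers above (illustrative fragment that would
// live inside a codegen function): GEN_COUNTER_INC() bumps a counter inline,
// while COUNTED_EXIT() wraps an existing side exit so the exit is counted
// before jumping to it. Both collapse to no-ops / the plain side exit when
// stats are disabled.
#if 0
    GEN_COUNTER_INC(cb, exec_instruction);
    jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
#endif
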
326// Generate an exit to return to the interpreter
327static uint32_t
328yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
329{
330 const uint32_t code_pos = cb->write_pos;
331
332 ADD_COMMENT(cb, "exit to interpreter");
333
334 // Generate the code to exit to the interpreter
335 // Write the adjusted SP back into the CFP
336 if (ctx->sp_offset != 0) {
337 x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
338 lea(cb, REG_SP, stack_pointer);
339 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
340 }
341
342 // Update CFP->PC
343 mov(cb, RAX, const_ptr_opnd(exit_pc));
344 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
345
346 // Accumulate stats about interpreter exits
347#if YJIT_STATS
348 if (rb_yjit_opts.gen_stats) {
349 mov(cb, RDI, const_ptr_opnd(exit_pc));
350 call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
351 }
352#endif
353
354 pop(cb, REG_SP);
355 pop(cb, REG_EC);
356 pop(cb, REG_CFP);
357
358 mov(cb, RAX, imm_opnd(Qundef));
359 ret(cb);
360
361 return code_pos;
362}
363
364// Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
365static uint8_t *
366yjit_gen_leave_exit(codeblock_t *cb)
367{
368 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
369
370 // Note, gen_leave() fully reconstructs interpreter state and leaves the
371 // return value in RAX before coming here.
372
373 // Every exit to the interpreter should be counted
374 GEN_COUNTER_INC(cb, leave_interp_return);
375
376 pop(cb, REG_SP);
377 pop(cb, REG_EC);
378 pop(cb, REG_CFP);
379
380 ret(cb);
381
382 return code_ptr;
383}
384
385// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
386// to the interpreter when it cannot service a stub by generating new code.
387// Before coming here, branch_stub_hit() takes care of fully reconstructing
388// interpreter state.
389static void
390gen_code_for_exit_from_stub(void)
391{
392 codeblock_t *cb = ocb;
393 code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
394
395 GEN_COUNTER_INC(cb, exit_from_branch_stub);
396
397 pop(cb, REG_SP);
398 pop(cb, REG_EC);
399 pop(cb, REG_CFP);
400
401 mov(cb, RAX, imm_opnd(Qundef));
402 ret(cb);
403}
404
405// :side-exit:
406// Get an exit for the current instruction in the outlined block. The code
407// for each instruction often begins with several guards before proceeding
408// to do work. When guards fail, an option we have is to exit to the
409// interpreter at an instruction boundary. The piece of code that takes
410// care of reconstructing interpreter state and exiting out of generated
411// code is called the side exit.
412//
413// No guards change the logic for reconstructing interpreter state at the
414// moment, so there is one unique side exit for each context. Note that
415// it's incorrect to jump to the side exit after any ctx stack push/pop operations
416// since they change the logic required for reconstructing interpreter state.
417static uint8_t *
418yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
419{
420 if (!jit->side_exit_for_pc) {
421 codeblock_t *ocb = jit->ocb;
422 uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
423 jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
424 }
425
426 return jit->side_exit_for_pc;
427}
428
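// Illustrative fragment showing how codegen functions typically use the cached
// side exit (hypothetical guard, not from the original file): emit the runtime
// check, then branch to the side exit when the check fails.
#if 0
    uint8_t *side_exit = yjit_side_exit(jit, ctx);

    // Guard that the top of the stack is a Fixnum, otherwise exit to the interpreter
    x86opnd_t opnd = ctx_stack_opnd(ctx, 0);
    test(cb, opnd, imm_opnd(RUBY_FIXNUM_FLAG));
    jz_ptr(cb, side_exit);
#endif
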
429// Ensure that there is an exit for the start of the block being compiled.
430// Block invalidation uses this exit.
431static void
432jit_ensure_block_entry_exit(jitstate_t *jit)
433{
434 block_t *block = jit->block;
435 if (block->entry_exit) return;
436
437 if (jit->insn_idx == block->blockid.idx) {
438 // We are compiling the first instruction in the block.
439 // Generate the exit with the cache in jitstate.
440 block->entry_exit = yjit_side_exit(jit, &block->ctx);
441 }
442 else {
443 VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
444 uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
445 block->entry_exit = cb_get_ptr(ocb, pos);
446 }
447}
448
449// Generate a runtime guard that ensures the PC is at the start of the iseq,
450// otherwise take a side exit. This is to handle the situation of optional
451// parameters. When a function with optional parameters is called, the entry
452// PC for the method isn't necessarily 0, but we always generate code that
453// assumes the entry point is 0.
454static void
455yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
456{
457 RUBY_ASSERT(cb != NULL);
458
459 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
460 mov(cb, REG1, const_ptr_opnd(iseq->body->iseq_encoded));
461 xor(cb, REG0, REG1);
462
463 // xor should impact ZF, so we can jz here
464 uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
465 jz_label(cb, pc_is_zero);
466
467 // We're not starting at the first PC, so we need to exit.
468 GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
469
470 pop(cb, REG_SP);
471 pop(cb, REG_EC);
472 pop(cb, REG_CFP);
473
474 mov(cb, RAX, imm_opnd(Qundef));
475 ret(cb);
476
477 // PC should be at the beginning
478 cb_write_label(cb, pc_is_zero);
479 cb_link_labels(cb);
480}
481
482// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
483// like the interpreter. When tracing for c_return is enabled, we patch the code after
484// the C method return to call into this to fire the event.
485static void
486full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
487{
488 rb_control_frame_t *cfp = ec->cfp;
489 RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
490 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
491
492 RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
493 RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
494
495 // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
496
497 // Pop the C func's frame and fire the c_return TracePoint event
498 // Note that this is the same order as vm_call_cfunc_with_frame().
499 rb_vm_pop_frame(ec);
500 EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
501 // Note, this deviates from the interpreter in that users need to enable
502 // a c_return TracePoint for this DTrace hook to work. A reasonable change
503 // since the Ruby return event works this way as well.
504 RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
505
506 // Push return value into the caller's stack. We know that it's a frame that
507 // uses cfp->sp because we are patching a call done with gen_send_cfunc().
508 ec->cfp->sp[0] = return_value;
509 ec->cfp->sp++;
510}
511
512// Landing code for when c_return tracing is enabled. See full_cfunc_return().
513static void
514gen_full_cfunc_return(void)
515{
516 codeblock_t *cb = ocb;
517 outline_full_cfunc_return_pos = ocb->write_pos;
518
519 // This chunk of code expects REG_EC to be filled properly and
520 // RAX to contain the return value of the C method.
521
522 // Call full_cfunc_return()
523 mov(cb, C_ARG_REGS[0], REG_EC);
524 mov(cb, C_ARG_REGS[1], RAX);
525 call_ptr(cb, REG0, (void *)full_cfunc_return);
526
527 // Count the exit
528 GEN_COUNTER_INC(cb, traced_cfunc_return);
529
530 // Return to the interpreter
531 pop(cb, REG_SP);
532 pop(cb, REG_EC);
533 pop(cb, REG_CFP);
534
535 mov(cb, RAX, imm_opnd(Qundef));
536 ret(cb);
537}
538
539/*
540Compile an interpreter entry block to be inserted into an iseq
541Returns `NULL` if compilation fails.
542*/
543static uint8_t *
544yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
545{
546 RUBY_ASSERT(cb != NULL);
547
548 enum { MAX_PROLOGUE_SIZE = 1024 };
549
550 // Check if we have enough executable memory
551 if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
552 return NULL;
553 }
554
555 const uint32_t old_write_pos = cb->write_pos;
556
557 // Align the current write position to cache line boundaries
558 cb_align_pos(cb, 64);
559
560 uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
561 ADD_COMMENT(cb, "yjit entry");
562
563 push(cb, REG_CFP);
564 push(cb, REG_EC);
565 push(cb, REG_SP);
566
567 // We are passed EC and CFP
568 mov(cb, REG_EC, C_ARG_REGS[0]);
569 mov(cb, REG_CFP, C_ARG_REGS[1]);
570
571 // Load the current SP from the CFP into REG_SP
572 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
573
574 // Setup cfp->jit_return
575 // TODO: this could use an IP relative LEA instead of an 8 byte immediate
576 mov(cb, REG0, const_ptr_opnd(leave_exit_code));
577 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
578
579 // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
580 // the case of optional parameters, the interpreter can set the pc to a
581 // different location depending on the optional parameters. If an iseq
582 // has optional parameters, we'll add a runtime check that the PC we've
583 // compiled for is the same PC that the interpreter wants us to run with.
584 // If they don't match, then we'll take a side exit.
585 if (iseq->body->param.flags.has_opt) {
586 yjit_pc_guard(cb, iseq);
587 }
588
589 // Verify MAX_PROLOGUE_SIZE
590 RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
591
592 return code_ptr;
593}
594
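// Sketch of the contract the prologue above sets up (assumed, caller side,
// simplified): the generated entry point is called like a C function taking
// (EC, CFP), and returns Qundef when it exits back to the interpreter (see
// yjit_gen_exit()) or the final Ruby value when it returns through `leave`
// (see yjit_gen_leave_exit()).
#if 0
    VALUE (*entry)(rb_execution_context_t *, rb_control_frame_t *) =
        (VALUE (*)(rb_execution_context_t *, rb_control_frame_t *))code_ptr;
    VALUE result = entry(ec, ec->cfp);
    if (result == Qundef) {
        // Resume interpreting at cfp->pc
    }
#endif
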
595// Generate code to check for interrupts and take a side-exit.
596// Warning: this function clobbers REG0
597static void
598yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
599{
600 // Check for interrupts
601 // see RUBY_VM_CHECK_INTS(ec) macro
602 ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
603 mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
604 not(cb, REG0_32);
605 test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
606 jnz_ptr(cb, side_exit);
607}
608
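// The assembly above is the moral equivalent of this C check, paraphrased from
// the RUBY_VM_CHECK_INTS() macro (simplified sketch):
#if 0
    if (ec->interrupt_flag & ~ec->interrupt_mask) {
        // A pending interrupt: take the side exit and let the interpreter handle it
    }
#endif
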
609// Generate a stubbed unconditional jump to the next bytecode instruction.
610// Blocks that are part of a guard chain can use this to share the same successor.
611static void
612jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
613{
614 // Reset the depth since in current usages we only ever jump to
615 // chain_depth > 0 from the same instruction.
616 ctx_t reset_depth = *current_context;
617 reset_depth.chain_depth = 0;
618
619 blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
620
621 // We are at the end of the current instruction. Record the boundary.
622 if (jit->record_boundary_patch_point) {
623 uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
624 record_global_inval_patch(jit->cb, exit_pos);
625 jit->record_boundary_patch_point = false;
626 }
627
628 // Generate the jump instruction
629 gen_direct_jump(
630 jit,
631 &reset_depth,
632 jump_block
633 );
634}
635
636// Compile a sequence of bytecode instructions for a given basic block version.
637// Part of gen_block_version().
638static block_t *
639gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
640{
641 RUBY_ASSERT(cb != NULL);
642 verify_blockid(blockid);
643
644 // Allocate the new block
645 block_t *block = calloc(1, sizeof(block_t));
646 if (!block) {
647 return NULL;
648 }
649
650 // Copy the starting context to avoid mutating it
651 ctx_t ctx_copy = *start_ctx;
652 ctx_t *ctx = &ctx_copy;
653
654 // Limit the number of specialized versions for this block
655 *ctx = limit_block_versions(blockid, ctx);
656
657 // Save the starting context on the block.
658 block->blockid = blockid;
659 block->ctx = *ctx;
660
661 RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
662
663 const rb_iseq_t *iseq = block->blockid.iseq;
664 const unsigned int iseq_size = iseq->body->iseq_size;
665 uint32_t insn_idx = block->blockid.idx;
666 const uint32_t starting_insn_idx = insn_idx;
667
668 // Initialize a JIT state object
669 jitstate_t jit = {
670 .cb = cb,
671 .ocb = ocb,
672 .block = block,
673 .iseq = iseq,
674 .ec = ec
675 };
676
677 // Mark the start position of the block
678 block->start_addr = cb_get_write_ptr(cb);
679
680 // For each instruction to compile
681 while (insn_idx < iseq_size) {
682 // Get the current pc and opcode
683 VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
684 int opcode = yjit_opcode_at_pc(iseq, pc);
685 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
686
687 // opt_getinlinecache wants to be in a block all on its own. Cut the block short
688 // if we run into it. See gen_opt_getinlinecache() for details.
689 if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
690 jit_jump_to_next_insn(&jit, ctx);
691 break;
692 }
693
694 // Set the current instruction
695 jit.insn_idx = insn_idx;
696 jit.opcode = opcode;
697 jit.pc = pc;
698 jit.side_exit_for_pc = NULL;
699
700 // If previous instruction requested to record the boundary
701 if (jit.record_boundary_patch_point) {
702 // Generate an exit to this instruction and record it
703 uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
704 record_global_inval_patch(cb, exit_pos);
705 jit.record_boundary_patch_point = false;
706 }
707
708 // Verify our existing assumption (DEBUG)
709 if (jit_at_current_insn(&jit)) {
710 verify_ctx(&jit, ctx);
711 }
712
713 // Lookup the codegen function for this instruction
714 codegen_fn gen_fn = gen_fns[opcode];
715 codegen_status_t status = YJIT_CANT_COMPILE;
716 if (gen_fn) {
717 if (0) {
718 fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
719 print_str(cb, insn_name(opcode));
720 }
721
722 // :count-placement:
723 // Count bytecode instructions that execute in generated code.
724 // Note that the increment happens even when the output takes a side exit.
725 GEN_COUNTER_INC(cb, exec_instruction);
726
727 // Add a comment for the name of the YARV instruction
728 ADD_COMMENT(cb, insn_name(opcode));
729
730 // Call the code generation function
731 status = gen_fn(&jit, ctx, cb);
732 }
733
734 // If we can't compile this instruction
735 // exit to the interpreter and stop compiling
736 if (status == YJIT_CANT_COMPILE) {
737 // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE,
738 // the exit this generates would be wrong. We could save a copy of the entry context
739 // and assert that ctx is the same here.
740 uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
741
742 // If this is the first instruction in the block, then we can use
743 // the exit for block->entry_exit.
744 if (insn_idx == block->blockid.idx) {
745 block->entry_exit = cb_get_ptr(cb, exit_off);
746 }
747 break;
748 }
749
750 // For now, reset the chain depth after each instruction as only the
751 // first instruction in the block can concern itself with the depth.
752 ctx->chain_depth = 0;
753
754 // Move to the next instruction to compile
755 insn_idx += insn_len(opcode);
756
757 // If the instruction terminates this block
758 if (status == YJIT_END_BLOCK) {
759 break;
760 }
761 }
762
763 // Mark the end position of the block
764 block->end_addr = cb_get_write_ptr(cb);
765
766 // Store the index of the last instruction in the block
767 block->end_idx = insn_idx;
768
769 // We currently can't handle cases where the request is for a block that
770 // doesn't go to the next instruction.
771 RUBY_ASSERT(!jit.record_boundary_patch_point);
772
773 // If code for the block doesn't fit, free the block and fail.
774 if (cb->dropped_bytes || ocb->dropped_bytes) {
775 yjit_free_block(block);
776 return NULL;
777 }
778
779 if (YJIT_DUMP_MODE >= 2) {
780 // Dump list of compiled instructions
781 fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
782 for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
783 int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
784 fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
785 idx += insn_len(opcode);
786 }
787 }
788
789 return block;
790}
791
792static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
793
794static codegen_status_t
795gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
796{
797 // Do nothing
798 return YJIT_KEEP_COMPILING;
799}
800
801static codegen_status_t
802gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
803{
804 // Get the top value and its type
805 x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
806 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
807
808 // Push the same value on top
809 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
810 mov(cb, REG0, dup_val);
811 mov(cb, loc0, REG0);
812
813 return YJIT_KEEP_COMPILING;
814}
815
816// duplicate stack top n elements
817static codegen_status_t
818gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
819{
820 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
821
822 // In practice, this seems to be used only for n == 2
823 if (n != 2) {
824 return YJIT_CANT_COMPILE;
825 }
826
827 x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
828 x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
829 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
830 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
831
832 x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
833 mov(cb, REG0, opnd1);
834 mov(cb, dst1, REG0);
835
836 x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
837 mov(cb, REG0, opnd0);
838 mov(cb, dst0, REG0);
839
840 return YJIT_KEEP_COMPILING;
841}
842
843static void
844stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
845{
846 x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
847 x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
848
849 temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
850 temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
851
852 mov(cb, reg0, opnd0);
853 mov(cb, reg1, opnd1);
854 mov(cb, opnd0, reg1);
855 mov(cb, opnd1, reg0);
856
857 ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
858 ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
859}
860
861// Swap top 2 stack entries
862static codegen_status_t
863gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
864{
865 stack_swap(ctx, cb, 0, 1, REG0, REG1);
866 return YJIT_KEEP_COMPILING;
867}
868
869// set Nth stack entry to stack top
870static codegen_status_t
871gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
872{
873 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
874
875 // Set the destination
876 x86opnd_t top_val = ctx_stack_pop(ctx, 0);
877 x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
878 mov(cb, REG0, top_val);
879 mov(cb, dst_opnd, REG0);
880
881 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
882 ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
883
884 return YJIT_KEEP_COMPILING;
885}
886
887// get nth stack value, then push it
888static codegen_status_t
889gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
890{
891 int32_t n = (int32_t)jit_get_arg(jit, 0);
892
893 // Get top n type / operand
894 x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
895 temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
896
897 x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
898 mov(cb, REG0, top_n_val);
899 mov(cb, loc0, REG0);
900
901 return YJIT_KEEP_COMPILING;
902}
903
904static codegen_status_t
905gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
906{
907 // Decrement SP
908 ctx_stack_pop(ctx, 1);
909 return YJIT_KEEP_COMPILING;
910}
911
912// Pop n values off the stack
913static codegen_status_t
914gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
915{
916 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
917 ctx_stack_pop(ctx, n);
918 return YJIT_KEEP_COMPILING;
919}
920
921// new array initialized from top N values
922static codegen_status_t
923gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
924{
925 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
926
927 // Save the PC and SP because we are allocating
928 jit_prepare_routine_call(jit, ctx, REG0);
929
930 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
931
932 // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
933 mov(cb, C_ARG_REGS[0], REG_EC);
934 mov(cb, C_ARG_REGS[1], imm_opnd(n));
935 lea(cb, C_ARG_REGS[2], values_ptr);
936 call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
937
938 ctx_stack_pop(ctx, n);
939 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
940 mov(cb, stack_ret, RAX);
941
942 return YJIT_KEEP_COMPILING;
943}
944
945// dup array
946static codegen_status_t
947gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
948{
949 VALUE ary = jit_get_arg(jit, 0);
950
951 // Save the PC and SP because we are allocating
952 jit_prepare_routine_call(jit, ctx, REG0);
953
954 // call rb_ary_resurrect(VALUE ary);
955 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
956 call_ptr(cb, REG0, (void *)rb_ary_resurrect);
957
958 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
959 mov(cb, stack_ret, RAX);
960
961 return YJIT_KEEP_COMPILING;
962}
963
964// dup hash
965static codegen_status_t
966gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
967{
968 VALUE hash = jit_get_arg(jit, 0);
969
970 // Save the PC and SP because we are allocating
971 jit_prepare_routine_call(jit, ctx, REG0);
972
973 // call rb_hash_resurrect(VALUE hash);
974 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
975 call_ptr(cb, REG0, (void *)rb_hash_resurrect);
976
977 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
978 mov(cb, stack_ret, RAX);
979
980 return YJIT_KEEP_COMPILING;
981}
982
983VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
984
985// call to_a on the array on the stack
986static codegen_status_t
987gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
988{
989 VALUE flag = (VALUE) jit_get_arg(jit, 0);
990
991 // Save the PC and SP because the callee may allocate
992 // Note that this modifies REG_SP, which is why we do it first
993 jit_prepare_routine_call(jit, ctx, REG0);
994
995 // Get the operands from the stack
996 x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
997
998 // Call rb_vm_splat_array(flag, ary)
999 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
1000 mov(cb, C_ARG_REGS[1], ary_opnd);
1001 call_ptr(cb, REG1, (void *) rb_vm_splat_array);
1002
1003 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
1004 mov(cb, stack_ret, RAX);
1005
1006 return YJIT_KEEP_COMPILING;
1007}
1008
1009// new range initialized from top 2 values
1010static codegen_status_t
1011gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1012{
1013 rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
1014
1015 // rb_range_new() allocates and can raise
1016 jit_prepare_routine_call(jit, ctx, REG0);
1017
1018 // val = rb_range_new(low, high, (int)flag);
1019 mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
1020 mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
1021 mov(cb, C_ARG_REGS[2], imm_opnd(flag));
1022 call_ptr(cb, REG0, (void *)rb_range_new);
1023
1024 ctx_stack_pop(ctx, 2);
1025 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
1026 mov(cb, stack_ret, RAX);
1027
1028 return YJIT_KEEP_COMPILING;
1029}
1030
1031static void
1032guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
1033{
1034 ADD_COMMENT(cb, "guard object is heap");
1035
1036 // Test that the object is not an immediate
1037 test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
1038 jnz_ptr(cb, side_exit);
1039
1040 // Test that the object is not false or nil
1041 cmp(cb, object_opnd, imm_opnd(Qnil));
1042 RUBY_ASSERT(Qfalse < Qnil);
1043 jbe_ptr(cb, side_exit);
1044}
1045
1046static inline void
1047guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
1048{
1049 ADD_COMMENT(cb, "guard object is array");
1050
1051 // Pull out the type mask
1052 mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
1053 and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
1054
1055 // Compare the result with T_ARRAY
1056 cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
1057 jne_ptr(cb, side_exit);
1058}
1059
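// Together, the two guards above implement roughly this C predicate (sketch):
// reject special constants (immediates, false and nil), then check the type
// bits in the RBasic flags.
#if 0
    if ((obj & RUBY_IMMEDIATE_MASK) || obj <= Qnil) goto side_exit;    // not a heap object
    if ((RBASIC(obj)->flags & RUBY_T_MASK) != T_ARRAY) goto side_exit; // not an array
#endif
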
1060// push enough nils onto the stack to fill out an array
1061static codegen_status_t
1062gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1063{
1064 int flag = (int) jit_get_arg(jit, 1);
1065
1066 // If this instruction has the splat flag, then bail out.
1067 if (flag & 0x01) {
1068 GEN_COUNTER_INC(cb, expandarray_splat);
1069 return YJIT_CANT_COMPILE;
1070 }
1071
1072 // If this instruction has the postarg flag, then bail out.
1073 if (flag & 0x02) {
1074 GEN_COUNTER_INC(cb, expandarray_postarg);
1075 return YJIT_CANT_COMPILE;
1076 }
1077
1078 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1079
1080 // num is the number of requested values. If there aren't enough in the
1081 // array then we're going to push on nils.
1082 int num = (int)jit_get_arg(jit, 0);
1083 val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1084 x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
1085
1086 if (array_type.type == ETYPE_NIL) {
1087 // special case for a, b = nil pattern
1088 // push N nils onto the stack
1089 for (int i = 0; i < num; i++) {
1090 x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
1091 mov(cb, push, imm_opnd(Qnil));
1092 }
1093 return YJIT_KEEP_COMPILING;
1094 }
1095
1096 // Move the array from the stack into REG0 and check that it's an array.
1097 mov(cb, REG0, array_opnd);
1098 guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1099 guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
1100
1101 // If we don't actually want any values, then just return.
1102 if (num == 0) {
1103 return YJIT_KEEP_COMPILING;
1104 }
1105
1106 // Pull out the embed flag to check if it's an embedded array.
1107 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1108 mov(cb, REG1, flags_opnd);
1109
1110 // Move the length of the embedded array into REG1.
1111 and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
1112 shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
1113
1114 // Conditionally move the length of the heap array into REG1.
1115 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1116 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
1117
1118 // Only handle the case where the number of values in the array is greater
1119 // than or equal to the number of values requested.
1120 cmp(cb, REG1, imm_opnd(num));
1121 jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
1122
1123 // Load the address of the embedded array into REG1.
1124 // (struct RArray *)(obj)->as.ary
1125 lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
1126
1127 // Conditionally load the address of the heap array into REG1.
1128 // (struct RArray *)(obj)->as.heap.ptr
1129 test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
1130 cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
1131
1132 // Loop backward through the array and push each element onto the stack.
1133 for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
1134 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1135 mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
1136 mov(cb, top, REG0);
1137 }
1138
1139 return YJIT_KEEP_COMPILING;
1140}
1141
1142// new hash initialized from top N values
1143static codegen_status_t
1144gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1145{
1146 int32_t num = (int32_t)jit_get_arg(jit, 0);
1147
1148 // Save the PC and SP because we are allocating
1149 jit_prepare_routine_call(jit, ctx, REG0);
1150
1151 if (num) {
1152 // val = rb_hash_new_with_size(num / 2);
1153 mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
1154 call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
1155
1156 // save the allocated hash as we want to push it after insertion
1157 push(cb, RAX);
1158 push(cb, RAX); // alignment
1159
1160 // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
1161 mov(cb, C_ARG_REGS[0], imm_opnd(num));
1162 lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
1163 mov(cb, C_ARG_REGS[2], RAX);
1164 call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
1165
1166 pop(cb, RAX); // alignment
1167 pop(cb, RAX);
1168
1169 ctx_stack_pop(ctx, num);
1170 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1171 mov(cb, stack_ret, RAX);
1172 }
1173 else {
1174 // val = rb_hash_new();
1175 call_ptr(cb, REG0, (void *)rb_hash_new);
1176
1177 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
1178 mov(cb, stack_ret, RAX);
1179 }
1180
1181 return YJIT_KEEP_COMPILING;
1182}
1183
1184// Push a constant value to the stack, including type information.
1185// The constant may be a heap object or a special constant.
1186static void
1187jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
1188{
1189 val_type_t val_type = yjit_type_of_value(arg);
1190 x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
1191
1192 if (SPECIAL_CONST_P(arg)) {
1193 // Immediates will not move and do not need to be tracked for GC
1194 // Thanks to this we can mov directly to memory when possible.
1195
1196 // NOTE: VALUE -> int64_t cast below is implementation defined.
1197 // Hopefully it preserves the bit pattern or raises a signal.
1198 // See N1256 section 6.3.1.3.
1199 x86opnd_t imm = imm_opnd((int64_t)arg);
1200
1201 // 64-bit immediates can't be directly written to memory
1202 if (imm.num_bits <= 32) {
1203 mov(cb, stack_top, imm);
1204 }
1205 else {
1206 mov(cb, REG0, imm);
1207 mov(cb, stack_top, REG0);
1208 }
1209 }
1210 else {
1211 // Load the value to push into REG0
1212 // Note that this value may get moved by the GC
1213 jit_mov_gc_ptr(jit, cb, REG0, arg);
1214
1215 // Write argument at SP
1216 mov(cb, stack_top, REG0);
1217 }
1218}
1219
1220static codegen_status_t
1221gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1222{
1223 jit_putobject(jit, ctx, Qnil);
1224 return YJIT_KEEP_COMPILING;
1225}
1226
1227static codegen_status_t
1228gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1229{
1230 VALUE arg = jit_get_arg(jit, 0);
1231
1232 jit_putobject(jit, ctx, arg);
1233 return YJIT_KEEP_COMPILING;
1234}
1235
1236static codegen_status_t
1237gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1238{
1239 VALUE put_val = jit_get_arg(jit, 0);
1240
1241 // Save the PC and SP because the callee will allocate
1242 jit_prepare_routine_call(jit, ctx, REG0);
1243
1244 mov(cb, C_ARG_REGS[0], REG_EC);
1245 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
1246 call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
1247
1248 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
1249 mov(cb, stack_top, RAX);
1250
1251 return YJIT_KEEP_COMPILING;
1252}
1253
1254static codegen_status_t
1255gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1256{
1257 int opcode = jit_get_opcode(jit);
1258 int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
1259
1260 jit_putobject(jit, ctx, INT2FIX(cst_val));
1261 return YJIT_KEEP_COMPILING;
1262}
1263
1264static codegen_status_t
1265gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1266{
1267 // Load self from CFP
1268 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1269
1270 // Write it on the stack
1271 x86opnd_t stack_top = ctx_stack_push_self(ctx);
1272 mov(cb, stack_top, REG0);
1273
1274 return YJIT_KEEP_COMPILING;
1275}
1276
1277static codegen_status_t
1278gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1279{
1280 enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
1281
1282 if (type == VM_SPECIAL_OBJECT_VMCORE) {
1283 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
1284 jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
1285 mov(cb, stack_top, REG0);
1286 return YJIT_KEEP_COMPILING;
1287 }
1288 else {
1289 // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
1290 // VM_SPECIAL_OBJECT_CONST_BASE
1291 return YJIT_CANT_COMPILE;
1292 }
1293}
1294
1295// Get EP at level from CFP
1296static void
1297gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
1298{
1299 // Load environment pointer EP from CFP
1300 mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
1301
1302 while (level--) {
1303 // Get the previous EP from the current EP
1304 // See GET_PREV_EP(ep) macro
1305 // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
1306 mov(cb, reg, mem_opnd(64, reg, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
1307 and(cb, reg, imm_opnd(~0x03));
1308 }
1309}
1310
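// C-level equivalent of the EP walk above (sketch; mirrors the GET_PREV_EP() macro):
#if 0
    const VALUE *ep = cfp->ep;
    while (level--) {
        ep = (const VALUE *)(ep[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03);
    }
#endif
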
1311// Compute the index of a local variable from its slot index
1312static uint32_t
1313slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
1314{
1315 // Convoluted rules from local_var_name() in iseq.c
1316 int32_t local_table_size = iseq->body->local_table_size;
1317 int32_t op = slot_idx - VM_ENV_DATA_SIZE;
1318 int32_t local_idx = local_table_size - op - 1;
1319 RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
1320 return (uint32_t)local_idx;
1321}
1322
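// Worked example of the mapping above (assuming VM_ENV_DATA_SIZE == 3): with
// local_table_size == 2, slot_idx 4 maps to local_idx 0 and slot_idx 3 maps to
// local_idx 1, i.e. the first entry in the local table sits furthest from the EP.
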
1323static codegen_status_t
1324gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1325{
1326 // Compute the offset from BP to the local
1327 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1328 const int32_t offs = -(SIZEOF_VALUE * slot_idx);
1329 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1330
1331 // Load environment pointer EP (level 0) from CFP
1332 gen_get_ep(cb, REG0, 0);
1333
1334 // Load the local from the EP
1335 mov(cb, REG0, mem_opnd(64, REG0, offs));
1336
1337 // Write the local at SP
1338 x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
1339 mov(cb, stack_top, REG0);
1340
1341 return YJIT_KEEP_COMPILING;
1342}
1343
1344static codegen_status_t
1345gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
1346{
1347 gen_get_ep(cb, REG0, level);
1348
1349 // Load the local from the block
1350 // val = *(vm_get_ep(GET_EP(), level) - idx);
1351 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1352 mov(cb, REG0, mem_opnd(64, REG0, offs));
1353
1354 // Write the local at SP
1355 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
1356 mov(cb, stack_top, REG0);
1357
1358 return YJIT_KEEP_COMPILING;
1359}
1360
1361static codegen_status_t
1362gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1363{
1364 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1365 int32_t level = (int32_t)jit_get_arg(jit, 1);
1366 return gen_getlocal_generic(ctx, idx, level);
1367}
1368
1369static codegen_status_t
1370gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1371{
1372 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1373 return gen_getlocal_generic(ctx, idx, 1);
1374}
1375
1376static codegen_status_t
1377gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1378{
1379 /*
1380 vm_env_write(const VALUE *ep, int index, VALUE v)
1381 {
1382 VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
1383 if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
1384 VM_STACK_ENV_WRITE(ep, index, v);
1385 }
1386 else {
1387 vm_env_write_slowpath(ep, index, v);
1388 }
1389 }
1390 */
1391
1392 int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
1393 uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
1394
1395 // Load environment pointer EP (level 0) from CFP
1396 gen_get_ep(cb, REG0, 0);
1397
1398 // flags & VM_ENV_FLAG_WB_REQUIRED
1399 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1400 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1401
1402 // Create a side-exit to fall back to the interpreter
1403 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1404
1405 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1406 jnz_ptr(cb, side_exit);
1407
1408 // Set the type of the local variable in the context
1409 val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1410 ctx_set_local_type(ctx, local_idx, temp_type);
1411
1412 // Pop the value to write from the stack
1413 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1414 mov(cb, REG1, stack_top);
1415
1416 // Write the value at the environment pointer
1417 const int32_t offs = -8 * slot_idx;
1418 mov(cb, mem_opnd(64, REG0, offs), REG1);
1419
1420 return YJIT_KEEP_COMPILING;
1421}
1422
1423// Push Qtrue or Qfalse depending on whether the given keyword was supplied by
1424// the caller
1425static codegen_status_t
1426gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1427{
1428 // When a keyword is unspecified past index 32, a hash will be used
1429 // instead. This can only happen in iseqs taking more than 32 keywords.
1430 if (jit->iseq->body->param.keyword->num >= 32) {
1431 return YJIT_CANT_COMPILE;
1432 }
1433
1434 // The EP offset to the undefined bits local
1435 int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
1436
1437 // The index of the keyword we want to check
1438 int32_t index = (int32_t)jit_get_arg(jit, 1);
1439
1440 // Load environment pointer EP
1441 gen_get_ep(cb, REG0, 0);
1442
1443 // VALUE kw_bits = *(ep - bits);
1444 x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
1445
1446 // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
1447 // if ((b & (0x01 << idx))) {
1448 //
1449 // We can skip the FIX2ULONG conversion by shifting the bit we test
1450 int64_t bit_test = 0x01 << (index + 1);
1451 test(cb, bits_opnd, imm_opnd(bit_test));
1452 mov(cb, REG0, imm_opnd(Qfalse));
1453 mov(cb, REG1, imm_opnd(Qtrue));
1454 cmovz(cb, REG0, REG1);
1455
1456 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1457 mov(cb, stack_ret, REG0);
1458
1459 return YJIT_KEEP_COMPILING;
1460}
1461
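// Why the test above shifts by (index + 1): kw_bits is stored as a Fixnum, so
// the untagged payload starts at bit 1 of the VALUE. Testing bit (index + 1)
// of the raw VALUE is therefore equivalent to testing bit `index` of
// FIX2ULONG(kw_bits) without doing the conversion. Roughly (sketch):
#if 0
    // For non-negative Fixnums, (kw_bits >> 1) == FIX2ULONG(kw_bits)
    bool unspecified = (kw_bits >> 1) & (0x01 << index); // same as kw_bits & (0x01 << (index + 1))
#endif
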
1462static codegen_status_t
1463gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
1464{
1465 // Load environment pointer EP at level
1466 gen_get_ep(cb, REG0, level);
1467
1468 // flags & VM_ENV_FLAG_WB_REQUIRED
1469 x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
1470 test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
1471
1472 // Create a side-exit to fall back to the interpreter
1473 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1474
1475 // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
1476 jnz_ptr(cb, side_exit);
1477
1478 // Pop the value to write from the stack
1479 x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
1480 mov(cb, REG1, stack_top);
1481
1482 // Write the value at the environment pointer
1483 const int32_t offs = -(SIZEOF_VALUE * local_idx);
1484 mov(cb, mem_opnd(64, REG0, offs), REG1);
1485
1486 return YJIT_KEEP_COMPILING;
1487}
1488
1489static codegen_status_t
1490gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1491{
1492 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1493 int32_t level = (int32_t)jit_get_arg(jit, 1);
1494 return gen_setlocal_generic(jit, ctx, idx, level);
1495}
1496
1497static codegen_status_t
1498gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1499{
1500 int32_t idx = (int32_t)jit_get_arg(jit, 0);
1501 return gen_setlocal_generic(jit, ctx, idx, 1);
1502}
1503
1504static void
1505gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1506{
1507 switch (shape) {
1508 case SHAPE_NEXT0:
1509 case SHAPE_NEXT1:
1510 RUBY_ASSERT(false);
1511 break;
1512
1513 case SHAPE_DEFAULT:
1514 jnz_ptr(cb, target0);
1515 break;
1516 }
1517}
1518
1519static void
1520gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1521{
1522 switch (shape) {
1523 case SHAPE_NEXT0:
1524 case SHAPE_NEXT1:
1525 RUBY_ASSERT(false);
1526 break;
1527
1528 case SHAPE_DEFAULT:
1529 jz_ptr(cb, target0);
1530 break;
1531 }
1532}
1533
1534static void
1535gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
1536{
1537 switch (shape) {
1538 case SHAPE_NEXT0:
1539 case SHAPE_NEXT1:
1540 RUBY_ASSERT(false);
1541 break;
1542
1543 case SHAPE_DEFAULT:
1544 jbe_ptr(cb, target0);
1545 break;
1546 }
1547}
1548
1549enum jcc_kinds {
1550 JCC_JNE,
1551 JCC_JNZ,
1552 JCC_JZ,
1553 JCC_JE,
1554 JCC_JBE,
1555 JCC_JNA,
1556};
1557
1558// Generate a jump to a stub that recompiles the current YARV instruction on failure.
1559// When depth_limit is exceeded, generate a jump to a side exit.
1560static void
1561jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
1562{
1563 branchgen_fn target0_gen_fn;
1564
1565 switch (jcc) {
1566 case JCC_JNE:
1567 case JCC_JNZ:
1568 target0_gen_fn = gen_jnz_to_target0;
1569 break;
1570 case JCC_JZ:
1571 case JCC_JE:
1572 target0_gen_fn = gen_jz_to_target0;
1573 break;
1574 case JCC_JBE:
1575 case JCC_JNA:
1576 target0_gen_fn = gen_jbe_to_target0;
1577 break;
1578 default:
1579 rb_bug("yjit: unimplemented jump kind");
1580 break;
1581 };
1582
1583 if (ctx->chain_depth < depth_limit) {
1584 ctx_t deeper = *ctx;
1585 deeper.chain_depth++;
1586
1587 gen_branch(
1588 jit,
1589 ctx,
1590 (blockid_t) { jit->iseq, jit->insn_idx },
1591 &deeper,
1592 BLOCKID_NULL,
1593 NULL,
1594 target0_gen_fn
1595 );
1596 }
1597 else {
1598 target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
1599 }
1600}
1601
1602enum {
1603 GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
1604 OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
1605 SEND_MAX_DEPTH = 5, // up to 5 different classes
1606};
1607
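// Illustrative use of jit_chain_guard() (hypothetical guard fragment, not from
// this file): compare against an expected value and either branch to a stub
// that recompiles this instruction with chain_depth + 1, or to the side exit
// once the depth limit is reached.
#if 0
    cmp(cb, REG0, REG1); // e.g. REG1 holds the expected class
    jit_chain_guard(JCC_JNE, jit, ctx, SEND_MAX_DEPTH, side_exit);
#endif
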
1608VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
1609
1610// Codegen for setting an instance variable.
1611// Preconditions:
1612// - receiver is in REG0
1613// - receiver has the same class as CLASS_OF(comptime_receiver)
1614// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1615static codegen_status_t
1616gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
1617{
1618 // Save the PC and SP because the callee may allocate
1619 // Note that this modifies REG_SP, which is why we do it first
1620 jit_prepare_routine_call(jit, ctx, REG0);
1621
1622 // Get the operands from the stack
1623 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1624 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
1625
1626 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
1627
1628 // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
1629 mov(cb, C_ARG_REGS[0], recv_opnd);
1630 mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
1631 mov(cb, C_ARG_REGS[2], val_opnd);
1632 call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
1633
1634 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1635 mov(cb, out_opnd, RAX);
1636
1637 return YJIT_KEEP_COMPILING;
1638}
1639
1640// Codegen for getting an instance variable.
1641// Preconditions:
1642// - receiver is in REG0
1643// - receiver has the same class as CLASS_OF(comptime_receiver)
1644// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
1645static codegen_status_t
1646gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
1647{
1648 VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
1649 const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
1650
1651 // If the class uses the default allocator, instances should all be T_OBJECT
1652 // NOTE: This assumes nobody changes the allocator of the class after allocation.
1653 // Eventually, we can encode whether an object is T_OBJECT or not
1654 // inside object shapes.
1655 if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
1656 rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
1657 // General case. Call rb_ivar_get().
1658 // VALUE rb_ivar_get(VALUE obj, ID id)
1659 ADD_COMMENT(cb, "call rb_ivar_get()");
1660
1661 // The function could raise exceptions.
1662 jit_prepare_routine_call(jit, ctx, REG1);
1663
1664 mov(cb, C_ARG_REGS[0], REG0);
1665 mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
1666 call_ptr(cb, REG1, (void *)rb_ivar_get);
1667
1668 if (!reg0_opnd.is_self) {
1669 (void)ctx_stack_pop(ctx, 1);
1670 }
1671 // Push the ivar on the stack
1672 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1673 mov(cb, out_opnd, RAX);
1674
1675 // Jump to next instruction. This allows guard chains to share the same successor.
1676 jit_jump_to_next_insn(jit, ctx);
1677 return YJIT_END_BLOCK;
1678 }
1679
1680 /*
1681 // FIXME:
1682 // This check was added because of a failure in a test involving the
1683 // Nokogiri Document class where we see a T_DATA that still has the default
1684 // allocator.
1685 // Aaron Patterson argues that this is a bug in the C extension, because
1686 // people could call .allocate() on the class and still get a T_OBJECT
1687 // For now I added an extra dynamic check that the receiver is T_OBJECT
1688 // so we can safely pass all the tests in Shopify Core.
1689 //
1690 // Guard that the receiver is T_OBJECT
1691 // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
1692 ADD_COMMENT(cb, "guard receiver is T_OBJECT");
1693 mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
1694 and(cb, REG1, imm_opnd(RUBY_T_MASK));
1695 cmp(cb, REG1, imm_opnd(T_OBJECT));
1696 jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
1697 */
1698
1699// FIXME: Mapping the index could fail when there are too many ivar names. If we're
1700 // compiling for a branch stub that can cause the exception to be thrown from the
1701 // wrong PC.
1702 uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
1703
1704 // Pop receiver if it's on the temp stack
1705 if (!reg0_opnd.is_self) {
1706 (void)ctx_stack_pop(ctx, 1);
1707 }
1708
1709 // Compile time self is embedded and the ivar index lands within the object
1710 if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
1711 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1712
1713 // Guard that self is embedded
1714 // TODO: BT and JC is shorter
1715 ADD_COMMENT(cb, "guard embedded getivar");
1716 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1717 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1718 jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1719
1720 // Load the variable
1721 x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
1722 mov(cb, REG1, ivar_opnd);
1723
1724 // Guard that the variable is not Qundef
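// An unset ivar slot holds Qundef, which must never be visible to Ruby code, so the
// branchless cmove below substitutes Qnil whenever the slot compares equal to Qundef.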
1725 cmp(cb, REG1, imm_opnd(Qundef));
1726 mov(cb, REG0, imm_opnd(Qnil));
1727 cmove(cb, REG1, REG0);
1728
1729 // Push the ivar on the stack
1730 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1731 mov(cb, out_opnd, REG1);
1732 }
1733 else {
1734 // Compile time value is *not* embedded.
1735
1736 // Guard that value is *not* embedded
1737 // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
1738 ADD_COMMENT(cb, "guard extended getivar");
1739 x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
1740 test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
1741 jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
1742
1743 // check that the extended table is big enough
1744 if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
1745 // Check that the slot is inside the extended table (num_slots > index)
1746 x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
1747 cmp(cb, num_slots, imm_opnd(ivar_index));
1748 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
1749 }
1750
1751 // Get a pointer to the extended table
1752 x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
1753 mov(cb, REG0, tbl_opnd);
1754
1755 // Read the ivar from the extended table
1756 x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
1757 mov(cb, REG0, ivar_opnd);
1758
1759 // Check that the ivar is not Qundef
1760 cmp(cb, REG0, imm_opnd(Qundef));
1761 mov(cb, REG1, imm_opnd(Qnil));
1762 cmove(cb, REG0, REG1);
1763
1764 // Push the ivar on the stack
1765 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
1766 mov(cb, out_opnd, REG0);
1767 }
1768
1769 // Jump to next instruction. This allows guard chains to share the same successor.
1770 jit_jump_to_next_insn(jit, ctx);
1771 return YJIT_END_BLOCK;
1772}
1773
1774static codegen_status_t
1775gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1776{
1777 // Defer compilation so we can specialize on a runtime `self`
1778 if (!jit_at_current_insn(jit)) {
1779 defer_compilation(jit, ctx);
1780 return YJIT_END_BLOCK;
1781 }
1782
1783 ID ivar_name = (ID)jit_get_arg(jit, 0);
1784
1785 VALUE comptime_val = jit_peek_at_self(jit, ctx);
1786 VALUE comptime_val_klass = CLASS_OF(comptime_val);
1787
1788 // Generate a side exit
1789 uint8_t *side_exit = yjit_side_exit(jit, ctx);
1790
1791 // Guard that the receiver has the same class as the one from compile time.
1792 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
1793
1794 jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
1795
1796 return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
1797}
1798
1799void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
1800
1801static codegen_status_t
1802gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1803{
1804 ID id = (ID)jit_get_arg(jit, 0);
1805 IVC ic = (IVC)jit_get_arg(jit, 1);
1806
1807 // Save the PC and SP because the callee may allocate
1808 // Note that this modifies REG_SP, which is why we do it first
1809 jit_prepare_routine_call(jit, ctx, REG0);
1810
1811 // Get the operands from the stack
1812 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
1813
1814 // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
1815 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
1816 mov(cb, C_ARG_REGS[3], val_opnd);
1817 mov(cb, C_ARG_REGS[2], imm_opnd(id));
1818 mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
1819 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
1820 call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
1821
1822 return YJIT_KEEP_COMPILING;
1823}
1824
1825bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
1826
1827static codegen_status_t
1828gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1829{
1830 rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
1831 VALUE obj = (VALUE)jit_get_arg(jit, 1);
1832 VALUE pushval = (VALUE)jit_get_arg(jit, 2);
1833
1834 // Save the PC and SP because the callee may allocate
1835 // Note that this modifies REG_SP, which is why we do it first
1836 jit_prepare_routine_call(jit, ctx, REG0);
1837
1838 // Get the operands from the stack
1839 x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
1840
1841 // Call vm_defined(ec, reg_cfp, op_type, obj, v)
1842 mov(cb, C_ARG_REGS[0], REG_EC);
1843 mov(cb, C_ARG_REGS[1], REG_CFP);
1844 mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
1845 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
1846 mov(cb, C_ARG_REGS[4], v_opnd);
1847 call_ptr(cb, REG0, (void *)rb_vm_defined);
1848
1849 // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
1850 // val = pushval;
1851 // }
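// rb_vm_defined() returns a C bool, so per the x86-64 SysV ABI its result lands in AL.
// cmovnz picks pushval (REG1) when AL is nonzero; otherwise RAX keeps Qnil.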
1852 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
1853 cmp(cb, AL, imm_opnd(0));
1854 mov(cb, RAX, imm_opnd(Qnil));
1855 cmovnz(cb, RAX, REG1);
1856
1857 // Push the return value onto the stack
1858 val_type_t out_type = SPECIAL_CONST_P(pushval)? TYPE_IMM:TYPE_UNKNOWN;
1859 x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
1860 mov(cb, stack_ret, RAX);
1861
1862 return YJIT_KEEP_COMPILING;
1863}
1864
1865static codegen_status_t
1866gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1867{
1868 enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
1869 // Only three types are emitted by compile.c
1870 if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
1871 val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1872 x86opnd_t val = ctx_stack_pop(ctx, 1);
1873
1874 x86opnd_t stack_ret;
1875
1876 // Check if we know from type information
1877 if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
1878 (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
1879 (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
1880 // guaranteed type match
1881 stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
1882 mov(cb, stack_ret, imm_opnd(Qtrue));
1883 return YJIT_KEEP_COMPILING;
1884 }
1885 else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
1886 // guaranteed not to match T_STRING/T_ARRAY/T_HASH
1887 stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
1888 mov(cb, stack_ret, imm_opnd(Qfalse));
1889 return YJIT_KEEP_COMPILING;
1890 }
1891
1892 mov(cb, REG0, val);
1893 mov(cb, REG1, imm_opnd(Qfalse));
1894
1895 uint32_t ret = cb_new_label(cb, "ret");
1896
1897 if (!val_type.is_heap) {
1898 // if (SPECIAL_CONST_P(val)) {
1899 // Return Qfalse via REG1 if not on heap
1900 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
1901 jnz_label(cb, ret);
1902 cmp(cb, REG0, imm_opnd(Qnil));
1903 jbe_label(cb, ret);
1904 }
1905
1906 // Check type on object
1907 mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
1908 and(cb, REG0, imm_opnd(RUBY_T_MASK));
1909 cmp(cb, REG0, imm_opnd(type_val));
1910 mov(cb, REG0, imm_opnd(Qtrue));
1911 // REG1 contains Qfalse from above
1912 cmove(cb, REG1, REG0);
1913
1914 cb_write_label(cb, ret);
1915 stack_ret = ctx_stack_push(ctx, TYPE_IMM);
1916 mov(cb, stack_ret, REG1);
1917 cb_link_labels(cb);
1918
1919 return YJIT_KEEP_COMPILING;
1920 }
1921 else {
1922 return YJIT_CANT_COMPILE;
1923 }
1924}
1925
1926static codegen_status_t
1927gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
1928{
1929 rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
1930
1931 // Save the PC and SP because we are allocating
1932 jit_prepare_routine_call(jit, ctx, REG0);
1933
1934 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)n));
1935
1936 // call rb_str_concat_literals(long n, const VALUE *strings);
1937 mov(cb, C_ARG_REGS[0], imm_opnd(n));
1938 lea(cb, C_ARG_REGS[1], values_ptr);
1939 call_ptr(cb, REG0, (void *)rb_str_concat_literals);
1940
1941 ctx_stack_pop(ctx, n);
1942 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
1943 mov(cb, stack_ret, RAX);
1944
1945 return YJIT_KEEP_COMPILING;
1946}
1947
1948static void
1949guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
1950{
1951 // Get the stack operand types
1952 val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
1953 val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
1954
1955 if (arg0_type.is_heap || arg1_type.is_heap) {
1956 jmp_ptr(cb, side_exit);
1957 return;
1958 }
1959
1960 if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
1961 jmp_ptr(cb, side_exit);
1962 return;
1963 }
1964
1965 if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
1966 jmp_ptr(cb, side_exit);
1967 return;
1968 }
1969
1970 RUBY_ASSERT(!arg0_type.is_heap);
1971 RUBY_ASSERT(!arg1_type.is_heap);
1972 RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
1973 RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
1974
1975 // Get stack operands without popping them
1976 x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
1977 x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
1978
1979 // If not fixnums, fall back
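// A Fixnum VALUE has the low tag bit set (RUBY_FIXNUM_FLAG == 1), so test+jz takes the
// side exit when the bit is clear, i.e. when the operand turns out not to be a fixnum.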
1980 if (arg0_type.type != ETYPE_FIXNUM) {
1981 ADD_COMMENT(cb, "guard arg0 fixnum");
1982 test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
1983 jz_ptr(cb, side_exit);
1984 }
1985 if (arg1_type.type != ETYPE_FIXNUM) {
1986 ADD_COMMENT(cb, "guard arg1 fixnum");
1987 test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
1988 jz_ptr(cb, side_exit);
1989 }
1990
1991 // Set stack types in context
1992 ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
1993 ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
1994}
1995
1996// Conditional move operation used by comparison operators
1997typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
1998
1999static codegen_status_t
2000gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
2001{
2002 // Defer compilation so we can specialize based on a runtime receiver
2003 if (!jit_at_current_insn(jit)) {
2004 defer_compilation(jit, ctx);
2005 return YJIT_END_BLOCK;
2006 }
2007
2008 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2009 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2010
2011 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2012 // Create a side-exit to fall back to the interpreter
2013 // Note: we generate the side-exit before popping operands from the stack
2014 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2015
2016 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
2017 return YJIT_CANT_COMPILE;
2018 }
2019
2020 // Check that both operands are fixnums
2021 guard_two_fixnums(ctx, side_exit);
2022
2023 // Get the operands from the stack
2024 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2025 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2026
2027 // Compare the arguments
2028 xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
2029 mov(cb, REG1, arg0);
2030 cmp(cb, REG1, arg1);
2031 mov(cb, REG1, imm_opnd(Qtrue));
2032 cmov_op(cb, REG0, REG1);
2033
2034 // Push the output on the stack
2035 x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
2036 mov(cb, dst, REG0);
2037
2038 return YJIT_KEEP_COMPILING;
2039 }
2040 else {
2041 return gen_opt_send_without_block(jit, ctx, cb);
2042 }
2043}
2044
2045static codegen_status_t
2046gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2047{
2048 return gen_fixnum_cmp(jit, ctx, cmovl);
2049}
2050
2051static codegen_status_t
2052gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2053{
2054 return gen_fixnum_cmp(jit, ctx, cmovle);
2055}
2056
2057static codegen_status_t
2058gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2059{
2060 return gen_fixnum_cmp(jit, ctx, cmovge);
2061}
2062
2063static codegen_status_t
2064gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2065{
2066 return gen_fixnum_cmp(jit, ctx, cmovg);
2067}
2068
2069// Implements specialized equality for either two fixnums or two strings
2070// Returns true if code was generated, otherwise false
2071static bool
2072gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
2073{
2074 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2075 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2076
2077 x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
2078 x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
2079
2080 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2081 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
2082 // if overridden, emit the generic version
2083 return false;
2084 }
2085
2086 guard_two_fixnums(ctx, side_exit);
2087
2088 mov(cb, REG0, a_opnd);
2089 cmp(cb, REG0, b_opnd);
2090
2091 mov(cb, REG0, imm_opnd(Qfalse));
2092 mov(cb, REG1, imm_opnd(Qtrue));
2093 cmove(cb, REG0, REG1);
2094
2095 // Push the output on the stack
2096 ctx_stack_pop(ctx, 2);
2097 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2098 mov(cb, dst, REG0);
2099
2100 return true;
2101 }
2102 else if (CLASS_OF(comptime_a) == rb_cString &&
2103 CLASS_OF(comptime_b) == rb_cString) {
2104 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
2105 // if overridden, emit the generic version
2106 return false;
2107 }
2108
2109 // Load a and b in preparation for call later
2110 mov(cb, C_ARG_REGS[0], a_opnd);
2111 mov(cb, C_ARG_REGS[1], b_opnd);
2112
2113 // Guard that a is a String
2114 mov(cb, REG0, C_ARG_REGS[0]);
2115 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
2116
2117 uint32_t ret = cb_new_label(cb, "ret");
2118
2119 // If they are equal by identity, return true
2120 cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
2121 mov(cb, RAX, imm_opnd(Qtrue));
2122 je_label(cb, ret);
2123
2124 // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
2125 if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
2126 mov(cb, REG0, C_ARG_REGS[1]);
2127 // Note: any T_STRING is valid here, but we check for a ::String for simplicity
2128 jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
2129 }
2130
2131 // Call rb_str_eql_internal(a, b)
2132 call_ptr(cb, REG0, (void *)rb_str_eql_internal);
2133
2134 // Push the output on the stack
2135 cb_write_label(cb, ret);
2136 ctx_stack_pop(ctx, 2);
2137 x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
2138 mov(cb, dst, RAX);
2139 cb_link_labels(cb);
2140
2141 return true;
2142 }
2143 else {
2144 return false;
2145 }
2146}
2147
2148static codegen_status_t
2149gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2150{
2151 // Defer compilation so we can specialize based on a runtime receiver
2152 if (!jit_at_current_insn(jit)) {
2153 defer_compilation(jit, ctx);
2154 return YJIT_END_BLOCK;
2155 }
2156
2157 // Create a side-exit to fall back to the interpreter
2158 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2159
2160 if (gen_equality_specialized(jit, ctx, side_exit)) {
2161 jit_jump_to_next_insn(jit, ctx);
2162 return YJIT_END_BLOCK;
2163 }
2164 else {
2165 return gen_opt_send_without_block(jit, ctx, cb);
2166 }
2167}
2168
2169static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
2170
2171static codegen_status_t
2172gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2173{
2174 // opt_neq is passed two rb_call_data as arguments:
2175 // first for ==, second for !=
2176 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
2177 return gen_send_general(jit, ctx, cd, NULL);
2178}
2179
2180static codegen_status_t
2181gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2182{
2183 struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
2184 int32_t argc = (int32_t)vm_ci_argc(cd->ci);
2185
2186 // Only JIT one-arg calls like `ary[6]`
2187 if (argc != 1) {
2188 GEN_COUNTER_INC(cb, oaref_argc_not_one);
2189 return YJIT_CANT_COMPILE;
2190 }
2191
2192 // Defer compilation so we can specialize based on a runtime receiver
2193 if (!jit_at_current_insn(jit)) {
2194 defer_compilation(jit, ctx);
2195 return YJIT_END_BLOCK;
2196 }
2197
2198 // Remember the context on entry for adding guard chains
2199 const ctx_t starting_context = *ctx;
2200
2201 // Specialize based on compile time values
2202 VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
2203 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
2204
2205 // Create a side-exit to fall back to the interpreter
2206 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2207
2208 if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
2209 if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
2210 return YJIT_CANT_COMPILE;
2211 }
2212
2213 // Pop the stack operands
2214 x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
2215 x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
2216 mov(cb, REG0, recv_opnd);
2217
2218 // if (SPECIAL_CONST_P(recv)) {
2219 // Bail if receiver is not a heap object
2220 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
2221 jnz_ptr(cb, side_exit);
2222 cmp(cb, REG0, imm_opnd(Qfalse));
2223 je_ptr(cb, side_exit);
2224 cmp(cb, REG0, imm_opnd(Qnil));
2225 je_ptr(cb, side_exit);
2226
2227 // Bail if recv has a class other than ::Array.
2228 // BOP_AREF check above is only good for ::Array.
2229 mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
2230 mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
2231 cmp(cb, REG0, REG1);
2232 jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2233
2234 // Bail if idx is not a FIXNUM
2235 mov(cb, REG1, idx_opnd);
2236 test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
2237 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
2238
2239 // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
2240 // It never raises or allocates, so we don't need to write to cfp->pc.
2241 {
2242 mov(cb, RDI, recv_opnd);
2243 sar(cb, REG1, imm_opnd(1)); // Convert fixnum to int
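// The arithmetic right shift is FIX2LONG(): it drops the tag bit while preserving the sign.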
2244 mov(cb, RSI, REG1);
2245 call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
2246
2247 // Push the return value onto the stack
2248 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2249 mov(cb, stack_ret, RAX);
2250 }
2251
2252 // Jump to next instruction. This allows guard chains to share the same successor.
2253 jit_jump_to_next_insn(jit, ctx);
2254 return YJIT_END_BLOCK;
2255 }
2256 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2257 if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
2258 return YJIT_CANT_COMPILE;
2259 }
2260
2261 x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
2262 x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
2263
2264 // Guard that the receiver is a hash
2265 mov(cb, REG0, recv_opnd);
2266 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
2267
2268 // Setup arguments for rb_hash_aref().
2269 mov(cb, C_ARG_REGS[0], REG0);
2270 mov(cb, C_ARG_REGS[1], key_opnd);
2271
2272 // Prepare to call rb_hash_aref(). It might call #hash on the key.
2273 jit_prepare_routine_call(jit, ctx, REG0);
2274
2275 call_ptr(cb, REG0, (void *)rb_hash_aref);
2276
2277 // Pop the key and the receiver
2278 (void)ctx_stack_pop(ctx, 2);
2279
2280 // Push the return value onto the stack
2281 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2282 mov(cb, stack_ret, RAX);
2283
2284 // Jump to next instruction. This allows guard chains to share the same successor.
2285 jit_jump_to_next_insn(jit, ctx);
2286 return YJIT_END_BLOCK;
2287 }
2288 else {
2289 // General case. Call the [] method.
2290 return gen_opt_send_without_block(jit, ctx, cb);
2291 }
2292}
2293
2294static codegen_status_t
2295gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2296{
2297 // Defer compilation so we can specialize on a runtime `self`
2298 if (!jit_at_current_insn(jit)) {
2299 defer_compilation(jit, ctx);
2300 return YJIT_END_BLOCK;
2301 }
2302
2303 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
2304 VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
2305
2306 // Get the operands from the stack
2307 x86opnd_t recv = ctx_stack_opnd(ctx, 2);
2308 x86opnd_t key = ctx_stack_opnd(ctx, 1);
2309 x86opnd_t val = ctx_stack_opnd(ctx, 0);
2310
2311 if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
2312 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2313
2314 // Guard receiver is an Array
2315 mov(cb, REG0, recv);
2316 jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2317
2318 // Guard key is a fixnum
2319 mov(cb, REG0, key);
2320 jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
2321
2322 // Call rb_ary_store
2323 mov(cb, C_ARG_REGS[0], recv);
2324 mov(cb, C_ARG_REGS[1], key);
2325 sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
2326 mov(cb, C_ARG_REGS[2], val);
2327
2328 // We might allocate or raise
2329 jit_prepare_routine_call(jit, ctx, REG0);
2330
2331 call_ptr(cb, REG0, (void *)rb_ary_store);
2332
2333 // rb_ary_store returns void
2334 // stored value should still be on stack
2335 mov(cb, REG0, ctx_stack_opnd(ctx, 0));
2336
2337 // Push the return value onto the stack
2338 ctx_stack_pop(ctx, 3);
2339 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2340 mov(cb, stack_ret, REG0);
2341
2342 jit_jump_to_next_insn(jit, ctx);
2343 return YJIT_END_BLOCK;
2344 }
2345 else if (CLASS_OF(comptime_recv) == rb_cHash) {
2346 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2347
2348 // Guard receiver is a Hash
2349 mov(cb, REG0, recv);
2350 jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
2351
2352 // Call rb_hash_aset
2353 mov(cb, C_ARG_REGS[0], recv);
2354 mov(cb, C_ARG_REGS[1], key);
2355 mov(cb, C_ARG_REGS[2], val);
2356
2357 // We might allocate or raise
2358 jit_prepare_routine_call(jit, ctx, REG0);
2359
2360 call_ptr(cb, REG0, (void *)rb_hash_aset);
2361
2362 // Push the return value onto the stack
2363 ctx_stack_pop(ctx, 3);
2364 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2365 mov(cb, stack_ret, RAX);
2366
2367 jit_jump_to_next_insn(jit, ctx);
2368 return YJIT_END_BLOCK;
2369 }
2370 else {
2371 return gen_opt_send_without_block(jit, ctx, cb);
2372 }
2373}
2374
2375static codegen_status_t
2376gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2377{
2378 // Defer compilation so we can specialize on a runtime `self`
2379 if (!jit_at_current_insn(jit)) {
2380 defer_compilation(jit, ctx);
2381 return YJIT_END_BLOCK;
2382 }
2383
2384 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2385 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2386
2387 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2388 // Create a side-exit to fall back to the interpreter
2389 // Note: we generate the side-exit before popping operands from the stack
2390 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2391
2392 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
2393 return YJIT_CANT_COMPILE;
2394 }
2395
2396 // Check that both operands are fixnums
2397 guard_two_fixnums(ctx, side_exit);
2398
2399 // Get the operands and destination from the stack
2400 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2401 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2402
2403 // Do the bitwise and arg0 & arg1
2404 mov(cb, REG0, arg0);
2405 and(cb, REG0, arg1);
2406
2407 // Push the output on the stack
2408 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2409 mov(cb, dst, REG0);
2410
2411 return YJIT_KEEP_COMPILING;
2412 }
2413 else {
2414 // Delegate to send, call the method on the recv
2415 return gen_opt_send_without_block(jit, ctx, cb);
2416 }
2417}
2418
2419static codegen_status_t
2420gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2421{
2422 // Defer compilation so we can specialize on a runtime `self`
2423 if (!jit_at_current_insn(jit)) {
2424 defer_compilation(jit, ctx);
2425 return YJIT_END_BLOCK;
2426 }
2427
2428 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2429 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2430
2431 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2432 // Create a side-exit to fall back to the interpreter
2433 // Note: we generate the side-exit before popping operands from the stack
2434 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2435
2436 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
2437 return YJIT_CANT_COMPILE;
2438 }
2439
2440 // Check that both operands are fixnums
2441 guard_two_fixnums(ctx, side_exit);
2442
2443 // Get the operands and destination from the stack
2444 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2445 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2446
2447 // Do the bitwise or arg0 | arg1
2448 mov(cb, REG0, arg0);
2449 or(cb, REG0, arg1);
2450
2451 // Push the output on the stack
2452 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2453 mov(cb, dst, REG0);
2454
2455 return YJIT_KEEP_COMPILING;
2456 }
2457 else {
2458 // Delegate to send, call the method on the recv
2459 return gen_opt_send_without_block(jit, ctx, cb);
2460 }
2461}
2462
2463static codegen_status_t
2464gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2465{
2466 // Defer compilation so we can specialize on a runtime `self`
2467 if (!jit_at_current_insn(jit)) {
2468 defer_compilation(jit, ctx);
2469 return YJIT_END_BLOCK;
2470 }
2471
2472 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2473 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2474
2475 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2476 // Create a side-exit to fall back to the interpreter
2477 // Note: we generate the side-exit before popping operands from the stack
2478 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2479
2480 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
2481 return YJIT_CANT_COMPILE;
2482 }
2483
2484 // Check that both operands are fixnums
2485 guard_two_fixnums(ctx, side_exit);
2486
2487 // Get the operands and destination from the stack
2488 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2489 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2490
2491 // Subtract arg0 - arg1 and test for overflow
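// Fixnums are tagged as (n << 1) | 1, so (a - b) yields 2*(na - nb) with the tag bits
// cancelled out; adding 1 afterwards restores a properly tagged fixnum.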
2492 mov(cb, REG0, arg0);
2493 sub(cb, REG0, arg1);
2494 jo_ptr(cb, side_exit);
2495 add(cb, REG0, imm_opnd(1));
2496
2497 // Push the output on the stack
2498 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2499 mov(cb, dst, REG0);
2500
2501 return YJIT_KEEP_COMPILING;
2502 }
2503 else {
2504 // Delegate to send, call the method on the recv
2505 return gen_opt_send_without_block(jit, ctx, cb);
2506 }
2507}
2508
2509static codegen_status_t
2510gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2511{
2512 // Defer compilation so we can specialize on a runtime `self`
2513 if (!jit_at_current_insn(jit)) {
2514 defer_compilation(jit, ctx);
2515 return YJIT_END_BLOCK;
2516 }
2517
2518 VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
2519 VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
2520
2521 if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
2522 // Create a side-exit to fall back to the interpreter
2523 // Note: we generate the side-exit before popping operands from the stack
2524 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2525
2526 if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
2527 return YJIT_CANT_COMPILE;
2528 }
2529
2530 // Check that both operands are fixnums
2531 guard_two_fixnums(ctx, side_exit);
2532
2533 // Get the operands and destination from the stack
2534 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2535 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2536
2537 // Add arg0 + arg1 and test for overflow
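// With fixnums tagged as (n << 1) | 1, computing (a - 1) + b gives 2*(na + nb) + 1,
// which is already the correctly tagged sum, so no retagging is needed after the add.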
2538 mov(cb, REG0, arg0);
2539 sub(cb, REG0, imm_opnd(1));
2540 add(cb, REG0, arg1);
2541 jo_ptr(cb, side_exit);
2542
2543 // Push the output on the stack
2544 x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
2545 mov(cb, dst, REG0);
2546
2547 return YJIT_KEEP_COMPILING;
2548 }
2549 else {
2550 // Delegate to send, call the method on the recv
2551 return gen_opt_send_without_block(jit, ctx, cb);
2552 }
2553}
2554
2555static codegen_status_t
2556gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2557{
2558 // Delegate to send, call the method on the recv
2559 return gen_opt_send_without_block(jit, ctx, cb);
2560}
2561
2562static codegen_status_t
2563gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2564{
2565 // Delegate to send, call the method on the recv
2566 return gen_opt_send_without_block(jit, ctx, cb);
2567}
2568
2569VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
2570
2571static codegen_status_t
2572gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2573{
2574 // Save the PC and SP because the callee may allocate bignums
2575 // Note that this modifies REG_SP, which is why we do it first
2576 jit_prepare_routine_call(jit, ctx, REG0);
2577
2578 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2579
2580 // Get the operands from the stack
2581 x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
2582 x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
2583
2584 // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
2585 mov(cb, C_ARG_REGS[0], arg0);
2586 mov(cb, C_ARG_REGS[1], arg1);
2587 call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
2588
2589 // If val == Qundef, bail to do a method call
2590 cmp(cb, RAX, imm_opnd(Qundef));
2591 je_ptr(cb, side_exit);
2592
2593 // Push the return value onto the stack
2594 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
2595 mov(cb, stack_ret, RAX);
2596
2597 return YJIT_KEEP_COMPILING;
2598}
2599
2600static codegen_status_t
2601gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2602{
2603 // Delegate to send, call the method on the recv
2604 return gen_opt_send_without_block(jit, ctx, cb);
2605}
2606
2607static codegen_status_t
2608gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2609{
2610 // Delegate to send, call the method on the recv
2611 return gen_opt_send_without_block(jit, ctx, cb);
2612}
2613
2614static codegen_status_t
2615gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2616{
2617 // Delegate to send, call the method on the recv
2618 return gen_opt_send_without_block(jit, ctx, cb);
2619}
2620
2621static codegen_status_t
2622gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2623{
2624 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
2625 return YJIT_CANT_COMPILE;
2626 }
2627
2628 VALUE str = jit_get_arg(jit, 0);
2629 jit_mov_gc_ptr(jit, cb, REG0, str);
2630
2631 // Push the return value onto the stack
2632 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2633 mov(cb, stack_ret, REG0);
2634
2635 return YJIT_KEEP_COMPILING;
2636}
2637
2638static codegen_status_t
2639gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2640{
2641 if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
2642 return YJIT_CANT_COMPILE;
2643 }
2644
2645 VALUE str = jit_get_arg(jit, 0);
2646 jit_mov_gc_ptr(jit, cb, REG0, str);
2647
2648 // Push the return value onto the stack
2649 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
2650 mov(cb, stack_ret, REG0);
2651
2652 return YJIT_KEEP_COMPILING;
2653}
2654
2655static codegen_status_t
2656gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2657{
2658 return gen_opt_send_without_block(jit, ctx, cb);
2659}
2660
2661static codegen_status_t
2662gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2663{
2664 return gen_opt_send_without_block(jit, ctx, cb);
2665}
2666
2667static codegen_status_t
2668gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2669{
2670 return gen_opt_send_without_block(jit, ctx, cb);
2671}
2672
2673static codegen_status_t
2674gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2675{
2676 return gen_opt_send_without_block(jit, ctx, cb);
2677}
2678
2679static codegen_status_t
2680gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2681{
2682 // Normally this instruction would look up the key in a hash and jump to an
2683 // offset based on that.
2684 // Instead we can take the fallback case and continue with the next
2685 // instruction.
2686 // We'd hope that our jitted code will be sufficiently fast without the
2687 // hash lookup, at least for small hashes, but it's worth revisiting this
2688 // assumption in the future.
2689
2690 ctx_stack_pop(ctx, 1);
2691
2692 return YJIT_KEEP_COMPILING; // continue with the next instruction
2693}
2694
2695static void
2696gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2697{
2698 switch (shape) {
2699 case SHAPE_NEXT0:
2700 jz_ptr(cb, target1);
2701 break;
2702
2703 case SHAPE_NEXT1:
2704 jnz_ptr(cb, target0);
2705 break;
2706
2707 case SHAPE_DEFAULT:
2708 jnz_ptr(cb, target0);
2709 jmp_ptr(cb, target1);
2710 break;
2711 }
2712}
2713
2714static codegen_status_t
2715gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2716{
2717 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2718
2719 // Check for interrupts, but only on backward branches that may create loops
2720 if (jump_offset < 0) {
2721 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2722 yjit_check_ints(cb, side_exit);
2723 }
2724
2725 // Test if any bit (outside of the Qnil bit) is on
2726 // RUBY_Qfalse /* ...0000 0000 */
2727 // RUBY_Qnil /* ...0000 1000 */
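// Only Qfalse and Qnil are falsy, and both have every bit outside the Qnil bit clear,
// so `test val, ~Qnil` sets ZF exactly when the value is falsy.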
2728 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2729 test(cb, val_opnd, imm_opnd(~Qnil));
2730
2731 // Get the branch target instruction offsets
2732 uint32_t next_idx = jit_next_insn_idx(jit);
2733 uint32_t jump_idx = next_idx + jump_offset;
2734 blockid_t next_block = { jit->iseq, next_idx };
2735 blockid_t jump_block = { jit->iseq, jump_idx };
2736
2737 // Generate the branch instructions
2738 gen_branch(
2739 jit,
2740 ctx,
2741 jump_block,
2742 ctx,
2743 next_block,
2744 ctx,
2745 gen_branchif_branch
2746 );
2747
2748 return YJIT_END_BLOCK;
2749}
2750
2751static void
2752gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2753{
2754 switch (shape) {
2755 case SHAPE_NEXT0:
2756 jnz_ptr(cb, target1);
2757 break;
2758
2759 case SHAPE_NEXT1:
2760 jz_ptr(cb, target0);
2761 break;
2762
2763 case SHAPE_DEFAULT:
2764 jz_ptr(cb, target0);
2765 jmp_ptr(cb, target1);
2766 break;
2767 }
2768}
2769
2770static codegen_status_t
2771gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2772{
2773 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2774
2775 // Check for interrupts, but only on backward branches that may create loops
2776 if (jump_offset < 0) {
2777 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2778 yjit_check_ints(cb, side_exit);
2779 }
2780
2781 // Test if any bit (outside of the Qnil bit) is on
2782 // RUBY_Qfalse /* ...0000 0000 */
2783 // RUBY_Qnil /* ...0000 1000 */
2784 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2785 test(cb, val_opnd, imm_opnd(~Qnil));
2786
2787 // Get the branch target instruction offsets
2788 uint32_t next_idx = jit_next_insn_idx(jit);
2789 uint32_t jump_idx = next_idx + jump_offset;
2790 blockid_t next_block = { jit->iseq, next_idx };
2791 blockid_t jump_block = { jit->iseq, jump_idx };
2792
2793 // Generate the branch instructions
2794 gen_branch(
2795 jit,
2796 ctx,
2797 jump_block,
2798 ctx,
2799 next_block,
2800 ctx,
2801 gen_branchunless_branch
2802 );
2803
2804 return YJIT_END_BLOCK;
2805}
2806
2807static void
2808gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
2809{
2810 switch (shape) {
2811 case SHAPE_NEXT0:
2812 jne_ptr(cb, target1);
2813 break;
2814
2815 case SHAPE_NEXT1:
2816 je_ptr(cb, target0);
2817 break;
2818
2819 case SHAPE_DEFAULT:
2820 je_ptr(cb, target0);
2821 jmp_ptr(cb, target1);
2822 break;
2823 }
2824}
2825
2826static codegen_status_t
2827gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2828{
2829 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2830
2831 // Check for interrupts, but only on backward branches that may create loops
2832 if (jump_offset < 0) {
2833 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2834 yjit_check_ints(cb, side_exit);
2835 }
2836
2837 // Test if the value is Qnil
2838 // RUBY_Qnil /* ...0000 1000 */
2839 x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
2840 cmp(cb, val_opnd, imm_opnd(Qnil));
2841
2842 // Get the branch target instruction offsets
2843 uint32_t next_idx = jit_next_insn_idx(jit);
2844 uint32_t jump_idx = next_idx + jump_offset;
2845 blockid_t next_block = { jit->iseq, next_idx };
2846 blockid_t jump_block = { jit->iseq, jump_idx };
2847
2848 // Generate the branch instructions
2849 gen_branch(
2850 jit,
2851 ctx,
2852 jump_block,
2853 ctx,
2854 next_block,
2855 ctx,
2856 gen_branchnil_branch
2857 );
2858
2859 return YJIT_END_BLOCK;
2860}
2861
2862static codegen_status_t
2863gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
2864{
2865 int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
2866
2867 // Check for interrupts, but only on backward branches that may create loops
2868 if (jump_offset < 0) {
2869 uint8_t *side_exit = yjit_side_exit(jit, ctx);
2870 yjit_check_ints(cb, side_exit);
2871 }
2872
2873 // Get the branch target instruction offsets
2874 uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
2875 blockid_t jump_block = { jit->iseq, jump_idx };
2876
2877 // Generate the jump instruction
2878 gen_direct_jump(
2879 jit,
2880 ctx,
2881 jump_block
2882 );
2883
2884 return YJIT_END_BLOCK;
2885}
2886
2887/*
2888Guard that self or a stack operand has the same class as `known_klass`, using
2889`sample_instance` to speculate about the shape of the runtime value.
2890FIXNUM and on-heap integers are treated as if they have distinct classes, and
2891the guard generated for one will fail for the other.
2892
2893Recompile as a contingency if possible, or take a side exit as a last resort.
2894*/
2895static bool
2896jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
2897{
2898 val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
2899
2900 if (known_klass == rb_cNilClass) {
2901 RUBY_ASSERT(!val_type.is_heap);
2902 if (val_type.type != ETYPE_NIL) {
2903 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2904
2905 ADD_COMMENT(cb, "guard object is nil");
2906 cmp(cb, REG0, imm_opnd(Qnil));
2907 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2908
2909 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
2910 }
2911 }
2912 else if (known_klass == rb_cTrueClass) {
2913 RUBY_ASSERT(!val_type.is_heap);
2914 if (val_type.type != ETYPE_TRUE) {
2915 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2916
2917 ADD_COMMENT(cb, "guard object is true");
2918 cmp(cb, REG0, imm_opnd(Qtrue));
2919 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2920
2921 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
2922 }
2923 }
2924 else if (known_klass == rb_cFalseClass) {
2925 RUBY_ASSERT(!val_type.is_heap);
2926 if (val_type.type != ETYPE_FALSE) {
2927 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2928
2929 ADD_COMMENT(cb, "guard object is false");
2930 STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
2931 test(cb, REG0, REG0);
2932 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
2933
2934 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
2935 }
2936 }
2937 else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
2938 RUBY_ASSERT(!val_type.is_heap);
2939 // We will guard fixnum and bignum as though they were separate classes
2940 // BIGNUM can be handled by the general else case below
2941 if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
2942 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2943
2944 ADD_COMMENT(cb, "guard object is fixnum");
2945 test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
2946 jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
2947 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
2948 }
2949 }
2950 else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
2951 RUBY_ASSERT(!val_type.is_heap);
2952 // We will guard STATIC vs DYNAMIC as though they were separate classes
2953 // DYNAMIC symbols can be handled by the general else case below
2954 if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
2955 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2956
2957 ADD_COMMENT(cb, "guard object is static symbol");
2958 STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
2959 cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
2960 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2961 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
2962 }
2963 }
2964 else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
2965 RUBY_ASSERT(!val_type.is_heap);
2966 if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
2967 RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
2968
2969 // We will guard flonum vs heap float as though they were separate classes
2970 ADD_COMMENT(cb, "guard object is flonum");
2971 mov(cb, REG1, REG0);
2972 and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
2973 cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
2974 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2975 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
2976 }
2977 }
2978 else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
2979 // Singleton classes are attached to one specific object, so we can
2980 // avoid one memory access (and potentially the is_heap check) by
2981 // looking for the expected object directly.
2982 // Note that in case the sample instance has a singleton class that
2983 // doesn't attach to the sample instance, it means the sample instance
2984 // has an empty singleton class that hasn't been materialized yet. In
2985 // this case, comparing against the sample instance doesn't guarantee
2986 // that its singleton class is empty, so we can't avoid the memory
2987 // access. As an example, `Object.new.singleton_class` is an object in
2988 // this situation.
2989 ADD_COMMENT(cb, "guard known object with singleton class");
2990 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
2991 jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
2992 cmp(cb, REG0, REG1);
2993 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
2994 }
2995 else {
2996 RUBY_ASSERT(!val_type.is_imm);
2997
2998 // Check that the receiver is a heap object
2999 // Note: if we get here, the class doesn't have immediate instances.
3000 if (!val_type.is_heap) {
3001 ADD_COMMENT(cb, "guard not immediate");
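// The immediate-mask test rejects every special constant except Qfalse and Qnil; the
// unsigned compare against Qnil (0x08) then rejects those two, since Qfalse (0x00) and
// Qnil are the only values at or below it, so only heap objects fall through.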
3003 test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
3004 jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
3005 cmp(cb, REG0, imm_opnd(Qnil));
3006 jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
3007
3008 ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
3009 }
3010
3011 x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
3012
3013 // Bail if receiver class is different from known_klass
3014 // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
3015 ADD_COMMENT(cb, "guard known class");
3016 jit_mov_gc_ptr(jit, cb, REG1, known_klass);
3017 cmp(cb, klass_opnd, REG1);
3018 jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
3019 }
3020
3021 return true;
3022}
3023
3024// Generate ancestry guard for protected callee.
3025// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
3026static void
3027jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
3028{
3029 // See vm_call_method().
3030 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
3031 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
3032 // Note: PC isn't written to current control frame as rb_obj_is_kind_of() shouldn't raise.
3033 // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
3034 call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
3035 test(cb, RAX, RAX);
3036 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
3037}
3038
3039// Return true when the codegen function generates code.
3040// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
3041// See yjit_reg_method().
3042typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
3043
3044// Register a specialized codegen function for a particular method. Note that
3045// if the function returns true, the code it generates runs without a
3046// control frame and without interrupt checks. To avoid creating observable
3047// behavior changes, the codegen function should only target simple code paths
3048// that do not allocate and do not make method calls.
3049static void
3050yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
3051{
3052 ID mid = rb_intern(mid_str);
3053 const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
3054
3055 if (!me) {
3056 rb_bug("undefined optimized method: %s", rb_id2name(mid));
3057 }
3058
3059 // For now, only cfuncs are supported
3060 RUBY_ASSERT(me && me->def);
3061 RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
3062
3063 st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
3064}
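
// For illustration, the registrations performed later in this file (in yjit_init_codegen())
// look roughly like the following; the exact list of optimized methods may differ:
//
//     yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
//     yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
//     yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);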
3065
3066// Codegen for rb_obj_not().
3067// Note, caller is responsible for generating all the right guards, including
3068// arity guards.
3069static bool
3070jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3071{
3072 const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
3073
3074 if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
3075 ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
3076 ctx_stack_pop(ctx, 1);
3077 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
3078 mov(cb, out_opnd, imm_opnd(Qtrue));
3079 }
3080 else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
3081 // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
3082 ADD_COMMENT(cb, "rb_obj_not(truthy)");
3083 ctx_stack_pop(ctx, 1);
3084 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
3085 mov(cb, out_opnd, imm_opnd(Qfalse));
3086 }
3087 else {
3088 // jit_guard_known_klass() already ran on the receiver which should
3089 // have deduced the type of the receiver. This case should be
3090 // rare if not unreachable.
3091 return false;
3092 }
3093 return true;
3094}
3095
3096// Codegen for rb_true()
3097static bool
3098jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3099{
3100 ADD_COMMENT(cb, "nil? == true");
3101 ctx_stack_pop(ctx, 1);
3102 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
3103 mov(cb, stack_ret, imm_opnd(Qtrue));
3104 return true;
3105}
3106
3107// Codegen for rb_false()
3108static bool
3109jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3110{
3111 ADD_COMMENT(cb, "nil? == false");
3112 ctx_stack_pop(ctx, 1);
3113 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
3114 mov(cb, stack_ret, imm_opnd(Qfalse));
3115 return true;
3116}
3117
3118// Codegen for rb_obj_equal()
3119// object identity comparison
3120static bool
3121jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3122{
3123 ADD_COMMENT(cb, "equal?");
3124 x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
3125 x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
3126
3127 mov(cb, REG0, obj1);
3128 cmp(cb, REG0, obj2);
3129 mov(cb, REG0, imm_opnd(Qtrue));
3130 mov(cb, REG1, imm_opnd(Qfalse));
3131 cmovne(cb, REG0, REG1);
3132
3133 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
3134 mov(cb, stack_ret, REG0);
3135 return true;
3136}
3137
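// Small wrapper so generated code has a plain function to call: RSTRING_LEN() is a macro
// and cannot be invoked directly, and the result still needs boxing with LONG2NUM().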
3138static VALUE
3139yjit_str_bytesize(VALUE str)
3140{
3141 return LONG2NUM(RSTRING_LEN(str));
3142}
3143
3144static bool
3145jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
3146{
3147 ADD_COMMENT(cb, "String#bytesize");
3148
3149 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3150 mov(cb, C_ARG_REGS[0], recv);
3151 call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
3152
3153 x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
3154 mov(cb, out_opnd, RAX);
3155
3156 return true;
3157}
3158
3159// Codegen for rb_str_to_s()
3160// When String#to_s is called on a String instance, the method returns self and
3161// most of the overhead comes from setting up the method call. We observed that
3162// this situation happens a lot in some workloads.
3163static bool
3164jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3165{
3166 if (recv_known_klass && *recv_known_klass == rb_cString) {
3167 ADD_COMMENT(cb, "to_s on plain string");
3168 // The method returns the receiver, which is already on the stack.
3169 // No stack movement.
3170 return true;
3171 }
3172 return false;
3173}
3174
3175static bool
3176jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3177{
3178 ADD_COMMENT(cb, "Thread.current");
3179 ctx_stack_pop(ctx, 1);
3180
3181 // ec->thread_ptr
3182 mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
3183
3184 // thread->self
3185 mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
3186
3187 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
3188 mov(cb, stack_ret, REG0);
3189 return true;
3190}
3191
3192// Check if we know how to codegen for a particular cfunc method
3193static method_codegen_t
3194lookup_cfunc_codegen(const rb_method_definition_t *def)
3195{
3196 method_codegen_t gen_fn;
3197 if (st_lookup(yjit_method_codegen_table, def->method_serial, (st_data_t *)&gen_fn)) {
3198 return gen_fn;
3199 }
3200 return NULL;
3201}
3202
3203// Is anyone listening for :c_call and :c_return events currently?
3204static bool
3205c_method_tracing_currently_enabled(const jitstate_t *jit)
3206{
3207 rb_event_flag_t tracing_events;
3208 if (rb_multi_ractor_p()) {
3209 tracing_events = ruby_vm_event_enabled_global_flags;
3210 }
3211 else {
3212 // At the time of writing, events are never removed from
3213 // ruby_vm_event_enabled_global_flags so always checking using it would
3214 // mean we don't compile even after tracing is disabled.
3215 tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
3216 }
3217
3218 return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
3219}
3220
3221static codegen_status_t
3222gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
3223{
3224 const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
3225
3226 // If the function expects a Ruby array of arguments
3227 if (cfunc->argc < 0 && cfunc->argc != -1) {
3228 GEN_COUNTER_INC(cb, send_cfunc_ruby_array_varg);
3229 return YJIT_CANT_COMPILE;
3230 }
3231
3232 // If the argument count doesn't match
3233 if (cfunc->argc >= 0 && cfunc->argc != argc) {
3234 GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
3235 return YJIT_CANT_COMPILE;
3236 }
3237
3238 // Don't JIT functions that need C stack arguments for now
3239 if (cfunc->argc >= 0 && argc + 1 > NUM_C_ARG_REGS) {
3240 GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
3241 return YJIT_CANT_COMPILE;
3242 }
3243
3244 if (c_method_tracing_currently_enabled(jit)) {
3245 // Don't JIT if tracing c_call or c_return
3246 GEN_COUNTER_INC(cb, send_cfunc_tracing);
3247 return YJIT_CANT_COMPILE;
3248 }
3249
3250 // Delegate to codegen for C methods if we have it.
3251 {
3252 method_codegen_t known_cfunc_codegen;
3253 if ((known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
3254 if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
3255 // cfunc codegen generated code. Terminate the block so
3256 // there aren't multiple calls in the same block.
3257 jit_jump_to_next_insn(jit, ctx);
3258 return YJIT_END_BLOCK;
3259 }
3260 }
3261 }
3262
3263 // Callee method ID
3264 //ID mid = vm_ci_mid(ci);
3265 //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
3266 //print_str(cb, "");
3267 //print_str(cb, "calling CFUNC:");
3268 //print_str(cb, rb_id2name(mid));
3269 //print_str(cb, "recv");
3270 //print_ptr(cb, recv);
3271
3272 // Create a side-exit to fall back to the interpreter
3273 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3274
3275 // Check for interrupts
3276 yjit_check_ints(cb, side_exit);
3277
3278 // Stack overflow check
3279 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3280 // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + 2 * sizeof(rb_control_frame_t)
3281 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
3282 cmp(cb, REG_CFP, REG0);
3283 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
3284
3285 // Points to the receiver operand on the stack
3286 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3287
3288 // Store incremented PC into current control frame in case callee raises.
3289 jit_save_pc(jit, REG0);
3290
3291 if (block) {
3292 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3293 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3294 // with cfp->block_code.
3295 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3296 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3297 }
3298
3299 // Increment the stack pointer by 3 (in the callee)
3300 // sp += 3
3301 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
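// The three slots form the callee's environment: with ep = sp - 1 (relative to the new
// callee sp in REG0), ep[-2] holds the cme, ep[-1] the block handler (specval), and
// ep[0] the frame type flags, all written just below.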
3302
3303 // Write method entry at sp[-3]
3304 // sp[-3] = me;
3305 // Put compile time cme into REG1. It's assumed to be valid because we are notified when
3306 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3307 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3308 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3309
3310 // Write block handler at sp[-2]
3311 // sp[-2] = block_handler;
3312 if (block) {
3313 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3314 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3315 or(cb, REG1, imm_opnd(1));
3316 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3317 }
3318 else {
3319 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3320 }
3321
3322 // Write env flags at sp[-1]
3323 // sp[-1] = frame_type;
3324 uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
3325 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3326
3327 // Allocate a new CFP (ec->cfp--)
3328 sub(
3329 cb,
3330 member_opnd(REG_EC, rb_execution_context_t, cfp),
3331 imm_opnd(sizeof(rb_control_frame_t))
3332 );
3333
3334 // Setup the new frame
3335 // *cfp = (const struct rb_control_frame_struct) {
3336 // .pc = 0,
3337 // .sp = sp,
3338 // .iseq = 0,
3339 // .self = recv,
3340 // .ep = sp - 1,
3341 // .block_code = 0,
3342 // .__bp__ = sp,
3343 // };
3344 mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
3345 mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
3346 mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
3347 mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
3348 mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
3349 mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
3350 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3351 mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
3352 mov(cb, REG0, recv);
3353 mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
3354
3355 // Verify that we are calling the right function
3356 if (YJIT_CHECK_MODE > 0) {
3357 // Call check_cfunc_dispatch
3358 mov(cb, C_ARG_REGS[0], recv);
3359 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
3360 mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
3361 jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
3362 call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
3363 }
3364
3365 // Copy SP into RAX because REG_SP will get overwritten
3366 lea(cb, RAX, ctx_sp_opnd(ctx, 0));
3367
3368 // Pop the C function arguments from the stack (in the caller)
3369 ctx_stack_pop(ctx, argc + 1);
3370
3371 // Write interpreter SP into CFP.
3372 // Needed in case the callee yields to the block.
3373 jit_save_sp(jit, ctx);
3374
3375 // Non-variadic method
3376 if (cfunc->argc >= 0) {
3377 // Copy the arguments from the stack to the C argument registers
3378 // self is the 0th argument and is at index argc from the stack top
3379 for (int32_t i = 0; i < argc + 1; ++i)
3380 {
3381 x86opnd_t stack_opnd = mem_opnd(64, RAX, -(argc + 1 - i) * SIZEOF_VALUE);
3382 x86opnd_t c_arg_reg = C_ARG_REGS[i];
3383 mov(cb, c_arg_reg, stack_opnd);
3384 }
3385 }
3386 // Variadic method
3387 if (cfunc->argc == -1) {
3388 // The method gets a pointer to the first argument
3389 // rb_f_puts(int argc, VALUE *argv, VALUE recv)
3390 mov(cb, C_ARG_REGS[0], imm_opnd(argc));
3391 lea(cb, C_ARG_REGS[1], mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE));
3392 mov(cb, C_ARG_REGS[2], mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE));
3393 }
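// For illustration only: with argc == 2, the non-variadic path above ends up
// calling (cfunc->func)(recv, arg0, arg1), while the variadic path calls
// (cfunc->func)(2, argv, recv) with argv pointing at arg0 on the VM stack,
// matching the rb_f_puts-style signature mentioned above.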
3394
3395 // Call the C function
3396 // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
3397 // cfunc comes from compile-time cme->def, which we assume to be stable.
3398 // Invalidation logic is in rb_yjit_method_lookup_change()
3399 call_ptr(cb, REG0, (void*)cfunc->func);
3400
3401 // Record code position for TracePoint patching. See full_cfunc_return().
3402 record_global_inval_patch(cb, outline_full_cfunc_return_pos);
3403
3404 // Push the return value on the Ruby stack
3405 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3406 mov(cb, stack_ret, RAX);
3407
3408 // Pop the stack frame (ec->cfp++)
3409 add(
3410 cb,
3411 member_opnd(REG_EC, rb_execution_context_t, cfp),
3412 imm_opnd(sizeof(rb_control_frame_t))
3413 );
3414
3415 // cfunc calls may corrupt types
3416 ctx_clear_local_types(ctx);
3417
3418 // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
3419 // which allows for sharing the same successor.
3420
3421 // Jump (fall through) to the call continuation block
3422 // We do this to end the current block after the call
3423 jit_jump_to_next_insn(jit, ctx);
3424 return YJIT_END_BLOCK;
3425}
3426
3427static void
3428gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
3429{
3430 switch (shape) {
3431 case SHAPE_NEXT0:
3432 case SHAPE_NEXT1:
3433 RUBY_ASSERT(false);
3434 break;
3435
3436 case SHAPE_DEFAULT:
3437 mov(cb, REG0, const_ptr_opnd(target0));
3438 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
3439 break;
3440 }
3441}
3442
3443// If true, the iseq is leaf and it can be replaced by a single C call.
3444static bool
3445rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
3446{
3447 unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
3448 unsigned int leave_len = insn_len(BIN(leave));
3449
3450 return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
3451 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
3452 rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
3453 iseq->body->builtin_inline_p
3454 );
3455}
3456
3457// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
3458static const struct rb_builtin_function*
3459rb_leaf_builtin_function(const rb_iseq_t *iseq)
3460{
3461 if (!rb_leaf_invokebuiltin_iseq_p(iseq))
3462 return NULL;
3463 return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
3464}
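// For illustration only: such an iseq is encoded roughly as
//   iseq_encoded[0] = opt_invokebuiltin_delegate_leave (opcode)
//   iseq_encoded[1] = pointer to the rb_builtin_function (read above)
//   iseq_encoded[2] = start index operand
//   iseq_encoded[3] = leave (opcode)
// so its size is exactly insn_len(opt_invokebuiltin_delegate_leave) + insn_len(leave).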
3465
3466static codegen_status_t
3467gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
3468{
3469 const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
3470
3471 // When you have keyword arguments, there is an extra object that gets
3472 // placed on the stack that represents a bitmap of the keywords that were not
3473 // specified at the call site. We need to keep track of the fact that this
3474 // value is present on the stack in order to properly set up the callee's
3475 // stack pointer.
3476 const bool doing_kw_call = iseq->body->param.flags.has_kw;
3477 const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
3478
3479 if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
3480 // We can't handle tailcalls
3481 GEN_COUNTER_INC(cb, send_iseq_tailcall);
3482 return YJIT_CANT_COMPILE;
3483 }
3484
3485 // No support for callees with these parameters yet as they require allocation
3486 // or complex handling.
3487 if (iseq->body->param.flags.has_rest ||
3488 iseq->body->param.flags.has_post ||
3489 iseq->body->param.flags.has_kwrest) {
3490 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3491 return YJIT_CANT_COMPILE;
3492 }
3493
3494 // If we have keyword arguments being passed to a callee that only takes
3495 // positionals, then we need to allocate a hash. For now we're going to
3496 // call that too complex and bail.
3497 if (supplying_kws && !iseq->body->param.flags.has_kw) {
3498 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3499 return YJIT_CANT_COMPILE;
3500 }
3501
3502 // If we have a method accepting no kwargs (**nil), exit if we have passed
3503 // it any kwargs.
3504 if (supplying_kws && iseq->body->param.flags.accepts_no_kwarg) {
3505 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3506 return YJIT_CANT_COMPILE;
3507 }
3508
3509 // For computing the number of locals to set up for the callee
3510 int num_params = iseq->body->param.size;
3511
3512 // Block parameter handling. This mirrors setup_parameters_complex().
3513 if (iseq->body->param.flags.has_block) {
3514 if (iseq->body->local_iseq == iseq) {
3515 // Block argument is passed through EP and not set up as a local in
3516 // the callee.
3517 num_params--;
3518 }
3519 else {
3520 // In this case (param.flags.has_block && local_iseq != iseq),
3521 // the block argument is set up as a local variable and requires
3522 // materialization (allocation). Bail.
3523 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3524 return YJIT_CANT_COMPILE;
3525 }
3526 }
3527
3528 uint32_t start_pc_offset = 0;
3529
3530 const int required_num = iseq->body->param.lead_num;
3531
3532 // This struct represents the metadata about the caller-specified
3533 // keyword arguments.
3534 const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
3535 const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
3536
3537 // Arity handling and optional parameter setup
3538 const int opts_filled = argc - required_num - kw_arg_num;
3539 const int opt_num = iseq->body->param.opt_num;
3540 const int opts_missing = opt_num - opts_filled;
3541
3542 if (opts_filled < 0 || opts_filled > opt_num) {
3543 GEN_COUNTER_INC(cb, send_iseq_arity_error);
3544 return YJIT_CANT_COMPILE;
3545 }
3546
3547 // If we have unfilled optional arguments and keyword arguments then we
3548 // would need to adjust the arguments' location to account for that.
3549 // For now we aren't handling this case.
3550 if (doing_kw_call && opts_missing > 0) {
3551 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3552 return YJIT_CANT_COMPILE;
3553 }
3554
3555 if (opt_num > 0) {
3556 num_params -= opt_num - opts_filled;
3557 start_pc_offset = (uint32_t)iseq->body->param.opt_table[opts_filled];
3558 }
3559
3560 if (doing_kw_call) {
3561 // Here we're calling a method with keyword arguments and specifying
3562 // keyword arguments at this call site.
3563
3564 // This struct represents the metadata about the callee-specified
3565 // keyword parameters.
3566 const struct rb_iseq_param_keyword *keyword = iseq->body->param.keyword;
3567
3568 int required_kwargs_filled = 0;
3569
3570 if (keyword->num > 30) {
3571 // We have so many keywords that (1 << num) encoded as a FIXNUM
3572 // (which shifts it left one more) no longer fits inside a 32-bit
3573 // immediate.
3574 GEN_COUNTER_INC(cb, send_iseq_complex_callee);
3575 return YJIT_CANT_COMPILE;
3576 }
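// For illustration only: with keyword->num == 31, the unspecified_bits bitmap
// built below could set bit 30, and INT2FIX() shifts the value left once more,
// which can exceed the signed 32-bit immediate range of imm_opnd(); with
// num <= 30 the encoded FIXNUM always fits.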
3577
3578 // Check that the kwargs being passed are valid
3579 if (supplying_kws) {
3580 // This is the list of keyword arguments that the callee specified
3581 // in its initial declaration.
3582 const ID *callee_kwargs = keyword->table;
3583
3584 // Here we're going to build up a list of the IDs that correspond to
3585 // the caller-specified keyword arguments. If they're not in the
3586 // same order as the order specified in the callee declaration, then
3587 // we're going to need to generate some code to swap values around
3588 // on the stack.
3589 ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
3590 for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
3591 caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
3592
3593 // First, we're going to be sure that the names of every
3594 // caller-specified keyword argument correspond to a name in the
3595 // list of callee-specified keyword parameters.
3596 for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
3597 int callee_idx;
3598
3599 for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
3600 if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
3601 break;
3602 }
3603 }
3604
3605 // If the keyword was never found, then we know we have a
3606 // mismatch in the names of the keyword arguments, so we need to
3607 // bail.
3608 if (callee_idx == keyword->num) {
3609 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3610 return YJIT_CANT_COMPILE;
3611 }
3612
3613 // Keep a count to ensure all required kwargs are specified
3614 if (callee_idx < keyword->required_num) {
3615 required_kwargs_filled++;
3616 }
3617 }
3618 }
3619
3620 RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
3621 if (required_kwargs_filled != keyword->required_num) {
3622 GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
3623 return YJIT_CANT_COMPILE;
3624 }
3625 }
3626
3627 // Number of locals that are not parameters
3628 const int num_locals = iseq->body->local_table_size - num_params;
3629
3630 // Create a side-exit to fall back to the interpreter
3631 uint8_t *side_exit = yjit_side_exit(jit, ctx);
3632
3633 // Check for interrupts
3634 yjit_check_ints(cb, side_exit);
3635
3636 const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
3637
3638 if (leaf_builtin && !block && leaf_builtin->argc + 1 <= NUM_C_ARG_REGS) {
3639 ADD_COMMENT(cb, "inlined leaf builtin");
3640
3641 // Call the builtin func (ec, recv, arg1, arg2, ...)
3642 mov(cb, C_ARG_REGS[0], REG_EC);
3643
3644 // Copy self and arguments
3645 for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
3646 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
3647 x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
3648 mov(cb, c_arg_reg, stack_opnd);
3649 }
3650 ctx_stack_pop(ctx, leaf_builtin->argc + 1);
3651 call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
3652
3653 // Push the return value
3654 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3655 mov(cb, stack_ret, RAX);
3656
3657 // Note: assuming that the leaf builtin doesn't change local variables here.
3658 // Seems like a safe assumption.
3659
3660 return YJIT_KEEP_COMPILING;
3661 }
3662
3663 // Stack overflow check
3664 // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
3665 // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
3666 ADD_COMMENT(cb, "stack overflow check");
3667 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + iseq->body->stack_max) + 2 * sizeof(rb_control_frame_t)));
3668 cmp(cb, REG_CFP, REG0);
3669 jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
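// For illustration only: the margin reserves room for the callee's locals and
// temporaries (num_locals + stack_max slots) plus two control frames, because
// vm_push_frame() performs its check against an already-decremented cfp. If
// REG_CFP would end up at or below that address, we side exit and let the
// interpreter handle the overflow.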
3670
3671 if (doing_kw_call) {
3672 // Here we're calling a method with keyword arguments and specifying
3673 // keyword arguments at this call site.
3674
3675 // Number of positional arguments the callee expects before the first
3676 // keyword argument
3677 const int args_before_kw = required_num + opt_num;
3678
3679 // Metadata about the caller-specified keyword arguments, pulled out
3680 // of the call info.
3681 int caller_keyword_len = 0;
3682 const VALUE *caller_keywords = NULL;
3683 if (vm_ci_kwarg(ci)) {
3684 caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
3685 caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
3686 }
3687
3688 // This struct represents the metadata about the callee-specified
3689 // keyword parameters.
3690 const struct rb_iseq_param_keyword *const keyword = iseq->body->param.keyword;
3691
3692 ADD_COMMENT(cb, "keyword args");
3693
3694 // This is the list of keyword arguments that the callee specified
3695 // in its initial declaration.
3696 const ID *callee_kwargs = keyword->table;
3697
3698 int total_kwargs = keyword->num;
3699
3700 // Here we're going to build up a list of the IDs that correspond to
3701 // the caller-specified keyword arguments. If they're not in the
3702 // same order as the order specified in the callee declaration, then
3703 // we're going to need to generate some code to swap values around
3704 // on the stack.
3705 ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
3706 int kwarg_idx;
3707 for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
3708 caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
3709 }
3710
3711 int unspecified_bits = 0;
3712
3713 for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
3714 bool already_passed = false;
3715 ID callee_kwarg = callee_kwargs[callee_idx];
3716
3717 for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
3718 if (caller_kwargs[caller_idx] == callee_kwarg) {
3719 already_passed = true;
3720 break;
3721 }
3722 }
3723
3724 if (!already_passed) {
3725 // Reserve a stack slot for each missing default value and fill it in
3726 // below. Also increment argc so that the callee's SP is recorded
3727 // correctly.
3728 argc++;
3729 x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
3730 VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
3731
3732 if (default_value == Qundef) {
3733 // Qundef means that this value is not constant and must be
3734 // recalculated at runtime, so we record it in unspecified_bits
3735 // (Qnil is then used as a placeholder instead of Qundef).
3736 unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
3737 default_value = Qnil;
3738 }
3739
3740 // GC might move default_value.
3741 jit_mov_gc_ptr(jit, cb, REG0, default_value);
3742 mov(cb, default_arg, REG0);
3743
3744 caller_kwargs[kwarg_idx++] = callee_kwarg;
3745 }
3746 }
3747 RUBY_ASSERT(kwarg_idx == total_kwargs);
3748
3749 // Next, we're going to loop through every keyword that was
3750 // specified by the caller and make sure that it's in the correct
3751 // place. If it's not we're going to swap it around with another one.
3752 for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
3753 ID callee_kwarg = callee_kwargs[kwarg_idx];
3754
3755 // If the argument is already in the right order, then we don't
3756 // need to generate any code since the expected value is already
3757 // in the right place on the stack.
3758 if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
3759
3760 // In this case the argument is not in the right place, so we
3761 // need to find its position where it _should_ be and swap with
3762 // that location.
3763 for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
3764 if (callee_kwarg == caller_kwargs[swap_idx]) {
3765 // First we're going to generate the code that is going
3766 // to perform the actual swapping at runtime.
3767 stack_swap(ctx, cb, argc - 1 - swap_idx - args_before_kw, argc - 1 - kwarg_idx - args_before_kw, REG1, REG0);
3768
3769 // Next we're going to do some bookkeeping on our end so
3770 // that we know the order that the arguments are
3771 // actually in now.
3772 ID tmp = caller_kwargs[kwarg_idx];
3773 caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
3774 caller_kwargs[swap_idx] = tmp;
3775
3776 break;
3777 }
3778 }
3779 }
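// For illustration only: if the callee declares (a:, b:) and the caller passed
// (b:, a:), the loop above emits a single stack_swap() so the values match the
// callee's declaration order; it acts like a selection sort over at most
// total_kwargs stack slots.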
3780
3781 // Keyword arguments cause a special extra local variable to be
3782 // pushed onto the stack that represents the parameters that weren't
3783 // explicitly given a value and have a non-constant default.
3784 mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
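// For illustration only, using a hypothetical callee: for
//   def m(a:, b: rand, c: 3)  called as  m(a: 1)
// both b and c are missing. c's default is a compile-time constant and was
// pushed directly above, while b's default is Qundef, so bit 0 of
// unspecified_bits is set and the callee recomputes b at runtime.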
3785 }
3786 // Points to the receiver operand on the stack
3787 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
3788
3789 // Store the updated SP on the current frame (pop arguments and receiver)
3790 ADD_COMMENT(cb, "store caller sp");
3791 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
3792 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3793
3794 // Store the next PC in the current frame
3795 jit_save_pc(jit, REG0);
3796
3797 if (block) {
3798 // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
3799 // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
3800 // with cfp->block_code.
3801 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
3802 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
3803 }
3804
3805 // Adjust the callee's stack pointer
3806 lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
3807
3808 // Initialize local variables to Qnil
3809 for (int i = 0; i < num_locals; i++) {
3810 mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
3811 }
3812
3813 ADD_COMMENT(cb, "push env");
3814 // Put the compile-time cme into REG1. It's assumed to be valid because we are notified when
3815 // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
3816 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
3817 // Write method entry at sp[-3]
3818 // sp[-3] = me;
3819 mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
3820
3821 // Write block handler at sp[-2]
3822 // sp[-2] = block_handler;
3823 if (block) {
3824 // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
3825 lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
3826 or(cb, REG1, imm_opnd(1));
3827 mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
3828 }
3829 else {
3830 mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
3831 }
3832
3833 // Write env flags at sp[-1]
3834 // sp[-1] = frame_type;
3835 uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
3836 mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
3837
3838 ADD_COMMENT(cb, "push callee CFP");
3839 // Allocate a new CFP (ec->cfp--)
3840 sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
3841 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
3842
3843 // Setup the new frame
3844 // *cfp = (const struct rb_control_frame_struct) {
3845 // .pc = pc,
3846 // .sp = sp,
3847 // .iseq = iseq,
3848 // .self = recv,
3849 // .ep = sp - 1,
3850 // .block_code = 0,
3851 // .__bp__ = sp,
3852 // };
3853 mov(cb, REG1, recv);
3854 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
3855 mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
3856 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
3857 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
3858 sub(cb, REG0, imm_opnd(sizeof(VALUE)));
3859 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
3860 jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
3861 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
3862 mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
3863
3864 // No need to set cfp->pc since the callee sets it whenever calling into routines
3865 // that could look at it through jit_save_pc().
3866 // mov(cb, REG0, const_ptr_opnd(start_pc));
3867 // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
3868
3869 // Stub so we can return to JITted code
3870 blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
3871
3872 // Create a context for the callee
3873 ctx_t callee_ctx = DEFAULT_CTX;
3874
3875 // Set the argument types in the callee's context
3876 for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
3877 val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
3878 ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
3879 }
3880 val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
3881 ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
3882
3883 // The callee might change locals through Kernel#binding and other means.
3884 ctx_clear_local_types(ctx);
3885
3886 // Pop arguments and receiver in return context, push the return value
3887 // After the return, sp_offset will be 1. The codegen for leave writes
3888 // the return value in case of JIT-to-JIT return.
3889 ctx_t return_ctx = *ctx;
3890 ctx_stack_pop(&return_ctx, argc + 1);
3891 ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
3892 return_ctx.sp_offset = 1;
3893 return_ctx.chain_depth = 0;
3894
3895 // Write the JIT return address on the callee frame
3896 gen_branch(
3897 jit,
3898 ctx,
3899 return_block,
3900 &return_ctx,
3901 return_block,
3902 &return_ctx,
3903 gen_return_branch
3904 );
3905
3906 //print_str(cb, "calling Ruby func:");
3907 //print_str(cb, rb_id2name(vm_ci_mid(ci)));
3908
3909 // Directly jump to the entry point of the callee
3910 gen_direct_jump(
3911 jit,
3912 &callee_ctx,
3913 (blockid_t){ iseq, start_pc_offset }
3914 );
3915
3916 return YJIT_END_BLOCK;
3917}
3918
3919static codegen_status_t
3920gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3921 if (vm_ci_argc(ci) != 0) {
3922 return YJIT_CANT_COMPILE;
3923 }
3924
3925 const unsigned int off = cme->def->body.optimized.index;
3926
3927 // Confidence checks
3928 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3929 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3930
3931 // We are going to use an encoding that takes a 4-byte immediate which
3932 // limits the offset to INT32_MAX.
3933 {
3934 uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
3935 if (native_off > (uint64_t)INT32_MAX) {
3936 return YJIT_CANT_COMPILE;
3937 }
3938 }
3939
3940 // All structs from the same Struct class should have the same
3941 // length. So if our comptime_recv is embedded all runtime
3942 // structs of the same class should be as well, and the same is
3943 // true of the converse.
3944 bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
3945
3946 ADD_COMMENT(cb, "struct aref");
3947
3948 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3949
3950 mov(cb, REG0, recv);
3951
3952 if (embedded) {
3953 mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
3954 }
3955 else {
3956 mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
3957 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
3958 }
3959
3960 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3961 mov(cb, ret, REG0);
3962
3963 jit_jump_to_next_insn(jit, ctx);
3964 return YJIT_END_BLOCK;
3965}
3966
3967static codegen_status_t
3968gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
3969 if (vm_ci_argc(ci) != 1) {
3970 return YJIT_CANT_COMPILE;
3971 }
3972
3973 const unsigned int off = cme->def->body.optimized.index;
3974
3975 // Confidence checks
3976 RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
3977 RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
3978
3979 ADD_COMMENT(cb, "struct aset");
3980
3981 x86opnd_t val = ctx_stack_pop(ctx, 1);
3982 x86opnd_t recv = ctx_stack_pop(ctx, 1);
3983
3984 mov(cb, C_ARG_REGS[0], recv);
3985 mov(cb, C_ARG_REGS[1], imm_opnd(off));
3986 mov(cb, C_ARG_REGS[2], val);
3987 call_ptr(cb, REG0, (void *)RSTRUCT_SET);
3988
3989 x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
3990 mov(cb, ret, RAX);
3991
3992 jit_jump_to_next_insn(jit, ctx);
3993 return YJIT_END_BLOCK;
3994}
3995
3996const rb_callable_method_entry_t *
3997rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
3998
3999static codegen_status_t
4000gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
4001{
4002 // Relevant definitions:
4003 // rb_execution_context_t : vm_core.h
4004 // invoker, cfunc logic : method.h, vm_method.c
4005 // rb_callinfo : vm_callinfo.h
4006 // rb_callable_method_entry_t : method.h
4007 // vm_call_cfunc_with_frame : vm_insnhelper.c
4008 //
4009 // For a general overview for how the interpreter calls methods,
4010 // see vm_call_method().
4011
4012 const struct rb_callinfo *ci = cd->ci; // info about the call site
4013
4014 int32_t argc = (int32_t)vm_ci_argc(ci);
4015 ID mid = vm_ci_mid(ci);
4016
4017 // Don't JIT calls with keyword splat
4018 if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
4019 GEN_COUNTER_INC(cb, send_kw_splat);
4020 return YJIT_CANT_COMPILE;
4021 }
4022
4023 // Don't JIT calls that aren't simple
4024 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4025 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4026 GEN_COUNTER_INC(cb, send_args_splat);
4027 return YJIT_CANT_COMPILE;
4028 }
4029 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4030 GEN_COUNTER_INC(cb, send_block_arg);
4031 return YJIT_CANT_COMPILE;
4032 }
4033
4034 // Defer compilation so we can specialize on class of receiver
4035 if (!jit_at_current_insn(jit)) {
4036 defer_compilation(jit, ctx);
4037 return YJIT_END_BLOCK;
4038 }
4039
4040 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4041 VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
4042
4043 // Guard that the receiver has the same class as the one from compile time
4044 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4045
4046 // Points to the receiver operand on the stack
4047 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4048 insn_opnd_t recv_opnd = OPND_STACK(argc);
4049 mov(cb, REG0, recv);
4050 if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
4051 return YJIT_CANT_COMPILE;
4052 }
4053
4054 // Do method lookup
4055 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
4056 if (!cme) {
4057 // TODO: counter
4058 return YJIT_CANT_COMPILE;
4059 }
4060
4061 switch (METHOD_ENTRY_VISI(cme)) {
4062 case METHOD_VISI_PUBLIC:
4063 // Can always call public methods
4064 break;
4065 case METHOD_VISI_PRIVATE:
4066 if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
4067 // Can only call private methods with FCALL callsites.
4068 // (at the moment they are callsites without a receiver or an explicit `self` receiver)
4069 return YJIT_CANT_COMPILE;
4070 }
4071 break;
4072 case METHOD_VISI_PROTECTED:
4073 jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
4074 break;
4075 case METHOD_VISI_UNDEF:
4076 RUBY_ASSERT(false && "cmes should always have a visibility");
4077 break;
4078 }
4079
4080 // Register block for invalidation
4081 RUBY_ASSERT(cme->called_id == mid);
4082 assume_method_lookup_stable(comptime_recv_klass, cme, jit);
4083
4084 // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
4085 while (true) {
4086 // switch on the method type
4087 switch (cme->def->type) {
4088 case VM_METHOD_TYPE_ISEQ:
4089 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4090 case VM_METHOD_TYPE_CFUNC:
4091 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4092 GEN_COUNTER_INC(cb, send_cfunc_kwargs);
4093 return YJIT_CANT_COMPILE;
4094 }
4095 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
4096 case VM_METHOD_TYPE_IVAR:
4097 if (argc != 0) {
4098 // Argument count mismatch. Getters take no arguments.
4099 GEN_COUNTER_INC(cb, send_getter_arity);
4100 return YJIT_CANT_COMPILE;
4101 }
4102 if (c_method_tracing_currently_enabled(jit)) {
4103 // Can't generate code for firing c_call and c_return events
4104 // :attr-tracing:
4105 // Handling the C method tracing events for attr_accessor
4106 // methods is easier than regular C methods as we know the
4107 // "method" we are calling into never enables those tracing
4108 // events. Once global invalidation runs, the code for the
4109 // attr_accessor is invalidated and we exit at the closest
4110 // instruction boundary which is always outside of the body of
4111 // the attr_accessor code.
4112 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4113 return YJIT_CANT_COMPILE;
4114 }
4115
4116 mov(cb, REG0, recv);
4117
4118 ID ivar_name = cme->def->body.attr.id;
4119 return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
4120 case VM_METHOD_TYPE_ATTRSET:
4121 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4122 GEN_COUNTER_INC(cb, send_attrset_kwargs);
4123 return YJIT_CANT_COMPILE;
4124 }
4125 else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
4126 GEN_COUNTER_INC(cb, send_ivar_set_method);
4127 return YJIT_CANT_COMPILE;
4128 }
4129 else if (c_method_tracing_currently_enabled(jit)) {
4130 // Can't generate code for firing c_call and c_return events
4131 // See :attr-tracing:
4132 GEN_COUNTER_INC(cb, send_cfunc_tracing);
4133 return YJIT_CANT_COMPILE;
4134 }
4135 else {
4136 ID ivar_name = cme->def->body.attr.id;
4137 return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
4138 }
4139 // Block method, e.g. define_method(:foo) { :my_block }
4140 case VM_METHOD_TYPE_BMETHOD:
4141 GEN_COUNTER_INC(cb, send_bmethod);
4142 return YJIT_CANT_COMPILE;
4143 case VM_METHOD_TYPE_ZSUPER:
4144 GEN_COUNTER_INC(cb, send_zsuper_method);
4145 return YJIT_CANT_COMPILE;
4146 case VM_METHOD_TYPE_ALIAS: {
4147 // Retrieve the aliased method and re-enter the switch
4148 cme = rb_aliased_callable_method_entry(cme);
4149 continue;
4150 }
4151 case VM_METHOD_TYPE_UNDEF:
4152 GEN_COUNTER_INC(cb, send_undef_method);
4153 return YJIT_CANT_COMPILE;
4154 case VM_METHOD_TYPE_NOTIMPLEMENTED:
4155 GEN_COUNTER_INC(cb, send_not_implemented_method);
4156 return YJIT_CANT_COMPILE;
4157 // Send family of methods, e.g. call/apply
4158 case VM_METHOD_TYPE_OPTIMIZED:
4159 switch (cme->def->body.optimized.type) {
4160 case OPTIMIZED_METHOD_TYPE_SEND:
4161 GEN_COUNTER_INC(cb, send_optimized_method_send);
4162 return YJIT_CANT_COMPILE;
4163 case OPTIMIZED_METHOD_TYPE_CALL:
4164 GEN_COUNTER_INC(cb, send_optimized_method_call);
4165 return YJIT_CANT_COMPILE;
4166 case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
4167 GEN_COUNTER_INC(cb, send_optimized_method_block_call);
4168 return YJIT_CANT_COMPILE;
4169 case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
4170 return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4171 case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
4172 return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
4173 default:
4174 rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
4175 UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
4176 }
4177 case VM_METHOD_TYPE_MISSING:
4178 GEN_COUNTER_INC(cb, send_missing_method);
4179 return YJIT_CANT_COMPILE;
4180 case VM_METHOD_TYPE_REFINED:
4181 GEN_COUNTER_INC(cb, send_refined_method);
4182 return YJIT_CANT_COMPILE;
4183 // no default case so compiler issues a warning if this is not exhaustive
4184 }
4185
4186 // Unreachable
4187 RUBY_ASSERT(false);
4188 }
4189}
4190
4191static codegen_status_t
4192gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4193{
4194 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4195 return gen_send_general(jit, ctx, cd, NULL);
4196}
4197
4198static codegen_status_t
4199gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4200{
4201 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4202 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4203 return gen_send_general(jit, ctx, cd, block);
4204}
4205
4206static codegen_status_t
4207gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4208{
4209 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4210 rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
4211
4212 // Defer compilation so we can specialize on class of receiver
4213 if (!jit_at_current_insn(jit)) {
4214 defer_compilation(jit, ctx);
4215 return YJIT_END_BLOCK;
4216 }
4217
4218 const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
4219 if (!me) {
4220 return YJIT_CANT_COMPILE;
4221 }
4222
4223 // FIXME: We should track and invalidate this block when this cme is invalidated
4224 VALUE current_defined_class = me->defined_class;
4225 ID mid = me->def->original_id;
4226
4227 if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
4228 // Though we likely could generate this call, as we are only concerned
4229 // with the method entry remaining valid, assume_method_lookup_stable
4230 // below requires that the method lookup matches as well
4231 return YJIT_CANT_COMPILE;
4232 }
4233
4234 // vm_search_normal_superclass
4235 if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
4236 return YJIT_CANT_COMPILE;
4237 }
4238 VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
4239
4240 const struct rb_callinfo *ci = cd->ci;
4241 int32_t argc = (int32_t)vm_ci_argc(ci);
4242
4243 // Don't JIT calls that aren't simple
4244 // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
4245 if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
4246 GEN_COUNTER_INC(cb, send_args_splat);
4247 return YJIT_CANT_COMPILE;
4248 }
4249 if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
4250 GEN_COUNTER_INC(cb, send_keywords);
4251 return YJIT_CANT_COMPILE;
4252 }
4253 if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
4254 GEN_COUNTER_INC(cb, send_kw_splat);
4255 return YJIT_CANT_COMPILE;
4256 }
4257 if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
4258 GEN_COUNTER_INC(cb, send_block_arg);
4259 return YJIT_CANT_COMPILE;
4260 }
4261
4262 // Ensure we haven't rebound this method onto an incompatible class.
4263 // In the interpreter we try to avoid making this check by performing some
4264 // cheaper calculations first, but since we specialize on the method entry
4265 // and only have to do this once at compile time, it is fine to always
4266 // check and side exit.
4267 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
4268 if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
4269 return YJIT_CANT_COMPILE;
4270 }
4271
4272 // Do method lookup
4273 const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
4274
4275 if (!cme) {
4276 return YJIT_CANT_COMPILE;
4277 }
4278
4279 // Check that we'll be able to write this method dispatch before generating checks
4280 switch (cme->def->type) {
4281 case VM_METHOD_TYPE_ISEQ:
4282 case VM_METHOD_TYPE_CFUNC:
4283 break;
4284 default:
4285 // others unimplemented
4286 return YJIT_CANT_COMPILE;
4287 }
4288
4289 // Guard that the receiver has the same class as the one from compile time
4290 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4291
4292 if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
4293 // This will be the case for super within a block
4294 return YJIT_CANT_COMPILE;
4295 }
4296
4297 ADD_COMMENT(cb, "guard known me");
4298 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4299 x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
4300 jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
4301 cmp(cb, ep_me_opnd, REG1);
4302 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
4303
4304 if (!block) {
4305 // Guard no block passed
4306 // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
4307 // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
4308 //
4309 // TODO: this could properly forward the current block handler, but
4310 // would require changes to gen_send_*
4311 ADD_COMMENT(cb, "guard no block given");
4312 // EP is in REG0 from above
4313 x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
4314 cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
4315 jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
4316 }
4317
4318 // Points to the receiver operand on the stack
4319 x86opnd_t recv = ctx_stack_opnd(ctx, argc);
4320 mov(cb, REG0, recv);
4321
4322 // We need to assume that both our current method entry and the super
4323 // method entry we invoke remain stable
4324 assume_method_lookup_stable(current_defined_class, me, jit);
4325 assume_method_lookup_stable(comptime_superclass, cme, jit);
4326
4327 // Method calls may corrupt types
4328 ctx_clear_local_types(ctx);
4329
4330 switch (cme->def->type) {
4331 case VM_METHOD_TYPE_ISEQ:
4332 return gen_send_iseq(jit, ctx, ci, cme, block, argc);
4333 case VM_METHOD_TYPE_CFUNC:
4334 return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
4335 default:
4336 break;
4337 }
4338
4339 RUBY_ASSERT_ALWAYS(false);
4340}
4341
4342static codegen_status_t
4343gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4344{
4345 // Only the return value should be on the stack
4346 RUBY_ASSERT(ctx->stack_size == 1);
4347
4348 // Create a side-exit to fall back to the interpreter
4349 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4350
4351 // Load environment pointer EP from CFP
4352 mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
4353
4354 // Check for interrupts
4355 ADD_COMMENT(cb, "check for interrupts");
4356 yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
4357
4358 // Load the return value
4359 mov(cb, REG0, ctx_stack_pop(ctx, 1));
4360
4361 // Pop the current frame (ec->cfp++)
4362 // Note: the return PC is already in the previous CFP
4363 add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
4364 mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
4365
4366 // Reload REG_SP for the caller and write the return value.
4367 // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
4368 mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
4369 mov(cb, mem_opnd(64, REG_SP, 0), REG0);
4370
4371 // Jump to the JIT return address on the frame that was just popped
4372 const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
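// For illustration only: REG_CFP now points at the caller's frame, so the frame
// we just popped sits one rb_control_frame_t lower in memory; the offset above
// indexes that popped frame's jit_return field, which gen_return_branch() filled
// in when the frame was pushed.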
4373 jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
4374
4375 return YJIT_END_BLOCK;
4376}
4377
4378RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
4379
4380static codegen_status_t
4381gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4382{
4383 ID gid = jit_get_arg(jit, 0);
4384
4385 // Save the PC and SP because we might make a Ruby call for warning
4386 jit_prepare_routine_call(jit, ctx, REG0);
4387
4388 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4389
4390 call_ptr(cb, REG0, (void *)&rb_gvar_get);
4391
4392 x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4393 mov(cb, top, RAX);
4394
4395 return YJIT_KEEP_COMPILING;
4396}
4397
4398static codegen_status_t
4399gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4400{
4401 ID gid = jit_get_arg(jit, 0);
4402
4403 // Save the PC and SP because we might make a Ruby call for
4404 // a trace hook registered via Kernel#trace_var
4405 jit_prepare_routine_call(jit, ctx, REG0);
4406
4407 mov(cb, C_ARG_REGS[0], imm_opnd(gid));
4408
4409 x86opnd_t val = ctx_stack_pop(ctx, 1);
4410
4411 mov(cb, C_ARG_REGS[1], val);
4412
4413 call_ptr(cb, REG0, (void *)&rb_gvar_set);
4414
4415 return YJIT_KEEP_COMPILING;
4416}
4417
4418static codegen_status_t
4419gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4420{
4421 // Save the PC and SP because rb_obj_as_string_result() may allocate a
4422 // String when the value being converted is not already one.
4423 jit_prepare_routine_call(jit, ctx, REG0);
4424
4425 x86opnd_t str = ctx_stack_pop(ctx, 1);
4426 x86opnd_t val = ctx_stack_pop(ctx, 1);
4427
4428 mov(cb, C_ARG_REGS[0], str);
4429 mov(cb, C_ARG_REGS[1], val);
4430
4431 call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
4432
4433 // Push the return value
4434 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
4435 mov(cb, stack_ret, RAX);
4436
4437 return YJIT_KEEP_COMPILING;
4438}
4439
4440static codegen_status_t
4441gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4442{
4443 if (!jit_at_current_insn(jit)) {
4444 defer_compilation(jit, ctx);
4445 return YJIT_END_BLOCK;
4446 }
4447
4448 x86opnd_t recv = ctx_stack_opnd(ctx, 0);
4449 VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
4450
4451 if (RB_TYPE_P(comptime_recv, T_STRING)) {
4452 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4453
4454 mov(cb, REG0, recv);
4455 jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
4456 // No work needed. The string value is already on the top of the stack.
4457 return YJIT_KEEP_COMPILING;
4458 }
4459 else {
4460 struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
4461 return gen_send_general(jit, ctx, cd, NULL);
4462 }
4463}
4464
4465static codegen_status_t
4466gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4467{
4468 rb_num_t opt = jit_get_arg(jit, 0);
4469 rb_num_t cnt = jit_get_arg(jit, 1);
4470
4471 // Save the PC and SP because this allocates an object and could
4472 // raise an exception.
4473 jit_prepare_routine_call(jit, ctx, REG0);
4474
4475 x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(sizeof(VALUE) * (uint32_t)cnt));
4476 ctx_stack_pop(ctx, cnt);
4477
4478 mov(cb, C_ARG_REGS[0], imm_opnd(0));
4479 mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
4480 lea(cb, C_ARG_REGS[2], values_ptr);
4481 call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
4482
4483 // Save the array so we can clear it later
4484 push(cb, RAX);
4485 push(cb, RAX); // Alignment
4486 mov(cb, C_ARG_REGS[0], RAX);
4487 mov(cb, C_ARG_REGS[1], imm_opnd(opt));
4488 call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
4489
4490 // The actual regex is in RAX now. Pop the temp array from
4491 // rb_ary_tmp_new_from_values into C arg regs so we can clear it
4492 pop(cb, REG1); // Alignment
4493 pop(cb, C_ARG_REGS[0]);
4494
4495 // The value we want to push on the stack is in RAX right now
4496 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4497 mov(cb, stack_ret, RAX);
4498
4499 // Clear the temp array.
4500 call_ptr(cb, REG0, (void *)&rb_ary_clear);
4501
4502 return YJIT_KEEP_COMPILING;
4503}
4504
4505static codegen_status_t
4506gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4507{
4508 // Save the PC and SP because we might allocate
4509 jit_prepare_routine_call(jit, ctx, REG0);
4510
4511 x86opnd_t str = ctx_stack_pop(ctx, 1);
4512
4513 mov(cb, C_ARG_REGS[0], str);
4514
4515 call_ptr(cb, REG0, (void *)&rb_str_intern);
4516
4517 // Push the return value
4518 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4519 mov(cb, stack_ret, RAX);
4520
4521 return YJIT_KEEP_COMPILING;
4522}
4523
4524static codegen_status_t
4525gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4526{
4527 // This takes two arguments, key and type
4528 // key is only used when type == 0
4529 // A non-zero type determines which type of backref to fetch
4530 //rb_num_t key = jit_get_arg(jit, 0);
4531 rb_num_t type = jit_get_arg(jit, 1);
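// For illustration only: $& is encoded as type == ('&' << 1) | 1 and $1 as
// type == (1 << 1), so bit 0 selects the special-character branch below and
// type >> 1 recovers either the character or the group number.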
4532
4533 if (type == 0) {
4534 // not yet implemented
4535 return YJIT_CANT_COMPILE;
4536 }
4537 else if (type & 0x01) {
4538 // Fetch a "special" backref based on a char encoded by shifting by 1
4539
4540 // Can raise if matchdata uninitialized
4541 jit_prepare_routine_call(jit, ctx, REG0);
4542
4543 // call rb_backref_get()
4544 ADD_COMMENT(cb, "rb_backref_get");
4545 call_ptr(cb, REG0, (void *)rb_backref_get);
4546 mov(cb, C_ARG_REGS[0], RAX);
4547
4548 switch (type >> 1) {
4549 case '&':
4550 ADD_COMMENT(cb, "rb_reg_last_match");
4551 call_ptr(cb, REG0, (void *)rb_reg_last_match);
4552 break;
4553 case '`':
4554 ADD_COMMENT(cb, "rb_reg_match_pre");
4555 call_ptr(cb, REG0, (void *)rb_reg_match_pre);
4556 break;
4557 case '\'':
4558 ADD_COMMENT(cb, "rb_reg_match_post");
4559 call_ptr(cb, REG0, (void *)rb_reg_match_post);
4560 break;
4561 case '+':
4562 ADD_COMMENT(cb, "rb_reg_match_last");
4563 call_ptr(cb, REG0, (void *)rb_reg_match_last);
4564 break;
4565 default:
4566 rb_bug("invalid back-ref");
4567 }
4568
4569 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4570 mov(cb, stack_ret, RAX);
4571
4572 return YJIT_KEEP_COMPILING;
4573 }
4574 else {
4575 // Fetch the N-th match from the last backref based on type shifted by 1
4576
4577 // Can raise if matchdata uninitialized
4578 jit_prepare_routine_call(jit, ctx, REG0);
4579
4580 // call rb_backref_get()
4581 ADD_COMMENT(cb, "rb_backref_get");
4582 call_ptr(cb, REG0, (void *)rb_backref_get);
4583
4584 // rb_reg_nth_match((int)(type >> 1), backref);
4585 ADD_COMMENT(cb, "rb_reg_nth_match");
4586 mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
4587 mov(cb, C_ARG_REGS[1], RAX);
4588 call_ptr(cb, REG0, (void *)rb_reg_nth_match);
4589
4590 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4591 mov(cb, stack_ret, RAX);
4592
4593 return YJIT_KEEP_COMPILING;
4594 }
4595}
4596
4597VALUE
4598rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
4599
4600static codegen_status_t
4601gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4602{
4603 // rb_vm_getclassvariable can raise exceptions.
4604 jit_prepare_routine_call(jit, ctx, REG0);
4605
4606 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4607 mov(cb, C_ARG_REGS[1], REG_CFP);
4608 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4609 mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
4610
4611 call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
4612
4613 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4614 mov(cb, stack_top, RAX);
4615
4616 return YJIT_KEEP_COMPILING;
4617}
4618
4619VALUE
4620rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
4621
4622static codegen_status_t
4623gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
4624{
4625 // rb_vm_setclassvariable can raise exceptions.
4626 jit_prepare_routine_call(jit, ctx, REG0);
4627
4628 mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
4629 mov(cb, C_ARG_REGS[1], REG_CFP);
4630 mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
4631 mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
4632 mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
4633
4634 call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
4635
4636 return YJIT_KEEP_COMPILING;
4637}
4638
4639static codegen_status_t
4640gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4641{
4642 VALUE jump_offset = jit_get_arg(jit, 0);
4643 VALUE const_cache_as_value = jit_get_arg(jit, 1);
4644 IC ic = (IC)const_cache_as_value;
4645
4646 // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
4647 struct iseq_inline_constant_cache_entry *ice = ic->entry;
4648 if (!ice || // cache not filled
4649 GET_IC_SERIAL(ice) != ruby_vm_global_constant_state /* cache out of date */) {
4650 // In these cases, leave a block that unconditionally side exits
4651 // for the interpreter to invalidate.
4652 return YJIT_CANT_COMPILE;
4653 }
4654
4655 // Make sure there is an exit for this block as the interpreter might want
4656 // to invalidate this block from yjit_constant_ic_update().
4657 jit_ensure_block_entry_exit(jit);
4658
4659 if (ice->ic_cref) {
4660 // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
4661 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4662
4663 // Call function to verify the cache. It doesn't allocate or call methods.
4664 bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
4665 mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
4666 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
4667 call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
4668
4669 // Check the result. _Bool is one byte in SysV.
4670 test(cb, AL, AL);
4671 jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
4672
4673 // Push ic->entry->value
4674 mov(cb, REG0, const_ptr_opnd((void *)ic));
4675 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
4676 x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
4677 mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
4678 mov(cb, stack_top, REG0);
4679 }
4680 else {
4681 // Optimize for single ractor mode.
4682 // FIXME: This leaks when st_insert raises NoMemoryError
4683 if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
4684
4685 // Invalidate output code on any and all constant writes
4686 // FIXME: This leaks when st_insert raises NoMemoryError
4687 assume_stable_global_constant_state(jit);
4688
4689 jit_putobject(jit, ctx, ice->value);
4690 }
4691
4692 // Jump over the code for filling the cache
4693 uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
4694 gen_direct_jump(
4695 jit,
4696 ctx,
4697 (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
4698 );
4699
4700 return YJIT_END_BLOCK;
4701}
4702
4703// Push the explicit block parameter onto the temporary stack. Part of the
4704// interpreter's scheme for avoiding Proc allocations when delegating
4705// explicit block parameters.
4706static codegen_status_t
4707gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4708{
4709 // A mirror of the interpreter code. Checking for the case
4710 // where it's pushing rb_block_param_proxy.
4711 uint8_t *side_exit = yjit_side_exit(jit, ctx);
4712
4713 // EP level
4714 uint32_t level = (uint32_t)jit_get_arg(jit, 1);
4715
4716 // Load environment pointer EP from CFP
4717 gen_get_ep(cb, REG0, level);
4718
4719 // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
4720 test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
4721 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
4722
4723 // Load the block handler for the current frame
4724 // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
4725 mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
4726
4727 // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
4728 and(cb, REG0_8, imm_opnd(0x3));
4729
4730 // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
4731 cmp(cb, REG0_8, imm_opnd(0x1));
4732 jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
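// For illustration only: block handlers built from iseq blocks carry tag 0x1 and
// ifunc blocks carry tag 0x3, while Proc and Symbol block handlers have neither
// tag, so only the plain iseq-block case falls through to the push below.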
4733
4734 // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
4735 mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
4736 RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
4737 x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
4738 mov(cb, top, REG0);
4739
4740 return YJIT_KEEP_COMPILING;
4741}
4742
4743static codegen_status_t
4744gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4745{
4746 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4747
4748 // ec, self, and arguments
4749 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4750 return YJIT_CANT_COMPILE;
4751 }
4752
4753 // If the calls don't allocate, do they need up to date PC, SP?
4754 jit_prepare_routine_call(jit, ctx, REG0);
4755
4756 // Call the builtin func (ec, recv, arg1, arg2, ...)
4757 mov(cb, C_ARG_REGS[0], REG_EC);
4758 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4759
4760 // Copy arguments from locals
4761 for (int32_t i = 0; i < bf->argc; i++) {
4762 x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
4763 x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
4764 mov(cb, c_arg_reg, stack_opnd);
4765 }
4766
4767 call_ptr(cb, REG0, (void *)bf->func_ptr);
4768
4769 // Push the return value
4770 ctx_stack_pop(ctx, bf->argc);
4771 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4772 mov(cb, stack_ret, RAX);
4773
4774 return YJIT_KEEP_COMPILING;
4775}
4776
4777// opt_invokebuiltin_delegate calls a builtin function, like
4778// invokebuiltin does, but instead of taking arguments from the top of the
4779// stack, it uses the argument locals (and self) from the current method.
4780static codegen_status_t
4781gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
4782{
4783 const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
4784 int32_t start_index = (int32_t)jit_get_arg(jit, 1);
4785
4786 // ec, self, and arguments
4787 if (bf->argc + 2 > NUM_C_ARG_REGS) {
4788 return YJIT_CANT_COMPILE;
4789 }
4790
4791 // If the calls don't allocate, do they need up to date PC, SP?
4792 jit_prepare_routine_call(jit, ctx, REG0);
4793
4794 if (bf->argc > 0) {
4795 // Load environment pointer EP from CFP
4796 mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
4797 }
4798
4799 // Call the builtin func (ec, recv, arg1, arg2, ...)
4800 mov(cb, C_ARG_REGS[0], REG_EC);
4801 mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
4802
4803 // Copy arguments from locals
4804 for (int32_t i = 0; i < bf->argc; i++) {
4805 const int32_t offs = -jit->iseq->body->local_table_size - VM_ENV_DATA_SIZE + 1 + start_index + i;
4806 x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
4807 x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
4808 mov(cb, c_arg_reg, local_opnd);
4809 }
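// For illustration only: with local_table_size == 2, VM_ENV_DATA_SIZE == 3 and
// start_index == 0, the first argument local is read from ep[-4] and the second
// from ep[-3], i.e. the argument locals sit just below the three env data slots
// at ep[-2..0].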
4810 call_ptr(cb, REG0, (void *)bf->func_ptr);
4811
4812 // Push the return value
4813 x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
4814 mov(cb, stack_ret, RAX);
4815
4816 return YJIT_KEEP_COMPILING;
4817}
4818
4819static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
4820static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
4821
4822// Invalidate all generated code and patch C method return code to contain
4823// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
4824// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
4825// means they are inside a C routine. If there is any generated code on the stack,
4826// it is waiting for a return from a C routine. For every routine call, we
4827// patch in an exit after the body of the containing VM instruction. This makes
4828// all of the invalidated code exit as soon as execution logically reaches
4829// the next VM instruction. The interpreter takes care of firing the tracing
4830// event if it so happens that the next VM instruction has one attached.
4831//
4832// The c_return event needs special handling as our codegen never outputs code
4833// that contains tracing logic. If we let the normal output code run until the
4834// start of the next VM instruction by relying on the patching scheme above, we
4835// would fail to fire the c_return event. The interpreter doesn't fire the
4836// event at an instruction boundary, so simply exiting to the interpreter isn't
4837// enough. To handle it, we patch in the full logic at the return address. See
4838// full_cfunc_return().
4839//
4840// In addition to patching, we prevent future entries into invalidated code by
4841// removing all live blocks from their iseq.
4842void
4843rb_yjit_tracing_invalidate_all(void)
4844{
4845 if (!rb_yjit_enabled_p()) return;
4846
4847 // Stop other ractors since we are going to patch machine code.
4848 RB_VM_LOCK_ENTER();
4849 rb_vm_barrier();
4850
4851 // Make it so all live block versions are no longer valid branch targets
4852 rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
4853
4854 // Apply patches
4855 const uint32_t old_pos = cb->write_pos;
4856 rb_darray_for(global_inval_patches, patch_idx) {
4857 struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
4858 cb_set_pos(cb, patch.inline_patch_pos);
4859 uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
4860 jmp_ptr(cb, jump_target);
4861 }
4862 cb_set_pos(cb, old_pos);
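// For illustration only: each patch rewinds the inline code block to a recorded
// instruction boundary and overwrites it with a jmp to an exit stub in the
// outlined block, so invalidated code that is still running falls into an exit
// as soon as it reaches the next VM instruction.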
4863
4864 // Freeze invalidated part of the codepage. We only want to wait for
4865 // running instances of the code to exit from now on, so we shouldn't
4866 // change the code. There could be other ractors sleeping in
4867 // branch_stub_hit(), for example. We could harden this by changing memory
4868 // protection on the frozen range.
4869 RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
4870 yjit_codepage_frozen_bytes = old_pos;
4871
4872 cb_mark_all_executable(ocb);
4873 cb_mark_all_executable(cb);
4874 RB_VM_LOCK_LEAVE();
4875}
4876
4877static int
4878tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
4879{
4880 VALUE v = (VALUE)vstart;
4881 for (; v != (VALUE)vend; v += stride) {
4882 void *ptr = asan_poisoned_object_p(v);
4883 asan_unpoison_object(v, false);
4884
4885 if (rb_obj_is_iseq(v)) {
4886 rb_iseq_t *iseq = (rb_iseq_t *)v;
4887 invalidate_all_blocks_for_tracing(iseq);
4888 }
4889
4890 asan_poison_object_if(ptr, v);
4891 }
4892 return 0;
4893}
4894
4895static void
4896invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
4897{
4898 struct rb_iseq_constant_body *body = iseq->body;
4899 if (!body) return; // iseq yet to be initialized
4900
4901 ASSERT_vm_locking();
4902
4903 // Empty all blocks on the iseq so we don't compile new blocks that jump to the
4904// invalidated region.
4905// TODO Leaking the blocks for now since we might have situations where
4906// a different ractor is waiting in branch_stub_hit(). If we free the block,
4907 // that ractor can wake up with a dangling block.
4908 rb_darray_for(body->yjit_blocks, version_array_idx) {
4909 rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
4910 rb_darray_for(version_array, version_idx) {
4911 // Stop listening for invalidation events like basic operation redefinition.
4912 block_t *block = rb_darray_get(version_array, version_idx);
4913 yjit_unlink_method_lookup_dependency(block);
4914 yjit_block_assumptions_free(block);
4915 }
4916 rb_darray_free(version_array);
4917 }
4918 rb_darray_free(body->yjit_blocks);
4919 body->yjit_blocks = NULL;
4920
4921#if USE_MJIT
4922 // Reset output code entry point
4923 body->jit_func = NULL;
4924#endif
4925}
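// invalidate_all_blocks_for_tracing() walks a nested structure: per iseq, an
// array indexed by instruction index whose entries are arrays of block
// versions. A rough sketch of that shape, and of "free the containers but
// deliberately leak the elements", follows; the types (toy_block,
// toy_versions, toy_body) and the flat malloc-backed arrays are hypothetical
// stand-ins for the darray-based originals.
#if 0
#include <stdlib.h>
#include <stddef.h>

struct toy_block { int dummy; };

struct toy_versions {
    struct toy_block **blocks; // block versions compiled for one instruction
    size_t count;
};

struct toy_body {
    struct toy_versions *by_insn; // one entry per instruction index
    size_t insn_count;
};

static void
toy_detach_all_blocks(struct toy_body *body)
{
    for (size_t i = 0; i < body->insn_count; i++) {
        // Free only the container; the blocks themselves are intentionally
        // leaked because another thread/ractor might still reference them.
        free(body->by_insn[i].blocks);
    }
    free(body->by_insn);
    body->by_insn = NULL; // no block versions can be found or reused anymore
    body->insn_count = 0;
}
#endif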
4926
4927static void
4928yjit_reg_op(int opcode, codegen_fn gen_fn)
4929{
4930 RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
4931 // Check that the op wasn't previously registered
4932 RUBY_ASSERT(gen_fns[opcode] == NULL);
4933
4934 gen_fns[opcode] = gen_fn;
4935}
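// yjit_reg_op() is plain dispatch-table registration: one slot per YARV
// opcode, filled at most once. The sketch below shows the same pattern in
// isolation, where a NULL slot means "no specialized codegen, take a generic
// path". toy_register/toy_lookup and TOY_MAX_OPS are made-up names; only the
// shape of the technique matches the code above.
#if 0
#include <assert.h>
#include <stddef.h>

#define TOY_MAX_OPS 256
typedef int (*toy_gen_fn)(void);

static toy_gen_fn toy_dispatch[TOY_MAX_OPS];

static void
toy_register(int opcode, toy_gen_fn fn)
{
    assert(opcode >= 0 && opcode < TOY_MAX_OPS);
    assert(toy_dispatch[opcode] == NULL); // catch double registration early
    toy_dispatch[opcode] = fn;
}

static toy_gen_fn
toy_lookup(int opcode)
{
    // A NULL entry means the opcode has no specialized generator; callers
    // would fall back to something generic (for YJIT, exiting to the
    // interpreter).
    return (opcode >= 0 && opcode < TOY_MAX_OPS) ? toy_dispatch[opcode] : NULL;
}
#endif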
4936
4937void
4938yjit_init_codegen(void)
4939{
4940 // Initialize the code blocks
4941 uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
4942 uint8_t *mem_block = alloc_exec_mem(mem_size);
4943
4944 cb = &block;
4945 cb_init(cb, mem_block, mem_size/2);
4946
4947 ocb = &outline_block;
4948 cb_init(ocb, mem_block + mem_size/2, mem_size/2);
4949
4950 // Generate the interpreter exit code for leave
4951 leave_exit_code = yjit_gen_leave_exit(cb);
4952
4953 // Generate full exit code for C func
4954 gen_full_cfunc_return();
4955 cb_mark_all_executable(cb);
4956
4957 // Map YARV opcodes to the corresponding codegen functions
4958 yjit_reg_op(BIN(nop), gen_nop);
4959 yjit_reg_op(BIN(dup), gen_dup);
4960 yjit_reg_op(BIN(dupn), gen_dupn);
4961 yjit_reg_op(BIN(swap), gen_swap);
4962 yjit_reg_op(BIN(setn), gen_setn);
4963 yjit_reg_op(BIN(topn), gen_topn);
4964 yjit_reg_op(BIN(pop), gen_pop);
4965 yjit_reg_op(BIN(adjuststack), gen_adjuststack);
4966 yjit_reg_op(BIN(newarray), gen_newarray);
4967 yjit_reg_op(BIN(duparray), gen_duparray);
4968 yjit_reg_op(BIN(duphash), gen_duphash);
4969 yjit_reg_op(BIN(splatarray), gen_splatarray);
4970 yjit_reg_op(BIN(expandarray), gen_expandarray);
4971 yjit_reg_op(BIN(newhash), gen_newhash);
4972 yjit_reg_op(BIN(newrange), gen_newrange);
4973 yjit_reg_op(BIN(concatstrings), gen_concatstrings);
4974 yjit_reg_op(BIN(putnil), gen_putnil);
4975 yjit_reg_op(BIN(putobject), gen_putobject);
4976 yjit_reg_op(BIN(putstring), gen_putstring);
4977 yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
4978 yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
4979 yjit_reg_op(BIN(putself), gen_putself);
4980 yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
4981 yjit_reg_op(BIN(getlocal), gen_getlocal);
4982 yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
4983 yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
4984 yjit_reg_op(BIN(setlocal), gen_setlocal);
4985 yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
4986 yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
4987 yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
4988 yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
4989 yjit_reg_op(BIN(defined), gen_defined);
4990 yjit_reg_op(BIN(checktype), gen_checktype);
4991 yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
4992 yjit_reg_op(BIN(opt_lt), gen_opt_lt);
4993 yjit_reg_op(BIN(opt_le), gen_opt_le);
4994 yjit_reg_op(BIN(opt_ge), gen_opt_ge);
4995 yjit_reg_op(BIN(opt_gt), gen_opt_gt);
4996 yjit_reg_op(BIN(opt_eq), gen_opt_eq);
4997 yjit_reg_op(BIN(opt_neq), gen_opt_neq);
4998 yjit_reg_op(BIN(opt_aref), gen_opt_aref);
4999 yjit_reg_op(BIN(opt_aset), gen_opt_aset);
5000 yjit_reg_op(BIN(opt_and), gen_opt_and);
5001 yjit_reg_op(BIN(opt_or), gen_opt_or);
5002 yjit_reg_op(BIN(opt_minus), gen_opt_minus);
5003 yjit_reg_op(BIN(opt_plus), gen_opt_plus);
5004 yjit_reg_op(BIN(opt_mult), gen_opt_mult);
5005 yjit_reg_op(BIN(opt_div), gen_opt_div);
5006 yjit_reg_op(BIN(opt_mod), gen_opt_mod);
5007 yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
5008 yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
5009 yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
5010 yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
5011 yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
5012 yjit_reg_op(BIN(opt_not), gen_opt_not);
5013 yjit_reg_op(BIN(opt_size), gen_opt_size);
5014 yjit_reg_op(BIN(opt_length), gen_opt_length);
5015 yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
5016 yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
5017 yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
5018 yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
5019 yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
5020 yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
5021 yjit_reg_op(BIN(branchif), gen_branchif);
5022 yjit_reg_op(BIN(branchunless), gen_branchunless);
5023 yjit_reg_op(BIN(branchnil), gen_branchnil);
5024 yjit_reg_op(BIN(jump), gen_jump);
5025 yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
5026 yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
5027 yjit_reg_op(BIN(send), gen_send);
5028 yjit_reg_op(BIN(invokesuper), gen_invokesuper);
5029 yjit_reg_op(BIN(leave), gen_leave);
5030 yjit_reg_op(BIN(getglobal), gen_getglobal);
5031 yjit_reg_op(BIN(setglobal), gen_setglobal);
5032 yjit_reg_op(BIN(anytostring), gen_anytostring);
5033 yjit_reg_op(BIN(objtostring), gen_objtostring);
5034 yjit_reg_op(BIN(toregexp), gen_toregexp);
5035 yjit_reg_op(BIN(intern), gen_intern);
5036 yjit_reg_op(BIN(getspecial), gen_getspecial);
5037 yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
5038 yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
5039
5040 yjit_method_codegen_table = st_init_numtable();
5041
5042 // Specialization for C methods. See yjit_reg_method() for details.
5043 yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
5044
5045 yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
5046 yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
5047
5048 yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
5049 yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
5050 yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
5051 yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
5052 yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
5053 yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
5054
5055 // rb_str_to_s() methods in string.c
5056 yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
5057 yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
5058 yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
5059
5060 // Thread.current
5061 yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
5062}
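// yjit_init_codegen() carves one executable allocation into two equal halves:
// the first for the inline code block (cb), the second for the outlined block
// (ocb). The arithmetic is simple, but a self-contained sketch makes the
// layout explicit. toy_split_code_space and its malloc-backed buffer are
// hypothetical; the real code obtains executable memory via alloc_exec_mem().
#if 0
#include <stdint.h>
#include <stdlib.h>

struct toy_cb { uint8_t *mem; uint32_t capacity; uint32_t write_pos; };

static int
toy_split_code_space(uint32_t total_mb, struct toy_cb *inline_cb, struct toy_cb *outlined_cb)
{
    uint32_t total = total_mb * 1024 * 1024; // size option is given in MiB
    uint8_t *mem = malloc(total);            // stand-in for alloc_exec_mem()
    if (!mem) return -1;

    // First half: inline code, the fast paths that run most of the time.
    inline_cb->mem = mem;
    inline_cb->capacity = total / 2;
    inline_cb->write_pos = 0;

    // Second half: outlined code, exits and other rarely taken paths.
    outlined_cb->mem = mem + total / 2;
    outlined_cb->capacity = total / 2;
    outlined_cb->write_pos = 0;
    return 0;
}
#endif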