Ruby 3.1.3p185 (2022-11-24 revision 1a6b16756e0ba6b95ab71a441357ed5484e33498)
mjit_worker.c
1/**********************************************************************
2
3 mjit_worker.c - Worker for MRI method JIT compiler
4
5 Copyright (C) 2017 Vladimir Makarov <vmakarov@redhat.com>.
6
7**********************************************************************/
8
9// NOTE: All functions in this file are executed on MJIT worker. So don't
10// call Ruby methods (C functions that may call rb_funcall) or trigger
11// GC (using ZALLOC, xmalloc, xfree, etc.) in this file.
12
13/* However, note that calling `free` for resources `xmalloc`-ed in mjit.c,
14 which is currently done in some places, is sometimes problematic in the
15 following situations:
16
17 * malloc library could be different between interpreter and extensions
18 on Windows (perhaps not applicable to MJIT because CC is the same)
19 * xmalloc -> free leaks extra space used for USE_GC_MALLOC_OBJ_INFO_DETAILS
20 (not enabled by default)
21
22 ...in short, it's usually not a problem in MJIT. But maybe it's worth
23 fixing for consistency or for USE_GC_MALLOC_OBJ_INFO_DETAILS support.
24*/
25
26/* We utilize widely used C compilers (GCC and LLVM Clang) to
27 implement MJIT. We feed them a C code generated from ISEQ. The
28 industrial C compilers are slower than regular JIT engines.
29 Generated code performance of the used C compilers has a higher
30 priority over the compilation speed.
31
   So our major goal is to minimize the ISEQ compilation time when we
   use a widely used optimization level (-O2). It is achieved by
34
35 o Using a precompiled version of the header
36 o Keeping all files in `/tmp`. On modern Linux `/tmp` is a file
37 system in memory. So it is pretty fast
38 o Implementing MJIT as a multi-threaded code because we want to
39 compile ISEQs in parallel with iseq execution to speed up Ruby
40 code execution. MJIT has one thread (*worker*) to do
41 parallel compilations:
42 o It prepares a precompiled code of the minimized header.
43 It starts at the MRI execution start
44 o It generates PIC object files of ISEQs
45 o It takes one JIT unit from a priority queue unless it is empty.
46 o It translates the JIT unit ISEQ into C-code using the precompiled
47 header, calls CC and load PIC code when it is ready
48 o Currently MJIT put ISEQ in the queue when ISEQ is called
49 o MJIT can reorder ISEQs in the queue if some ISEQ has been called
50 many times and its compilation did not start yet
51 o MRI reuses the machine code if it already exists for ISEQ
52 o The machine code we generate can stop and switch to the ISEQ
53 interpretation if some condition is not satisfied as the machine
54 code can be speculative or some exception raises
55 o Speculative machine code can be canceled.
56
57 Here is a diagram showing the MJIT organization:
58
59 _______
60 |header |
61 |_______|
62 | MRI building
63 --------------|----------------------------------------
64 | MRI execution
65 |
66 _____________|_____
67 | | |
68 | ___V__ | CC ____________________
69 | | |----------->| precompiled header |
70 | | | | |____________________|
71 | | | | |
72 | | MJIT | | |
73 | | | | |
74 | | | | ____V___ CC __________
75 | |______|----------->| C code |--->| .so file |
76 | | |________| |__________|
77 | | |
78 | | |
79 | MRI machine code |<-----------------------------
80 |___________________| loading
81
82*/
83
84#ifdef __sun
85#define __EXTENSIONS__ 1
86#endif
87
88#include "vm_core.h"
89#include "vm_callinfo.h"
90#include "mjit.h"
91#include "gc.h"
92#include "ruby_assert.h"
93#include "ruby/debug.h"
94#include "ruby/thread.h"
95#include "ruby/version.h"
96#include "builtin.h"
97#include "insns.inc"
98#include "insns_info.inc"
99#include "internal/compile.h"
100
101#ifdef _WIN32
102#include <winsock2.h>
103#include <windows.h>
104#else
105#include <sys/wait.h>
106#include <sys/time.h>
107#include <dlfcn.h>
108#endif
109#include <errno.h>
110#ifdef HAVE_FCNTL_H
111#include <fcntl.h>
112#endif
113#ifdef HAVE_SYS_PARAM_H
114# include <sys/param.h>
115#endif
116#include "dln.h"
117
118#include "ruby/util.h"
119#undef strdup // ruby_strdup may trigger GC
120
121#ifndef MAXPATHLEN
122# define MAXPATHLEN 1024
123#endif
124
125#ifdef _WIN32
126#define dlopen(name,flag) ((void*)LoadLibrary(name))
127#define dlerror() strerror(rb_w32_map_errno(GetLastError()))
128#define dlsym(handle,name) ((void*)GetProcAddress((handle),(name)))
129#define dlclose(handle) (!FreeLibrary(handle))
130#define RTLD_NOW -1
131
132#define waitpid(pid,stat_loc,options) (WaitForSingleObject((HANDLE)(pid), INFINITE), GetExitCodeProcess((HANDLE)(pid), (LPDWORD)(stat_loc)), CloseHandle((HANDLE)pid), (pid))
133#define WIFEXITED(S) ((S) != STILL_ACTIVE)
134#define WEXITSTATUS(S) (S)
135#define WIFSIGNALED(S) (0)
136typedef intptr_t pid_t;
137#endif
138
139// Atomically set function pointer if possible.
140#define MJIT_ATOMIC_SET(var, val) (void)ATOMIC_PTR_EXCHANGE(var, val)
141
142#define MJIT_TMP_PREFIX "_ruby_mjit_"
143
144// JIT compaction requires the header transformation because linking multiple .o files
145// doesn't work without having `static` in the same function definitions. We currently
146// don't support transforming the MJIT header on Windows.
147#ifdef _WIN32
148# define USE_JIT_COMPACTION 0
149#else
150# define USE_JIT_COMPACTION 1
151#endif
152
153// The unit structure that holds metadata of ISeq for MJIT.
155 struct list_node unode;
156 // Unique order number of unit.
157 int id;
158 // Dlopen handle of the loaded object file.
159 void *handle;
160 rb_iseq_t *iseq;
161#if defined(_WIN32)
162 // DLL cannot be removed while loaded on Windows. If this is set, it'll be lazily deleted.
163 char *so_file;
164#endif
165 // Only used by unload_units. Flag to check this unit is currently on stack or not.
166 bool used_code_p;
167 // True if this is still in active_units but it's to be lazily removed
168 bool stale_p;
169 // mjit_compile's optimization switches
170 struct rb_mjit_compile_info compile_info;
171 // captured CC values, they should be marked with iseq.
172 const struct rb_callcache **cc_entries;
173 unsigned int cc_entries_size; // iseq->body->ci_size + ones of inlined iseqs
174};
175
176// Linked list of struct rb_mjit_unit.
178 struct list_head head;
179 int length; // the list length
180};
181
186
192
193// process.c
194extern rb_pid_t ruby_waitpid_locked(rb_vm_t *, rb_pid_t, int *status, int options, rb_nativethread_cond_t *cond);
195
196// A copy of MJIT portion of MRI options since MJIT initialization. We
197// need them as MJIT threads still can work when the most MRI data were
198// freed.
199struct mjit_options mjit_opts;
200
201// true if MJIT is enabled.
202bool mjit_enabled = false;
203// true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
204// and `mjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
205bool mjit_call_p = false;
206
207// Priority queue of iseqs waiting for JIT compilation.
208// This variable is a pointer to head unit of the queue.
209static struct rb_mjit_unit_list unit_queue = { LIST_HEAD_INIT(unit_queue.head) };
210// List of units which are successfully compiled.
211static struct rb_mjit_unit_list active_units = { LIST_HEAD_INIT(active_units.head) };
212// List of compacted so files which will be cleaned up by `free_list()` in `mjit_finish()`.
213static struct rb_mjit_unit_list compact_units = { LIST_HEAD_INIT(compact_units.head) };
214// List of units before recompilation and just waiting for dlclose().
215static struct rb_mjit_unit_list stale_units = { LIST_HEAD_INIT(stale_units.head) };
216// The number of so far processed ISEQs, used to generate unique id.
217static int current_unit_num;
// A mutex for conditionals and critical sections.
219static rb_nativethread_lock_t mjit_engine_mutex;
220// A thread conditional to wake up `mjit_finish` at the end of PCH thread.
221static rb_nativethread_cond_t mjit_pch_wakeup;
222// A thread conditional to wake up the client if there is a change in
223// executed unit status.
224static rb_nativethread_cond_t mjit_client_wakeup;
// A thread conditional to wake up a worker if we have something
// to add or we need to stop MJIT engine.
227static rb_nativethread_cond_t mjit_worker_wakeup;
// A thread conditional to wake up workers at the end of GC.
229static rb_nativethread_cond_t mjit_gc_wakeup;
230// Greater than 0 when GC is working.
231static int in_gc = 0;
232// True when JIT is working.
233static bool in_jit = false;
234// True when active_units has at least one stale_p=true unit.
235static bool pending_stale_p = false;
236// The times when unload_units is requested. unload_units is called after some requests.
237static int unload_requests = 0;
238// The total number of unloaded units.
239static int total_unloads = 0;
240// Set to true to stop worker.
241static bool stop_worker_p;
242// Set to true if worker is stopped.
243static bool worker_stopped = true;
244
245// Path of "/tmp", which can be changed to $TMP in MinGW.
246static char *tmp_dir;
247
248// Used C compiler path.
249static const char *cc_path;
250// Used C compiler flags.
251static const char **cc_common_args;
252// Used C compiler flags added by --mjit-debug=...
253static char **cc_added_args;
254// Name of the precompiled header file.
255static char *pch_file;
256// The process id which should delete the pch_file on mjit_finish.
257static rb_pid_t pch_owner_pid;
258// Status of the precompiled header creation. The status is
259// shared by the workers and the pch thread.
260static enum {PCH_NOT_READY, PCH_FAILED, PCH_SUCCESS} pch_status;
261
262#ifndef _MSC_VER
263// Name of the header file.
264static char *header_file;
265#endif
266
267#ifdef _WIN32
268// Linker option to enable libruby.
269static char *libruby_pathflag;
270#endif
271
272#include "mjit_config.h"
273
274#if defined(__GNUC__) && \
275 (!defined(__clang__) || \
276 (defined(__clang__) && (defined(__FreeBSD__) || defined(__GLIBC__))))
277# define GCC_PIC_FLAGS "-Wfatal-errors", "-fPIC", "-shared", "-w", "-pipe",
278# define MJIT_CFLAGS_PIPE 1
279#else
280# define GCC_PIC_FLAGS /* empty */
281# define MJIT_CFLAGS_PIPE 0
282#endif
283
284// Use `-nodefaultlibs -nostdlib` for GCC where possible, which does not work on mingw, cygwin, AIX, and OpenBSD.
285// This seems to improve MJIT performance on GCC.
286#if defined __GNUC__ && !defined __clang__ && !defined(_WIN32) && !defined(__CYGWIN__) && !defined(_AIX) && !defined(__OpenBSD__)
287# define GCC_NOSTDLIB_FLAGS "-nodefaultlibs", "-nostdlib",
288#else
289# define GCC_NOSTDLIB_FLAGS // empty
290#endif
291
292static const char *const CC_COMMON_ARGS[] = {
293 MJIT_CC_COMMON MJIT_CFLAGS GCC_PIC_FLAGS
294 NULL
295};
296
297static const char *const CC_DEBUG_ARGS[] = {MJIT_DEBUGFLAGS NULL};
298static const char *const CC_OPTIMIZE_ARGS[] = {MJIT_OPTFLAGS NULL};
299
300static const char *const CC_LDSHARED_ARGS[] = {MJIT_LDSHARED GCC_PIC_FLAGS NULL};
301static const char *const CC_DLDFLAGS_ARGS[] = {MJIT_DLDFLAGS NULL};
302// `CC_LINKER_ARGS` are linker flags which must be passed to `-c` as well.
303static const char *const CC_LINKER_ARGS[] = {
304#if defined __GNUC__ && !defined __clang__ && !defined(__OpenBSD__)
305 "-nostartfiles",
306#endif
307 GCC_NOSTDLIB_FLAGS NULL
308};
309
310static const char *const CC_LIBS[] = {
311#if defined(_WIN32) || defined(__CYGWIN__)
312 MJIT_LIBS // mswin, mingw, cygwin
313#endif
314#if defined __GNUC__ && !defined __clang__
315# if defined(_WIN32)
316 "-lmsvcrt", // mingw
317# endif
318 "-lgcc", // mingw, cygwin, and GCC platforms using `-nodefaultlibs -nostdlib`
319#endif
320#if defined __ANDROID__
321 "-lm", // to avoid 'cannot locate symbol "modf" referenced by .../_ruby_mjit_XXX.so"'
322#endif
323 NULL
324};
325
326#define CC_CODEFLAG_ARGS (mjit_opts.debug ? CC_DEBUG_ARGS : CC_OPTIMIZE_ARGS)
327
328// Print the arguments according to FORMAT to stderr only if MJIT
329// verbose option value is more or equal to LEVEL.
330PRINTF_ARGS(static void, 2, 3)
331verbose(int level, const char *format, ...)
332{
333 if (mjit_opts.verbose >= level) {
334 va_list args;
335 size_t len = strlen(format);
336 char *full_format = alloca(sizeof(char) * (len + 2));
337
338 // Creating `format + '\n'` to atomically print format and '\n'.
339 memcpy(full_format, format, len);
340 full_format[len] = '\n';
341 full_format[len+1] = '\0';
342
343 va_start(args, format);
344 vfprintf(stderr, full_format, args);
345 va_end(args);
346 }
347}
348
349PRINTF_ARGS(static void, 1, 2)
350mjit_warning(const char *format, ...)
351{
352 if (mjit_opts.warnings || mjit_opts.verbose) {
353 va_list args;
354
355 fprintf(stderr, "MJIT warning: ");
356 va_start(args, format);
357 vfprintf(stderr, format, args);
358 va_end(args);
359 fprintf(stderr, "\n");
360 }
361}
362
363// Add unit node to the tail of doubly linked `list`. It should be not in
364// the list before.
365static void
366add_to_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
367{
368 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_unit_queue, list == &unit_queue);
369 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_active_units, list == &active_units);
370 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_compact_units, list == &compact_units);
371 (void)RB_DEBUG_COUNTER_INC_IF(mjit_length_stale_units, list == &stale_units);
372
373 list_add_tail(&list->head, &unit->unode);
374 list->length++;
375}
376
377static void
378remove_from_list(struct rb_mjit_unit *unit, struct rb_mjit_unit_list *list)
379{
380#if USE_DEBUG_COUNTER
381 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_unit_queue, -1, list == &unit_queue);
382 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_active_units, -1, list == &active_units);
383 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_compact_units, -1, list == &compact_units);
384 rb_debug_counter_add(RB_DEBUG_COUNTER_mjit_length_stale_units, -1, list == &stale_units);
385#endif
386
387 list_del(&unit->unode);
388 list->length--;
389}
390
// Delete `filename`. A failure is not fatal: it is only reported through
// mjit_warning together with the errno description.
static void
remove_file(const char *filename)
{
    if (remove(filename) == 0) return;

    mjit_warning("failed to remove \"%s\": %s", filename, strerror(errno));
}
398
// Lazily delete .so files. Only Windows builds do anything here: the file
// name remembered in unit->so_file is removed from disk and then freed.
static void
clean_temp_files(struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    char *pending = unit->so_file;
    if (pending == NULL) return;

    // Detach the name from the unit before deleting the file.
    unit->so_file = NULL;
    // unit->so_file is set only when mjit_opts.save_temps is false.
    remove_file(pending);
    free(pending);
#endif
}
414
415// This is called in the following situations:
416// 1) On dequeue or `unload_units()`, associated ISeq is already GCed.
417// 2) The unit is not called often and unloaded by `unload_units()`.
418// 3) Freeing lists on `mjit_finish()`.
419//
420// `jit_func` value does not matter for 1 and 3 since the unit won't be used anymore.
421// For the situation 2, this sets the ISeq's JIT state to NOT_COMPILED_JIT_ISEQ_FUNC
422// to prevent the situation that the same methods are continuously compiled.
423static void
424free_unit(struct rb_mjit_unit *unit)
425{
426 if (unit->iseq) { // ISeq is not GCed
427 unit->iseq->body->jit_func = (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
428 unit->iseq->body->jit_unit = NULL;
429 }
430 if (unit->cc_entries) {
431 void *entries = (void *)unit->cc_entries;
432 free(entries);
433 }
434 if (unit->handle && dlclose(unit->handle)) { // handle is NULL if it's in queue
435 mjit_warning("failed to close handle for u%d: %s", unit->id, dlerror());
436 }
437 clean_temp_files(unit);
438 free(unit);
439}
440
441// Start a critical section. Use message `msg` to print debug info at `level`.
442static inline void
443CRITICAL_SECTION_START(int level, const char *msg)
444{
445 verbose(level, "Locking %s", msg);
446 rb_native_mutex_lock(&mjit_engine_mutex);
447 verbose(level, "Locked %s", msg);
448}
449
450// Finish the current critical section. Use message `msg` to print
451// debug info at `level`.
452static inline void
453CRITICAL_SECTION_FINISH(int level, const char *msg)
454{
455 verbose(level, "Unlocked %s", msg);
456 rb_native_mutex_unlock(&mjit_engine_mutex);
457}
458
459static int
460sprint_uniq_filename(char *str, size_t size, unsigned long id, const char *prefix, const char *suffix)
461{
462 return snprintf(str, size, "%s/%sp%"PRI_PIDT_PREFIX"uu%lu%s", tmp_dir, prefix, getpid(), id, suffix);
463}
464
// Return the current time in milliseconds as a double. On Apple platforms
// this forwards to ruby_real_ms_time(); elsewhere it reads clock_gettime
// (CLOCK_MONOTONIC when defined, otherwise CLOCK_REALTIME) if available and
// falls back to gettimeofday.
#ifdef __APPLE__
double ruby_real_ms_time(void);
# define real_ms_time() ruby_real_ms_time()
#else
static double
real_ms_time(void)
{
# ifdef HAVE_CLOCK_GETTIME
#  ifdef CLOCK_MONOTONIC
    const clockid_t clock_id = CLOCK_MONOTONIC;
#  else
    const clockid_t clock_id = CLOCK_REALTIME;
#  endif
    struct timespec now;

    clock_gettime(clock_id, &now);
    return now.tv_nsec / 1000000.0 + now.tv_sec * 1000.0;
# else
    struct timeval now;

    gettimeofday(&now, NULL);
    return now.tv_usec / 1000.0 + now.tv_sec * 1000.0;
# endif
}
#endif
491
492// Return the best unit from list. The best is the first
493// high priority unit or the unit whose iseq has the biggest number
494// of calls so far.
495static struct rb_mjit_unit *
496get_from_list(struct rb_mjit_unit_list *list)
497{
498 while (in_gc) {
499 verbose(3, "Waiting wakeup from GC");
500 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
501 }
502 in_jit = true; // Lock GC
503
504 // Find iseq with max total_calls
505 struct rb_mjit_unit *unit = NULL, *next, *best = NULL;
506 list_for_each_safe(&list->head, unit, next, unode) {
507 if (unit->iseq == NULL) { // ISeq is GCed.
508 remove_from_list(unit, list);
509 free_unit(unit);
510 continue;
511 }
512
513 if (best == NULL || best->iseq->body->total_calls < unit->iseq->body->total_calls) {
514 best = unit;
515 }
516 }
517
518 in_jit = false; // Unlock GC
519 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
520 rb_native_cond_signal(&mjit_client_wakeup);
521
522 if (best) {
523 remove_from_list(best, list);
524 }
525 return best;
526}
527
// Count the entries of the NULL-terminated array `args`; the terminating
// NULL marker itself is not counted.
static size_t
args_len(char *const *args)
{
    size_t count = 0;

    while (args[count] != NULL) {
        count++;
    }
    return count;
}
538
539// Concatenate `num` passed NULL-terminated arrays of strings, put the
540// result (with NULL end marker) into the heap, and return the result.
541static char **
542form_args(int num, ...)
543{
544 va_list argp;
545 size_t len, n;
546 int i;
547 char **args, **res, **tmp;
548
549 va_start(argp, num);
550 res = NULL;
551 for (i = len = 0; i < num; i++) {
552 args = va_arg(argp, char **);
553 n = args_len(args);
554 if ((tmp = (char **)realloc(res, sizeof(char *) * (len + n + 1))) == NULL) {
555 free(res);
556 res = NULL;
557 break;
558 }
559 res = tmp;
560 MEMCPY(res + len, args, char *, n + 1);
561 len += n;
562 }
563 va_end(argp);
564 return res;
565}
566
567COMPILER_WARNING_PUSH
568#if __has_warning("-Wdeprecated-declarations") || RBIMPL_COMPILER_IS(GCC)
569COMPILER_WARNING_IGNORED(-Wdeprecated-declarations)
570#endif
571// Start an OS process of absolute executable path with arguments `argv`.
572// Return PID of the process.
573static pid_t
574start_process(const char *abspath, char *const *argv)
575{
576 // Not calling non-async-signal-safe functions between vfork
577 // and execv for safety
578 int dev_null = rb_cloexec_open(ruby_null_device, O_WRONLY, 0);
579 if (dev_null < 0) {
580 verbose(1, "MJIT: Failed to open a null device: %s", strerror(errno));
581 return -1;
582 }
583 if (mjit_opts.verbose >= 2) {
584 const char *arg;
585 fprintf(stderr, "Starting process: %s", abspath);
586 for (int i = 0; (arg = argv[i]) != NULL; i++)
587 fprintf(stderr, " %s", arg);
588 fprintf(stderr, "\n");
589 }
590
591 pid_t pid;
592#ifdef _WIN32
593 extern HANDLE rb_w32_start_process(const char *abspath, char *const *argv, int out_fd);
594 int out_fd = 0;
595 if (mjit_opts.verbose <= 1) {
596 // Discard cl.exe's outputs like:
597 // _ruby_mjit_p12u3.c
598 // Creating library C:.../_ruby_mjit_p12u3.lib and object C:.../_ruby_mjit_p12u3.exp
599 out_fd = dev_null;
600 }
601
602 pid = (pid_t)rb_w32_start_process(abspath, argv, out_fd);
603 if (pid == 0) {
604 verbose(1, "MJIT: Failed to create process: %s", dlerror());
605 return -1;
606 }
607#else
608 if ((pid = vfork()) == 0) { /* TODO: reuse some function in process.c */
609 umask(0077);
610 if (mjit_opts.verbose == 0) {
611 // CC can be started in a thread using a file which has been
612 // already removed while MJIT is finishing. Discard the
613 // messages about missing files.
614 dup2(dev_null, STDERR_FILENO);
615 dup2(dev_null, STDOUT_FILENO);
616 }
617 (void)close(dev_null);
618 pid = execv(abspath, argv); // Pid will be negative on an error
619 // Even if we successfully found CC to compile PCH we still can
620 // fail with loading the CC in very rare cases for some reasons.
621 // Stop the forked process in this case.
622 verbose(1, "MJIT: Error in execv: %s", abspath);
623 _exit(1);
624 }
625#endif
626 (void)close(dev_null);
627 return pid;
628}
629COMPILER_WARNING_POP
630
631// Execute an OS process of executable PATH with arguments ARGV.
632// Return -1 or -2 if failed to execute, otherwise exit code of the process.
633// TODO: Use a similar function in process.c
634static int
635exec_process(const char *path, char *const argv[])
636{
637 int stat, exit_code = -2;
638 rb_vm_t *vm = WAITPID_USE_SIGCHLD ? GET_VM() : 0;
640
641 if (vm) {
643 rb_native_mutex_lock(&vm->waitpid_lock);
644 }
645
646 pid_t pid = start_process(path, argv);
647 for (;pid > 0;) {
648 pid_t r = vm ? ruby_waitpid_locked(vm, pid, &stat, 0, &cond)
649 : waitpid(pid, &stat, 0);
650 if (r == -1) {
651 if (errno == EINTR) continue;
652 fprintf(stderr, "[%"PRI_PIDT_PREFIX"d] waitpid(%lu): %s (SIGCHLD=%d,%u)\n",
653 getpid(), (unsigned long)pid, strerror(errno),
654 RUBY_SIGCHLD, SIGCHLD_LOSSY);
655 break;
656 }
657 else if (r == pid) {
658 if (WIFEXITED(stat)) {
659 exit_code = WEXITSTATUS(stat);
660 break;
661 }
662 else if (WIFSIGNALED(stat)) {
663 exit_code = -1;
664 break;
665 }
666 }
667 }
668
669 if (vm) {
670 rb_native_mutex_unlock(&vm->waitpid_lock);
672 }
673 return exit_code;
674}
675
// Get rid of the shared object `so_file` built for `unit`. On Windows the
// removal is deferred: a copy of the name is stored in unit->so_file so that
// `clean_temp_files()` can delete it later. Elsewhere the file is removed
// right away.
static void
remove_so_file(const char *so_file, struct rb_mjit_unit *unit)
{
#if defined(_WIN32)
    // Windows can't remove files while it's used.
    char *deferred = strdup(so_file);

    unit->so_file = deferred; // lazily delete on `clean_temp_files()`
    if (deferred == NULL) {
        mjit_warning("failed to allocate memory to lazily remove '%s': %s", so_file, strerror(errno));
    }
#else
    remove_file(so_file);
#endif
}
688
689// Print _mjitX, but make a human-readable funcname when --mjit-debug is used
690static void
691sprint_funcname(char *funcname, const struct rb_mjit_unit *unit)
692{
693 const rb_iseq_t *iseq = unit->iseq;
694 if (iseq == NULL || (!mjit_opts.debug && !mjit_opts.debug_flags)) {
695 sprintf(funcname, "_mjit%d", unit->id);
696 return;
697 }
698
699 // Generate a short path
700 const char *path = RSTRING_PTR(rb_iseq_path(iseq));
701 const char *lib = "/lib/";
702 const char *version = "/" STRINGIZE(RUBY_API_VERSION_MAJOR) "." STRINGIZE(RUBY_API_VERSION_MINOR) "." STRINGIZE(RUBY_API_VERSION_TEENY) "/";
703 while (strstr(path, lib)) // skip "/lib/"
704 path = strstr(path, lib) + strlen(lib);
705 while (strstr(path, version)) // skip "/x.y.z/"
706 path = strstr(path, version) + strlen(version);
707
708 // Annotate all-normalized method names
709 const char *method = RSTRING_PTR(iseq->body->location.label);
710 if (!strcmp(method, "[]")) method = "AREF";
711 if (!strcmp(method, "[]=")) method = "ASET";
712
713 // Print and normalize
714 sprintf(funcname, "_mjit%d_%s_%s", unit->id, path, method);
715 for (size_t i = 0; i < strlen(funcname); i++) {
716 char c = funcname[i];
717 if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_')) {
718 funcname[i] = '_';
719 }
720 }
721}
722
723static const rb_iseq_t **compiling_iseqs = NULL;
724
725static bool
726set_compiling_iseqs(const rb_iseq_t *iseq)
727{
728 compiling_iseqs = calloc(iseq->body->iseq_size + 2, sizeof(rb_iseq_t *)); // 2: 1 (unit->iseq) + 1 (NULL end)
729 if (compiling_iseqs == NULL)
730 return false;
731
732 compiling_iseqs[0] = iseq;
733 int i = 1;
734
735 unsigned int pos = 0;
736 while (pos < iseq->body->iseq_size) {
737 int insn = rb_vm_insn_decode(iseq->body->iseq_encoded[pos]);
738 if (insn == BIN(opt_send_without_block) || insn == BIN(opt_size)) {
739 CALL_DATA cd = (CALL_DATA)iseq->body->iseq_encoded[pos + 1];
740 extern const rb_iseq_t *rb_mjit_inlinable_iseq(const struct rb_callinfo *ci, const struct rb_callcache *cc);
741 const rb_iseq_t *iseq = rb_mjit_inlinable_iseq(cd->ci, cd->cc);
742 if (iseq != NULL) {
743 compiling_iseqs[i] = iseq;
744 i++;
745 }
746 }
747 pos += insn_len(insn);
748 }
749 return true;
750}
751
752static void
753free_compiling_iseqs(void)
754{
756#ifdef _MSC_VER
757 RBIMPL_WARNING_IGNORED(4090); /* suppress false warning by MSVC */
758#endif
759 free(compiling_iseqs);
761 compiling_iseqs = NULL;
762}
763
764bool
765rb_mjit_compiling_iseq_p(const rb_iseq_t *iseq)
766{
767 assert(compiling_iseqs != NULL);
768 int i = 0;
769 while (compiling_iseqs[i]) {
770 if (compiling_iseqs[i] == iseq) return true;
771 i++;
772 }
773 return false;
774}
775
776static const int c_file_access_mode =
777#ifdef O_BINARY
778 O_BINARY|
779#endif
780 O_WRONLY|O_EXCL|O_CREAT;
781
782#define append_str2(p, str, len) ((char *)memcpy((p), str, (len))+(len))
783#define append_str(p, str) append_str2(p, str, sizeof(str)-1)
784#define append_lit(p, str) append_str2(p, str, rb_strlen_lit(str))
785
786#ifdef _MSC_VER
787// Compile C file to so. It returns true if it succeeds. (mswin)
788static bool
789compile_c_to_so(const char *c_file, const char *so_file)
790{
791 const char *files[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, "-link", libruby_pathflag, NULL };
792 char *p;
793
794 // files[0] = "-Fe*.dll"
795 files[0] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fe") + strlen(so_file) + 1));
796 p = append_lit(p, "-Fe");
797 p = append_str2(p, so_file, strlen(so_file));
798 *p = '\0';
799
800 // files[1] = "-Fo*.obj"
801 // We don't need .obj file, but it's somehow created to cwd without -Fo and we want to control the output directory.
802 files[1] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fo") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".obj") + 1));
803 char *obj_file = p = append_lit(p, "-Fo");
804 p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
805 p = append_lit(p, ".obj");
806 *p = '\0';
807
808 // files[2] = "-Yu*.pch"
809 files[2] = p = alloca(sizeof(char) * (rb_strlen_lit("-Yu") + strlen(pch_file) + 1));
810 p = append_lit(p, "-Yu");
811 p = append_str2(p, pch_file, strlen(pch_file));
812 *p = '\0';
813
814 // files[3] = "C:/.../rb_mjit_header-*.obj"
815 files[3] = p = alloca(sizeof(char) * (strlen(pch_file) + 1));
816 p = append_str2(p, pch_file, strlen(pch_file) - strlen(".pch"));
817 p = append_lit(p, ".obj");
818 *p = '\0';
819
820 // files[4] = "-Tc*.c"
821 files[4] = p = alloca(sizeof(char) * (rb_strlen_lit("-Tc") + strlen(c_file) + 1));
822 p = append_lit(p, "-Tc");
823 p = append_str2(p, c_file, strlen(c_file));
824 *p = '\0';
825
826 // files[5] = "-Fd*.pdb"
827 // Generate .pdb file in temporary directory instead of cwd.
828 files[5] = p = alloca(sizeof(char) * (rb_strlen_lit("-Fd") + strlen(so_file) - rb_strlen_lit(DLEXT) + rb_strlen_lit(".pdb") + 1));
829 p = append_lit(p, "-Fd");
830 p = append_str2(p, so_file, strlen(so_file) - rb_strlen_lit(DLEXT));
831 p = append_lit(p, ".pdb");
832 *p = '\0';
833
834 // files[6] = "-Z7"
835 // Put this last to override any debug options that came previously.
836 files[6] = p = alloca(sizeof(char) * rb_strlen_lit("-Z7") + 1);
837 p = append_lit(p, "-Z7");
838 *p = '\0';
839
840 char **args = form_args(5, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS,
841 files, CC_LIBS, CC_DLDFLAGS_ARGS);
842 if (args == NULL)
843 return false;
844
845 int exit_code = exec_process(cc_path, args);
846 free(args);
847
848 if (exit_code == 0) {
849 // remove never-used files (.obj, .lib, .exp, .pdb). XXX: Is there any way not to generate this?
850 if (!mjit_opts.save_temps) {
851 char *before_dot;
852 remove_file(obj_file);
853
854 before_dot = obj_file + strlen(obj_file) - rb_strlen_lit(".obj");
855 append_lit(before_dot, ".lib"); remove_file(obj_file);
856 append_lit(before_dot, ".exp"); remove_file(obj_file);
857 append_lit(before_dot, ".pdb"); remove_file(obj_file);
858 }
859 }
860 else {
861 verbose(2, "compile_c_to_so: compile error: %d", exit_code);
862 }
863 return exit_code == 0;
864}
865#else // _MSC_VER
866
867// The function producing the pre-compiled header.
868static void
869make_pch(void)
870{
871 const char *rest_args[] = {
872# ifdef __clang__
873 "-emit-pch",
874 "-c",
875# endif
876 // -nodefaultlibs is a linker flag, but it may affect cc1 behavior on Gentoo, which should NOT be changed on pch:
877 // https://gitweb.gentoo.org/proj/gcc-patches.git/tree/7.3.0/gentoo/13_all_default-ssp-fix.patch
878 GCC_NOSTDLIB_FLAGS
879 "-o", pch_file, header_file,
880 NULL,
881 };
882
883 verbose(2, "Creating precompiled header");
884 char **args = form_args(4, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, rest_args);
885 if (args == NULL) {
886 mjit_warning("making precompiled header failed on forming args");
887 CRITICAL_SECTION_START(3, "in make_pch");
888 pch_status = PCH_FAILED;
889 CRITICAL_SECTION_FINISH(3, "in make_pch");
890 return;
891 }
892
893 int exit_code = exec_process(cc_path, args);
894 free(args);
895
896 CRITICAL_SECTION_START(3, "in make_pch");
897 if (exit_code == 0) {
898 pch_status = PCH_SUCCESS;
899 }
900 else {
901 mjit_warning("Making precompiled header failed on compilation. Stopping MJIT worker...");
902 pch_status = PCH_FAILED;
903 }
904 /* wakeup `mjit_finish` */
905 rb_native_cond_broadcast(&mjit_pch_wakeup);
906 CRITICAL_SECTION_FINISH(3, "in make_pch");
907}
908
909// Compile .c file to .so file. It returns true if it succeeds. (non-mswin)
910// Not compiling .c to .so directly because it fails on MinGW, and this helps
911// to generate no .dSYM on macOS.
912static bool
913compile_c_to_so(const char *c_file, const char *so_file)
914{
915 char* o_file = alloca(strlen(c_file) + 1);
916 strcpy(o_file, c_file);
917 o_file[strlen(c_file) - 1] = 'o';
918
919 const char *o_args[] = {
920 "-o", o_file, c_file,
921# ifdef __clang__
922 "-include-pch", pch_file,
923# endif
924 "-c", NULL
925 };
926 char **args = form_args(5, cc_common_args, CC_CODEFLAG_ARGS, cc_added_args, o_args, CC_LINKER_ARGS);
927 if (args == NULL) return false;
928 int exit_code = exec_process(cc_path, args);
929 free(args);
930 if (exit_code != 0) {
931 verbose(2, "compile_c_to_so: failed to compile .c to .o: %d", exit_code);
932 return false;
933 }
934
935 const char *so_args[] = {
936 "-o", so_file,
937# ifdef _WIN32
938 libruby_pathflag,
939# endif
940 o_file, NULL
941 };
942# if defined(__MACH__)
943 extern VALUE rb_libruby_selfpath;
944 const char *loader_args[] = {"-bundle_loader", StringValuePtr(rb_libruby_selfpath), NULL};
945# else
946 const char *loader_args[] = {NULL};
947# endif
948 args = form_args(7, CC_LDSHARED_ARGS, CC_CODEFLAG_ARGS, so_args, loader_args, CC_LIBS, CC_DLDFLAGS_ARGS, CC_LINKER_ARGS);
949 if (args == NULL) return false;
950 exit_code = exec_process(cc_path, args);
951 free(args);
952 if (!mjit_opts.save_temps) remove_file(o_file);
953 if (exit_code != 0) {
954 verbose(2, "compile_c_to_so: failed to link .o to .so: %d", exit_code);
955 }
956 return exit_code == 0;
957}
958#endif // _MSC_VER
959
960#if USE_JIT_COMPACTION
961static void compile_prelude(FILE *f);
962
963static bool
964compile_compact_jit_code(char* c_file)
965{
966 FILE *f;
967 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
968 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
969 int e = errno;
970 if (fd >= 0) (void)close(fd);
971 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
972 return false;
973 }
974
975 compile_prelude(f);
976
977 // wait until mjit_gc_exit_hook is called
978 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
979 while (in_gc) {
980 verbose(3, "Waiting wakeup from GC");
981 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
982 }
983 // We need to check again here because we could've waited on GC above
984 bool iseq_gced = false;
985 struct rb_mjit_unit *child_unit = 0, *next;
986 list_for_each_safe(&active_units.head, child_unit, next, unode) {
987 if (child_unit->iseq == NULL) { // ISeq is GC-ed
988 iseq_gced = true;
989 verbose(1, "JIT compaction: A method for JIT code u%d is obsoleted. Compaction will be skipped.", child_unit->id);
990 remove_from_list(child_unit, &active_units);
991 free_unit(child_unit); // unload it without waiting for throttled unload_units to retry compaction quickly
992 }
993 }
994 in_jit = !iseq_gced;
995 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
996 if (!in_jit) {
997 fclose(f);
998 if (!mjit_opts.save_temps)
999 remove_file(c_file);
1000 return false;
1001 }
1002
1003 // This entire loop lock GC so that we do not need to consider a case that
1004 // ISeq is GC-ed in a middle of re-compilation. It takes 3~4ms with 100 methods
1005 // on my machine. It's not too bad compared to compilation time of C (7200~8000ms),
1006 // but it might be larger if we use a larger --jit-max-cache.
1007 //
1008 // TODO: Consider using a more granular lock after we implement inlining across
1009 // compacted functions (not done yet).
1010 bool success = true;
1011 list_for_each(&active_units.head, child_unit, unode) {
1012 CRITICAL_SECTION_START(3, "before set_compiling_iseqs");
1013 success &= set_compiling_iseqs(child_unit->iseq);
1014 CRITICAL_SECTION_FINISH(3, "after set_compiling_iseqs");
1015 if (!success) continue;
1016
1017 char funcname[MAXPATHLEN];
1018 sprint_funcname(funcname, child_unit);
1019
1020 long iseq_lineno = 0;
1021 if (FIXNUM_P(child_unit->iseq->body->location.first_lineno))
1022 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1023 iseq_lineno = FIX2LONG(child_unit->iseq->body->location.first_lineno);
1024 const char *sep = "@";
1025 const char *iseq_label = RSTRING_PTR(child_unit->iseq->body->location.label);
1026 const char *iseq_path = RSTRING_PTR(rb_iseq_path(child_unit->iseq));
1027 if (!iseq_label) iseq_label = sep = "";
1028 fprintf(f, "\n/* %s%s%s:%ld */\n", iseq_label, sep, iseq_path, iseq_lineno);
1029 success &= mjit_compile(f, child_unit->iseq, funcname, child_unit->id);
1030
1031 CRITICAL_SECTION_START(3, "before compiling_iseqs free");
1032 free_compiling_iseqs();
1033 CRITICAL_SECTION_FINISH(3, "after compiling_iseqs free");
1034 }
1035
1036 // release blocking mjit_gc_start_hook
1037 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1038 in_jit = false;
1039 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1040 rb_native_cond_signal(&mjit_client_wakeup);
1041 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1042
1043 fclose(f);
1044 return success;
1045}
1046
1047// Compile all cached .c files and build a single .so file. Reload all JIT func from it.
1048// This improves the code locality for better performance in terms of iTLB and iCache.
1049static void
1050compact_all_jit_code(void)
1051{
1052 struct rb_mjit_unit *unit, *cur = 0;
1053 static const char c_ext[] = ".c";
1054 static const char so_ext[] = DLEXT;
1055 char c_file[MAXPATHLEN], so_file[MAXPATHLEN];
1056
1057 // Abnormal use case of rb_mjit_unit that doesn't have ISeq
1058 unit = calloc(1, sizeof(struct rb_mjit_unit)); // To prevent GC, don't use ZALLOC
1059 if (unit == NULL) return;
1060 unit->id = current_unit_num++;
1061 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1062 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1063
1064 bool success = compile_compact_jit_code(c_file);
1065 double start_time = real_ms_time();
1066 if (success) {
1067 success = compile_c_to_so(c_file, so_file);
1068 if (!mjit_opts.save_temps)
1069 remove_file(c_file);
1070 }
1071 double end_time = real_ms_time();
1072
1073 if (success) {
1074 void *handle = dlopen(so_file, RTLD_NOW);
1075 if (handle == NULL) {
1076 mjit_warning("failure in loading code from compacted '%s': %s", so_file, dlerror());
1077 free(unit);
1078 return;
1079 }
1080 unit->handle = handle;
1081
1082 // lazily dlclose handle (and .so file for win32) on `mjit_finish()`.
1083 add_to_list(unit, &compact_units);
1084
1085 if (!mjit_opts.save_temps)
1086 remove_so_file(so_file, unit);
1087
1088 CRITICAL_SECTION_START(3, "in compact_all_jit_code to read list");
1089 list_for_each(&active_units.head, cur, unode) {
1090 void *func;
1091 char funcname[MAXPATHLEN];
1092 sprint_funcname(funcname, cur);
1093
1094 if ((func = dlsym(handle, funcname)) == NULL) {
1095 mjit_warning("skipping to reload '%s' from '%s': %s", funcname, so_file, dlerror());
1096 continue;
1097 }
1098
1099 if (cur->iseq) { // Check whether GCed or not
1100 // Usage of jit_code might be not in a critical section.
1101 MJIT_ATOMIC_SET(cur->iseq->body->jit_func, (mjit_func_t)func);
1102 }
1103 }
1104 CRITICAL_SECTION_FINISH(3, "in compact_all_jit_code to read list");
1105 verbose(1, "JIT compaction (%.1fms): Compacted %d methods %s -> %s", end_time - start_time, active_units.length, c_file, so_file);
1106 }
1107 else {
1108 free(unit);
1109 verbose(1, "JIT compaction failure (%.1fms): Failed to compact methods", end_time - start_time);
1110 }
1111}
1112#endif // USE_JIT_COMPACTION
1113
1114static void *
1115load_func_from_so(const char *so_file, const char *funcname, struct rb_mjit_unit *unit)
1116{
1117 void *handle, *func;
1118
1119 handle = dlopen(so_file, RTLD_NOW);
1120 if (handle == NULL) {
1121 mjit_warning("failure in loading code from '%s': %s", so_file, dlerror());
1122 return (void *)NOT_COMPILED_JIT_ISEQ_FUNC;
1123 }
1124
1125 func = dlsym(handle, funcname);
1126 unit->handle = handle;
1127 return func;
1128}
1129
1130#ifndef __clang__
// Return a pointer to where the header name inside S ends.
// With GCC-compatible compilers a trailing ".gch" is excluded, provided
// something precedes it; otherwise the result is the terminating NUL of S.
static const char *
header_name_end(const char *s)
{
    const size_t name_len = strlen(s);
    const char *end = s + name_len;
# ifdef __GNUC__ // don't chomp .pch for mswin
    static const char gch_ext[] = ".gch";
    const size_t ext_len = sizeof(gch_ext) - 1;

    // chomp .gch suffix
    if (name_len > ext_len && strcmp(end - ext_len, gch_ext) == 0) {
        end -= ext_len;
    }
# endif
    return end;
}
1145#endif
1146
1147// Print platform-specific prerequisites in generated code.
1148static void
1149compile_prelude(FILE *f)
1150{
1151#ifndef __clang__ // -include-pch is used for Clang
1152 const char *s = pch_file;
1153 const char *e = header_name_end(s);
1154
1155 fprintf(f, "#include \"");
1156 // print pch_file except .gch for gcc, but keep .pch for mswin
1157 for (; s < e; s++) {
1158 switch (*s) {
1159 case '\\': case '"':
1160 fputc('\\', f);
1161 }
1162 fputc(*s, f);
1163 }
1164 fprintf(f, "\"\n");
1165#endif
1166
1167#ifdef _WIN32
1168 fprintf(f, "void _pei386_runtime_relocator(void){}\n");
1169 fprintf(f, "int __stdcall DllMainCRTStartup(void* hinstDLL, unsigned int fdwReason, void* lpvReserved) { return 1; }\n");
1170#endif
1171}
1172
1173// Compile ISeq in UNIT and return function pointer of JIT-ed code.
1174// It may return NOT_COMPILED_JIT_ISEQ_FUNC if something went wrong.
1175static mjit_func_t
1176convert_unit_to_func(struct rb_mjit_unit *unit)
1177{
1178 static const char c_ext[] = ".c";
1179 static const char so_ext[] = DLEXT;
1180 char c_file[MAXPATHLEN], so_file[MAXPATHLEN], funcname[MAXPATHLEN];
1181
1182 sprint_uniq_filename(c_file, (int)sizeof(c_file), unit->id, MJIT_TMP_PREFIX, c_ext);
1183 sprint_uniq_filename(so_file, (int)sizeof(so_file), unit->id, MJIT_TMP_PREFIX, so_ext);
1184 sprint_funcname(funcname, unit);
1185
1186 FILE *f;
1187 int fd = rb_cloexec_open(c_file, c_file_access_mode, 0600);
1188 if (fd < 0 || (f = fdopen(fd, "w")) == NULL) {
1189 int e = errno;
1190 if (fd >= 0) (void)close(fd);
1191 verbose(1, "Failed to fopen '%s', giving up JIT for it (%s)", c_file, strerror(e));
1192 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1193 }
1194
1195 // print #include of MJIT header, etc.
1196 compile_prelude(f);
1197
1198 // wait until mjit_gc_exit_hook is called
1199 CRITICAL_SECTION_START(3, "before mjit_compile to wait GC finish");
1200 while (in_gc) {
1201 verbose(3, "Waiting wakeup from GC");
1202 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1203 }
1204 // We need to check again here because we could've waited on GC above
1205 in_jit = (unit->iseq != NULL);
1206 if (in_jit)
1207 in_jit &= set_compiling_iseqs(unit->iseq);
1208 CRITICAL_SECTION_FINISH(3, "before mjit_compile to wait GC finish");
1209 if (!in_jit) {
1210 fclose(f);
1211 if (!mjit_opts.save_temps)
1212 remove_file(c_file);
1213 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1214 }
1215
1216 // To make MJIT worker thread-safe against GC.compact, copy ISeq values while `in_jit` is true.
1217 long iseq_lineno = 0;
1218 if (FIXNUM_P(unit->iseq->body->location.first_lineno))
1219 // FIX2INT may fallback to rb_num2long(), which is a method call and dangerous in MJIT worker. So using only FIX2LONG.
1220 iseq_lineno = FIX2LONG(unit->iseq->body->location.first_lineno);
1221 char *iseq_label = alloca(RSTRING_LEN(unit->iseq->body->location.label) + 1);
1222 char *iseq_path = alloca(RSTRING_LEN(rb_iseq_path(unit->iseq)) + 1);
1223 strcpy(iseq_label, RSTRING_PTR(unit->iseq->body->location.label));
1224 strcpy(iseq_path, RSTRING_PTR(rb_iseq_path(unit->iseq)));
1225
1226 verbose(2, "start compilation: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1227 fprintf(f, "/* %s@%s:%ld */\n\n", iseq_label, iseq_path, iseq_lineno);
1228 bool success = mjit_compile(f, unit->iseq, funcname, unit->id);
1229
1230 // release blocking mjit_gc_start_hook
1231 CRITICAL_SECTION_START(3, "after mjit_compile to wakeup client for GC");
1232 free_compiling_iseqs();
1233 in_jit = false;
1234 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1235 rb_native_cond_signal(&mjit_client_wakeup);
1236 CRITICAL_SECTION_FINISH(3, "in worker to wakeup client for GC");
1237
1238 fclose(f);
1239 if (!success) {
1240 if (!mjit_opts.save_temps)
1241 remove_file(c_file);
1242 verbose(1, "JIT failure: %s@%s:%ld -> %s", iseq_label, iseq_path, iseq_lineno, c_file);
1243 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1244 }
1245
1246 double start_time = real_ms_time();
1247 success = compile_c_to_so(c_file, so_file);
1248 if (!mjit_opts.save_temps)
1249 remove_file(c_file);
1250 double end_time = real_ms_time();
1251
1252 if (!success) {
1253 verbose(2, "Failed to generate so: %s", so_file);
1254 return (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC;
1255 }
1256
1257 void *func = load_func_from_so(so_file, funcname, unit);
1258 if (!mjit_opts.save_temps)
1259 remove_so_file(so_file, unit);
1260
1261 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1262 verbose(1, "JIT success (%.1fms): %s@%s:%ld -> %s",
1263 end_time - start_time, iseq_label, iseq_path, iseq_lineno, c_file);
1264 }
1265 return (mjit_func_t)func;
1266}
1267
1268// To see cc_entries using index returned by `mjit_capture_cc_entries` in mjit_compile.c
1269const struct rb_callcache **
1270mjit_iseq_cc_entries(const struct rb_iseq_constant_body *const body)
1271{
1272 return body->jit_unit->cc_entries;
1273}
1274
1275// Capture cc entries of `captured_iseq` and append them to `compiled_iseq->jit_unit->cc_entries`.
1276// This is needed when `captured_iseq` is inlined by `compiled_iseq` and GC needs to mark inlined cc.
1277//
1278// Index to refer to `compiled_iseq->jit_unit->cc_entries` is returned instead of the address
1279// because old addresses may be invalidated by `realloc` later. -1 is returned on failure.
1280//
1281// This assumes that it's safe to reference cc without acquiring GVL.
1282int
1283mjit_capture_cc_entries(const struct rb_iseq_constant_body *compiled_iseq, const struct rb_iseq_constant_body *captured_iseq)
1284{
1285 struct rb_mjit_unit *unit = compiled_iseq->jit_unit;
1286 unsigned int new_entries_size = unit->cc_entries_size + captured_iseq->ci_size;
1287 VM_ASSERT(captured_iseq->ci_size > 0);
1288
1289 // Allocate new cc_entries and append them to unit->cc_entries
1290 const struct rb_callcache **cc_entries;
1291 int cc_entries_index = unit->cc_entries_size;
1292 if (unit->cc_entries_size == 0) {
1293 VM_ASSERT(unit->cc_entries == NULL);
1294 unit->cc_entries = cc_entries = malloc(sizeof(struct rb_callcache *) * new_entries_size);
1295 if (cc_entries == NULL) return -1;
1296 }
1297 else {
1298 void *cc_ptr = (void *)unit->cc_entries; // get rid of bogus warning by VC
1299 cc_entries = realloc(cc_ptr, sizeof(struct rb_callcache *) * new_entries_size);
1300 if (cc_entries == NULL) return -1;
1301 unit->cc_entries = cc_entries;
1302 cc_entries += cc_entries_index;
1303 }
1304 unit->cc_entries_size = new_entries_size;
1305
1306 // Capture cc to cc_enties
1307 for (unsigned int i = 0; i < captured_iseq->ci_size; i++) {
1308 cc_entries[i] = captured_iseq->call_data[i].cc;
1309 }
1310
1311 return cc_entries_index;
1312}
1313
1314// Set up field `used_code_p` for unit iseqs whose iseq on the stack of ec.
1315static void
1316mark_ec_units(rb_execution_context_t *ec)
1317{
1318 const rb_control_frame_t *cfp;
1319
1320 if (ec->vm_stack == NULL)
1321 return;
1322 for (cfp = RUBY_VM_END_CONTROL_FRAME(ec) - 1; ; cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
1323 const rb_iseq_t *iseq;
1324 if (cfp->pc && (iseq = cfp->iseq) != NULL
1325 && imemo_type((VALUE) iseq) == imemo_iseq
1326 && (iseq->body->jit_unit) != NULL) {
1327 iseq->body->jit_unit->used_code_p = true;
1328 }
1329
1330 if (cfp == ec->cfp)
1331 break; // reached the most recent cfp
1332 }
1333}
1334
1335// MJIT info related to an existing continutaion.
1337 rb_execution_context_t *ec; // continuation ec
1338 struct mjit_cont *prev, *next; // used to form lists
1339};
1340
// Doubly linked list of registered continuations. This is used to detect
// units which are in use in unload_units.
1343static struct mjit_cont *first_cont;
1344
1345// Unload JIT code of some units to satisfy the maximum permitted
1346// number of units with a loaded code.
1347static void
1348unload_units(void)
1349{
1350 struct rb_mjit_unit *unit = 0, *next;
1351 struct mjit_cont *cont;
1352 int units_num = active_units.length;
1353
1354 // For now, we don't unload units when ISeq is GCed. We should
1355 // unload such ISeqs first here.
1356 list_for_each_safe(&active_units.head, unit, next, unode) {
1357 if (unit->iseq == NULL) { // ISeq is GCed.
1358 remove_from_list(unit, &active_units);
1359 free_unit(unit);
1360 }
1361 }
1362
1363 // Detect units which are in use and can't be unloaded.
1364 list_for_each(&active_units.head, unit, unode) {
1365 assert(unit->iseq != NULL && unit->handle != NULL);
1366 unit->used_code_p = false;
1367 }
1368 // All threads have a root_fiber which has a mjit_cont. Other normal fibers also
1369 // have a mjit_cont. Thus we can check ISeqs in use by scanning ec of mjit_conts.
1370 for (cont = first_cont; cont != NULL; cont = cont->next) {
1371 mark_ec_units(cont->ec);
1372 }
1373 // TODO: check stale_units and unload unused ones! (note that the unit is not associated to ISeq anymore)
1374
1375 // Unload units whose total_calls is smaller than any total_calls in unit_queue.
1376 // TODO: make the algorithm more efficient
1377 long unsigned prev_queue_calls = -1;
1378 while (true) {
1379 // Calculate the next max total_calls in unit_queue
1380 long unsigned max_queue_calls = 0;
1381 list_for_each(&unit_queue.head, unit, unode) {
1382 if (unit->iseq != NULL && max_queue_calls < unit->iseq->body->total_calls
1383 && unit->iseq->body->total_calls < prev_queue_calls) {
1384 max_queue_calls = unit->iseq->body->total_calls;
1385 }
1386 }
1387 prev_queue_calls = max_queue_calls;
1388
1389 bool unloaded_p = false;
1390 list_for_each_safe(&active_units.head, unit, next, unode) {
1391 if (unit->used_code_p) // We can't unload code on stack.
1392 continue;
1393
1394 if (max_queue_calls > unit->iseq->body->total_calls) {
1395 verbose(2, "Unloading unit %d (calls=%lu, threshold=%lu)",
1396 unit->id, unit->iseq->body->total_calls, max_queue_calls);
1397 assert(unit->handle != NULL);
1398 remove_from_list(unit, &active_units);
1399 free_unit(unit);
1400 unloaded_p = true;
1401 }
1402 }
1403 if (!unloaded_p) break;
1404 }
1405
1406 if (units_num > active_units.length) {
1407 verbose(1, "Too many JIT code -- %d units unloaded", units_num - active_units.length);
1408 total_unloads += units_num - active_units.length;
1409 }
1410}
1411
1412static void mjit_add_iseq_to_process(const rb_iseq_t *iseq, const struct rb_mjit_compile_info *compile_info, bool worker_p);
1413
1414// The function implementing a worker. It is executed in a separate
1415// thread by rb_thread_create_mjit_thread. It compiles precompiled header
1416// and then compiles requested ISeqs.
1417void
1418mjit_worker(void)
1419{
1420 // Allow only `max_cache_size / 100` times (default: 100) of compaction.
1421 // Note: GC of compacted code has not been implemented yet.
1422 int max_compact_size = mjit_opts.max_cache_size / 100;
1423 if (max_compact_size < 10) max_compact_size = 10;
1424
1425 // Run unload_units after it's requested `max_cache_size / 10` (default: 10) times.
1426 // This throttles the call to mitigate locking in unload_units. It also throttles JIT compaction.
1427 int throttle_threshold = mjit_opts.max_cache_size / 10;
1428
1429#ifndef _MSC_VER
1430 if (pch_status == PCH_NOT_READY) {
1431 make_pch();
1432 }
1433#endif
1434 if (pch_status == PCH_FAILED) {
1435 mjit_enabled = false;
1436 CRITICAL_SECTION_START(3, "in worker to update worker_stopped");
1437 worker_stopped = true;
1438 verbose(3, "Sending wakeup signal to client in a mjit-worker");
1439 rb_native_cond_signal(&mjit_client_wakeup);
1440 CRITICAL_SECTION_FINISH(3, "in worker to update worker_stopped");
1441 return; // TODO: do the same thing in the latter half of mjit_finish
1442 }
1443
1444 // main worker loop
1445 while (!stop_worker_p) {
1446 struct rb_mjit_unit *unit;
1447
1448 // Wait until a unit becomes available
1449 CRITICAL_SECTION_START(3, "in worker dequeue");
1450 while ((list_empty(&unit_queue.head) || active_units.length >= mjit_opts.max_cache_size) && !stop_worker_p) {
1451 rb_native_cond_wait(&mjit_worker_wakeup, &mjit_engine_mutex);
1452 verbose(3, "Getting wakeup from client");
1453
1454 // Lazily move active_units to stale_units to avoid race conditions around active_units with compaction
1455 if (pending_stale_p) {
1456 pending_stale_p = false;
1457 struct rb_mjit_unit *next;
1458 list_for_each_safe(&active_units.head, unit, next, unode) {
1459 if (unit->stale_p) {
1460 unit->stale_p = false;
1461 remove_from_list(unit, &active_units);
1462 add_to_list(unit, &stale_units);
1463 // Lazily put it to unit_queue as well to avoid race conditions on jit_unit with mjit_compile.
1464 mjit_add_iseq_to_process(unit->iseq, &unit->iseq->body->jit_unit->compile_info, true);
1465 }
1466 }
1467 }
1468
1469 // Unload some units as needed
1470 if (unload_requests >= throttle_threshold) {
1471 while (in_gc) {
1472 verbose(3, "Waiting wakeup from GC");
1473 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1474 }
1475 in_jit = true; // Lock GC
1476
1477 RB_DEBUG_COUNTER_INC(mjit_unload_units);
1478 unload_units();
1479 unload_requests = 0;
1480
1481 in_jit = false; // Unlock GC
1482 verbose(3, "Sending wakeup signal to client in a mjit-worker for GC");
1483 rb_native_cond_signal(&mjit_client_wakeup);
1484 }
1485 if (active_units.length == mjit_opts.max_cache_size && mjit_opts.wait) { // Sometimes all methods may be in use
1486 mjit_opts.max_cache_size++; // avoid infinite loop on `rb_mjit_wait_call`. Note that --jit-wait is just for testing.
1487 verbose(1, "No units can be unloaded -- incremented max-cache-size to %d for --jit-wait", mjit_opts.max_cache_size);
1488 }
1489 }
1490 unit = get_from_list(&unit_queue);
1491 CRITICAL_SECTION_FINISH(3, "in worker dequeue");
1492
1493 if (unit) {
1494 // JIT compile
1495 mjit_func_t func = convert_unit_to_func(unit);
1496 (void)RB_DEBUG_COUNTER_INC_IF(mjit_compile_failures, func == (mjit_func_t)NOT_COMPILED_JIT_ISEQ_FUNC);
1497
1498 CRITICAL_SECTION_START(3, "in jit func replace");
1499 while (in_gc) { // Make sure we're not GC-ing when touching ISeq
1500 verbose(3, "Waiting wakeup from GC");
1501 rb_native_cond_wait(&mjit_gc_wakeup, &mjit_engine_mutex);
1502 }
1503 if (unit->iseq) { // Check whether GCed or not
1504 if ((uintptr_t)func > (uintptr_t)LAST_JIT_ISEQ_FUNC) {
1505 add_to_list(unit, &active_units);
1506 }
1507 // Usage of jit_code might be not in a critical section.
1508 MJIT_ATOMIC_SET(unit->iseq->body->jit_func, func);
1509 }
1510 else {
1511 free_unit(unit);
1512 }
1513 CRITICAL_SECTION_FINISH(3, "in jit func replace");
1514
1515#if USE_JIT_COMPACTION
1516 // Combine .o files to one .so and reload all jit_func to improve memory locality.
1517 if (compact_units.length < max_compact_size
1518 && ((!mjit_opts.wait && unit_queue.length == 0 && active_units.length > 1)
1519 || (active_units.length == mjit_opts.max_cache_size && compact_units.length * throttle_threshold <= total_unloads))) { // throttle compaction by total_unloads
1520 compact_all_jit_code();
1521 }
1522#endif
1523 }
1524 }
1525
1526 // To keep mutex unlocked when it is destroyed by mjit_finish, don't wrap CRITICAL_SECTION here.
1527 worker_stopped = true;
1528}
#define FIX2LONG
Old name of RB_FIX2LONG.
Definition: long.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
Defines RBIMPL_HAS_BUILTIN.
int rb_cloexec_open(const char *pathname, int flags, mode_t mode)
Opens a file that closes on exec.
Definition: io.c:314
#define rb_strlen_lit(str)
Length of a string literal.
Definition: string.h:1756
#define strdup(s)
Just another name of ruby_strdup.
Definition: util.h:176
#define RUBY_API_VERSION_TEENY
Teeny version.
Definition: version.h:76
#define RUBY_API_VERSION_MAJOR
Major version.
Definition: version.h:64
#define RUBY_API_VERSION_MINOR
Minor version.
Definition: version.h:70
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition: memory.h:366
#define PRI_PIDT_PREFIX
A rb_sprintf() format prefix to be used for a pid_t parameter.
Definition: pid_t.h:38
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition: rstring.h:82
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
Definition: rstring.h:483
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
Definition: rstring.h:497
void rb_native_mutex_lock(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_lock.
void rb_native_cond_initialize(rb_nativethread_cond_t *cond)
Fills the passed condition variable with an initial value.
void rb_native_cond_broadcast(rb_nativethread_cond_t *cond)
Signals a condition variable.
void rb_native_mutex_initialize(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_initialize.
void rb_native_mutex_unlock(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_unlock.
void rb_native_mutex_destroy(rb_nativethread_lock_t *lock)
Just another name of rb_nativethread_lock_destroy.
void rb_native_cond_destroy(rb_nativethread_cond_t *cond)
Destroys the passed condition variable.
void rb_native_cond_signal(rb_nativethread_cond_t *cond)
Signals a condition variable.
void rb_native_cond_wait(rb_nativethread_cond_t *cond, rb_nativethread_lock_t *mutex)
Waits for the passed condition variable to be signalled.
#define RBIMPL_WARNING_IGNORED(flag)
Suppresses a warning.
Definition: warning_push.h:80
#define RBIMPL_WARNING_PUSH()
Pushes compiler warning state.
Definition: warning_push.h:55
#define RBIMPL_WARNING_POP()
Pops compiler warning state.
Definition: warning_push.h:62