Ruby 3.1.3p185 (2022-11-24 revision 1a6b16756e0ba6b95ab71a441357ed5484e33498)
regexec.c
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
/* Build configuration.
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is forced off when RUBY
 * is defined and forced on for every other build. */
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
/* USE_TOKEN_THREADED_VM may be preset by the build; otherwise it defaults to
 * 1 when __GNUC__ is defined and to 0 for any other compiler. */
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
/* RUBY-specific replacement for ONIGENC_IS_MBC_ASCII_WORD.
 * When rb_enc_asciicompat(enc) is true the first byte at `s` is tested
 * directly (ISALNUM or '_'); otherwise the character is decoded with
 * ONIGENC_MBC_TO_CODE and checked against ONIGENC_CTYPE_WORD through
 * onigenc_ascii_is_code_ctype().
 * Note: `s` is expanded unparenthesized and more than once, so pass a plain
 * pointer expression without side effects. */
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* ONIGENC_IS_MBC_CRNL: true when the character at `p` decodes to 13 (CR) and
 * the character that follows it decodes to 10 (LF). The macro itself does not
 * check that the following character lies before `end`. */
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* ONIGENC_IS_MBC_NEWLINE_EX: option-aware newline test; in this configuration
 * it simply forwards every argument to is_mbc_newline_ex(). */
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
/* Without USE_CRNL_AS_LINE_TERMINATOR the extended newline test ignores
 * `start`, `option` and `check_prev` and is just ONIGENC_IS_MBC_NEWLINE. */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234extern void
235onig_region_clear(OnigRegion* region)
236{
237 int i;
238
239 for (i = 0; i < region->num_regs; i++) {
240 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
241 }
242#ifdef USE_CAPTURE_HISTORY
243 history_root_free(region);
244#endif
245}
246
247extern int
248onig_region_resize(OnigRegion* region, int n)
249{
250 region->num_regs = n;
251
252 if (n < ONIG_NREGION)
253 n = ONIG_NREGION;
254
255 if (region->allocated == 0) {
256 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
257 if (region->beg == 0)
258 return ONIGERR_MEMORY;
259
260 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
261 if (region->end == 0) {
262 xfree(region->beg);
263 return ONIGERR_MEMORY;
264 }
265
266 region->allocated = n;
267 }
268 else if (region->allocated < n) {
269 OnigPosition *tmp;
270
271 region->allocated = 0;
272 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
273 if (tmp == 0) {
274 xfree(region->beg);
275 xfree(region->end);
276 return ONIGERR_MEMORY;
277 }
278 region->beg = tmp;
279 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
280 if (tmp == 0) {
281 xfree(region->beg);
282 xfree(region->end);
283 return ONIGERR_MEMORY;
284 }
285 region->end = tmp;
286
287 region->allocated = n;
288 }
289
290 return 0;
291}
292
293static int
294onig_region_resize_clear(OnigRegion* region, int n)
295{
296 int r;
297
298 r = onig_region_resize(region, n);
299 if (r != 0) return r;
300 onig_region_clear(region);
301 return 0;
302}
303
304extern int
305onig_region_set(OnigRegion* region, int at, int beg, int end)
306{
307 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
308
309 if (at >= region->allocated) {
310 int r = onig_region_resize(region, at + 1);
311 if (r < 0) return r;
312 }
313
314 region->beg[at] = beg;
315 region->end[at] = end;
316 return 0;
317}
318
319extern void
320onig_region_init(OnigRegion* region)
321{
322 region->num_regs = 0;
323 region->allocated = 0;
324 region->beg = (OnigPosition* )0;
325 region->end = (OnigPosition* )0;
326#ifdef USE_CAPTURE_HISTORY
327 region->history_root = (OnigCaptureTreeNode* )0;
328#endif
329}
330
331extern OnigRegion*
332onig_region_new(void)
333{
334 OnigRegion* r;
335
336 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
337 if (r)
338 onig_region_init(r);
339 return r;
340}
341
342extern void
343onig_region_free(OnigRegion* r, int free_self)
344{
345 if (r) {
346 if (r->allocated > 0) {
347 if (r->beg) xfree(r->beg);
348 if (r->end) xfree(r->end);
349 r->allocated = 0;
350 }
351#ifdef USE_CAPTURE_HISTORY
352 history_root_free(r);
353#endif
354 if (free_self) xfree(r);
355 }
356}
357
358extern void
359onig_region_copy(OnigRegion* to, const OnigRegion* from)
360{
361#define RREGC_SIZE (sizeof(int) * from->num_regs)
362 int i, r;
363
364 if (to == from) return;
365
366 r = onig_region_resize(to, from->num_regs);
367 if (r) return;
368
369 for (i = 0; i < from->num_regs; i++) {
370 to->beg[i] = from->beg[i];
371 to->end[i] = from->end[i];
372 }
373 to->num_regs = from->num_regs;
374
375#ifdef USE_CAPTURE_HISTORY
376 history_root_free(to);
377
378 if (IS_NOT_NULL(from->history_root)) {
379 to->history_root = history_tree_clone(from->history_root);
380 }
381#endif
382}
383
384
/* Sentinel stored in stack-index fields (e.g. mem_end_stk[]) meaning
 * "no stack entry". */
386#define INVALID_STACK_INDEX -1
387
/* Match-stack entry types and the masks used to classify them.
 * How the bit layout relates to the masks below:
 *   - STK_ALT, STK_LOOK_BEHIND_NOT and STK_POS_NOT are the only types with
 *     bits in the low byte, so they are the ones STK_MASK_POP_USED selects.
 *   - STK_MASK_TO_VOID_TARGET selects those three plus every type that has
 *     bit 0x1000 set (STK_STATE_CHECK_MARK, STK_NULL_CHECK_START,
 *     STK_NULL_CHECK_END).
 *   - STK_MEM_END and STK_MEM_END_MARK share bit 0x8000, which is what
 *     STK_MASK_MEM_END_OR_MARK tests. */
388/* stack type */
389/* used by normal-POP */
390#define STK_ALT 0x0001
391#define STK_LOOK_BEHIND_NOT 0x0002
392#define STK_POS_NOT 0x0003
393/* handled by normal-POP */
394#define STK_MEM_START 0x0100
395#define STK_MEM_END 0x8200
396#define STK_REPEAT_INC 0x0300
397#define STK_STATE_CHECK_MARK 0x1000
398/* avoided by normal-POP */
399#define STK_NULL_CHECK_START 0x3000
400#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
401#define STK_MEM_END_MARK 0x8400
402#define STK_POS 0x0500 /* used when POP-POS */
403#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
404#define STK_REPEAT 0x0700
405#define STK_CALL_FRAME 0x0800
406#define STK_RETURN 0x0900
407#define STK_VOID 0x0a00 /* for fill a blank */
408#define STK_ABSENT_POS 0x0b00 /* for absent */
409#define STK_ABSENT 0x0c00 /* absent inner loop marker */
410
411/* stack type check mask */
412#define STK_MASK_POP_USED 0x00ff
413#define STK_MASK_TO_VOID_TARGET 0x10ff
414#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
415
/* MATCH_ARG_INIT(msa, option, region, start, gpos)
 * Initialises a match-argument struct: no saved stack (stack_p = NULL) and
 * the given options, region, start and gpos. `msa` is used as an lvalue
 * struct, not a pointer. With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE the
 * best_len field is additionally reset to ONIG_MISMATCH. */
416#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
417# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
418 (msa).stack_p = (void* )0;\
419 (msa).options = (arg_option);\
420 (msa).region = (arg_region);\
421 (msa).start = (arg_start);\
422 (msa).gpos = (arg_gpos);\
423 (msa).best_len = ONIG_MISMATCH;\
424} while(0)
425#else
426# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
427 (msa).stack_p = (void* )0;\
428 (msa).options = (arg_option);\
429 (msa).region = (arg_region);\
430 (msa).start = (arg_start);\
431 (msa).gpos = (arg_gpos);\
432} while(0)
433#endif
434
435#ifdef USE_COMBINATION_EXPLOSION_CHECK
436
/* Buffers of at least this many bytes are heap-allocated; smaller ones come
 * from xalloca(). MATCH_ARG_FREE uses the same threshold to decide whether
 * the buffer has to be freed. */
437# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
438
/* STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 * Sets up (msa).state_check_buff, a bit buffer of
 * ((str_len + 1) * state_num + 7) >> 3 bytes.
 *
 * A buffer is created only when state_num > 0, str_len reaches
 * STATE_CHECK_STRING_THRESHOLD_LEN, and the computed size is non-zero, above
 * the rescaled offset and below STATE_CHECK_BUFF_MAX_SIZE. In every other
 * case the buffer pointer is NULL and its size 0.
 *
 * Side effects to be aware of:
 *   - `offset` is overwritten with (offset * state_num) >> 3, so it must be
 *     a modifiable lvalue; only bytes from that offset onward are zeroed.
 *   - CHECK_NULL_RETURN_MEMERR is expanded in the caller, so an allocation
 *     failure is handled with the enclosing function's control flow.
 *   - The small-buffer path calls xalloca() in the caller (presumably a
 *     stack allocation tied to the caller's frame -- confirm xalloca). */
439# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
440 if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
441 unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
442 offset = ((offset) * (state_num)) >> 3;\
443 if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
444 if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
445 (msa).state_check_buff = (void* )xmalloc(size);\
446 CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
447 }\
448 else \
449 (msa).state_check_buff = (void* )xalloca(size);\
450 xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
451 (size_t )(size - (offset))); \
452 (msa).state_check_buff_size = size;\
453 }\
454 else {\
455 (msa).state_check_buff = (void* )0;\
456 (msa).state_check_buff_size = 0;\
457 }\
458 }\
459 else {\
460 (msa).state_check_buff = (void* )0;\
461 (msa).state_check_buff_size = 0;\
462 }\
463 } while(0)
464
/* MATCH_ARG_FREE(msa), combination-explosion-check variant.
 * Frees the saved match stack if there is one, and frees the state-check
 * buffer only when its recorded size says it was heap-allocated (size at or
 * above STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE). */
465# define MATCH_ARG_FREE(msa) do {\
466 if ((msa).stack_p) xfree((msa).stack_p);\
467 if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
468 if ((msa).state_check_buff) xfree((msa).state_check_buff);\
469 }\
470} while(0)
471#else /* USE_COMBINATION_EXPLOSION_CHECK */
/* MATCH_ARG_FREE(msa), variant without the combination-explosion check.
 * Frees the saved match stack if there is one.
 * Wrapped in do { } while(0) so the macro is a single statement; as a bare
 * `if` it would capture an `else` that follows the macro call. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
473#endif /* USE_COMBINATION_EXPLOSION_CHECK */
474
475
476
/* Largest ptr_num for which STACK_INIT takes the index array from xalloca();
 * above it the array comes from xmalloc(). */
477#define MAX_PTR_NUM 100
478
/* STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)
 * Sets up the two working areas of a match in the calling function:
 *   - alloc_addr: room for ptr_num OnigStackIndex values;
 *   - stk_alloc / stk_base / stk / stk_end: the match stack.
 * The macro assigns the caller's variables stk_alloc, stk_base, stk and
 * stk_end and reads `msa` (used as a pointer).
 *
 * Three cases:
 *   1. ptr_num > MAX_PTR_NUM: the index array is xmalloc'ed and recorded in
 *      heap_addr. The stack is the one saved in msa->stack_p (msa->stack_n
 *      entries) if present, otherwise stack_num entries from xalloca().
 *      The xmalloc result is not checked in this macro.
 *   2. otherwise, with a saved stack: the index array comes from xalloca(),
 *      heap_addr is NULL and the saved stack is reused.
 *   3. otherwise: one xalloca() block holds the index array followed by
 *      stack_num stack entries; heap_addr is NULL. */
479#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
480 if (ptr_num > MAX_PTR_NUM) {\
481 alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
482 heap_addr = alloc_addr;\
483 if (msa->stack_p) {\
484 stk_alloc = (OnigStackType* )(msa->stack_p);\
485 stk_base = stk_alloc;\
486 stk = stk_base;\
487 stk_end = stk_base + msa->stack_n;\
488 } else {\
489 stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
490 stk_base = stk_alloc;\
491 stk = stk_base;\
492 stk_end = stk_base + (stack_num);\
493 }\
494 } else if (msa->stack_p) {\
495 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
496 heap_addr = NULL;\
497 stk_alloc = (OnigStackType* )(msa->stack_p);\
498 stk_base = stk_alloc;\
499 stk = stk_base;\
500 stk_end = stk_base + msa->stack_n;\
501 }\
502 else {\
503 alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
504 + sizeof(OnigStackType) * (stack_num));\
505 heap_addr = NULL;\
506 stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
507 stk_base = stk_alloc;\
508 stk = stk_base;\
509 stk_end = stk_base + (stack_num);\
510 }\
511} while(0)
512
/* STACK_SAVE
 * Records the current stack in msa (stack_p and its entry count stack_n) so
 * it can be picked up again by STACK_INIT. This happens only when stk_base
 * differs from stk_alloc, i.e. the stack is no longer the block STACK_INIT
 * started with. Uses the caller's stk_base, stk_alloc, stk_end and msa. */
513#define STACK_SAVE do{\
514 if (stk_base != stk_alloc) {\
515 msa->stack_p = stk_base;\
516 msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
517 };\
518} while(0)
519
520static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
521
522extern unsigned int
523onig_get_match_stack_limit_size(void)
524{
525 return MatchStackLimitSize;
526}
527
528extern int
529onig_set_match_stack_limit_size(unsigned int size)
530{
531 MatchStackLimitSize = size;
532 return 0;
533}
534
535static int
536stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
537 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
538{
539 size_t n;
540 OnigStackType *x, *stk_base, *stk_end, *stk;
541
542 stk_base = *arg_stk_base;
543 stk_end = *arg_stk_end;
544 stk = *arg_stk;
545
546 n = stk_end - stk_base;
547 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
548 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
549 if (IS_NULL(x)) {
550 STACK_SAVE;
551 return ONIGERR_MEMORY;
552 }
553 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
554 n *= 2;
555 }
556 else {
557 unsigned int limit_size = MatchStackLimitSize;
558 n *= 2;
559 if (limit_size != 0 && n > limit_size) {
560 if ((unsigned int )(stk_end - stk_base) == limit_size)
561 return ONIGERR_MATCH_STACK_LIMIT_OVER;
562 else
563 n = limit_size;
564 }
565 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
566 if (IS_NULL(x)) {
567 STACK_SAVE;
568 return ONIGERR_MEMORY;
569 }
570 }
571 *arg_stk = x + (stk - stk_base);
572 *arg_stk_base = x;
573 *arg_stk_end = x + n;
574 return 0;
575}
576
/* STACK_ENSURE(n)
 * Grows the match stack with a single stack_double() call when fewer than
 * `n` free entries remain between stk and stk_end. If growing fails, the
 * stack is saved with STACK_SAVE, xmalloc_base is freed when set, and the
 * enclosing function returns the error code. */
577#define STACK_ENSURE(n) do {\
578 if (stk_end - stk < (n)) {\
579 int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
580 if (r != 0) {\
581 STACK_SAVE;\
582 if (xmalloc_base) xfree(xmalloc_base);\
583 return r;\
584 }\
585 }\
586} while(0)
587
/* Conversions between a stack entry pointer and its index from stk_base. */
588#define STACK_AT(index) (stk_base + (index))
589#define GET_STACK_INDEX(stk) ((stk) - stk_base)
590
/* Pushes an entry that carries nothing but its type. STACK_INC is not
 * defined in this part of the file. */
591#define STACK_PUSH_TYPE(stack_type) do {\
592 STACK_ENSURE(1);\
593 stk->type = (stack_type);\
594 STACK_INC;\
595} while(0)
596
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
597#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
598
/* State-check helpers and the generic STACK_PUSH family.
 * Both build variants provide STACK_PUSH, STACK_PUSH_ENSURED and
 * ELSE_IF_STATE_CHECK_MARK; the combination-explosion-check variant
 * additionally tracks a per-entry state_check number and a bit buffer
 * (state_check_buff) indexed by string position and state number. */
599#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit index for string position `s` and state number `snum` (1-based):
 * (s - str) * num_comb_exp_check + (snum - 1). */
600# define STATE_CHECK_POS(s,snum) \
601 (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Reads the bit for state `snum` into `v` (0 when there is no buffer).
 * Note: the position comes from the caller's variable `s`; it is not a
 * macro parameter. */
602# define STATE_CHECK_VAL(v,snum) do {\
603 if (state_check_buff != NULL) {\
604 ptrdiff_t x = STATE_CHECK_POS(s,snum);\
605 (v) = state_check_buff[x/8] & (1<<(x%8));\
606 }\
607 else (v) = 0;\
608} while(0)
609
610
/* Extra `else if` arm for the pop loops: when the popped entry is a
 * STK_STATE_CHECK_MARK, sets the bit for its recorded position and state.
 * NOTE(review): the body reads `stk->u.state...` directly rather than through
 * the macro parameter, so it only works when the argument is named stk. */
611# define ELSE_IF_STATE_CHECK_MARK(stk) \
612 else if ((stk)->type == STK_STATE_CHECK_MARK) { \
613 ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
614 state_check_buff[x/8] |= (1<<(x%8)); \
615 }
616
/* Pushes a state entry (pattern position, string position, previous string
 * position, keep position) with state_check set to 0. */
617# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
618 STACK_ENSURE(1);\
619 stk->type = (stack_type);\
620 stk->u.state.pcode = (pat);\
621 stk->u.state.pstr = (s);\
622 stk->u.state.pstr_prev = (sprev);\
623 stk->u.state.state_check = 0;\
624 stk->u.state.pkeep = (keep);\
625 STACK_INC;\
626} while(0)
627
/* Like STACK_PUSH but without STACK_ENSURE and without the string fields:
 * only type, pcode and state_check are written, so the caller must already
 * have made room on the stack. */
628# define STACK_PUSH_ENSURED(stack_type,pat) do {\
629 stk->type = (stack_type);\
630 stk->u.state.pcode = (pat);\
631 stk->u.state.state_check = 0;\
632 STACK_INC;\
633} while(0)
634
/* Pushes a STK_ALT entry that remembers state number `snum`; the number is
 * stored as 0 when there is no state-check buffer. */
635# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
636 STACK_ENSURE(1);\
637 stk->type = STK_ALT;\
638 stk->u.state.pcode = (pat);\
639 stk->u.state.pstr = (s);\
640 stk->u.state.pstr_prev = (sprev);\
641 stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
642 stk->u.state.pkeep = (keep);\
643 STACK_INC;\
644} while(0)
645
/* Pushes a STK_STATE_CHECK_MARK for position `s` and state `snum`; does
 * nothing when there is no state-check buffer. */
646# define STACK_PUSH_STATE_CHECK(s,snum) do {\
647 if (state_check_buff != NULL) {\
648 STACK_ENSURE(1);\
649 stk->type = STK_STATE_CHECK_MARK;\
650 stk->u.state.pstr = (s);\
651 stk->u.state.state_check = (snum);\
652 STACK_INC;\
653 }\
654} while(0)
655
656#else /* USE_COMBINATION_EXPLOSION_CHECK */
657
/* Without the check the extra pop-loop arm expands to nothing. */
658# define ELSE_IF_STATE_CHECK_MARK(stk)
659
/* Pushes a state entry (pattern position, string position, previous string
 * position, keep position). */
660# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
661 STACK_ENSURE(1);\
662 stk->type = (stack_type);\
663 stk->u.state.pcode = (pat);\
664 stk->u.state.pstr = (s);\
665 stk->u.state.pstr_prev = (sprev);\
666 stk->u.state.pkeep = (keep);\
667 STACK_INC;\
668} while(0)
669
/* Like STACK_PUSH but without STACK_ENSURE and without the string fields;
 * the caller must already have made room on the stack. */
670# define STACK_PUSH_ENSURED(stack_type,pat) do {\
671 stk->type = (stack_type);\
672 stk->u.state.pcode = (pat);\
673 STACK_INC;\
674} while(0)
675#endif /* USE_COMBINATION_EXPLOSION_CHECK */
676
/* Type-specific shorthands for STACK_PUSH / STACK_PUSH_TYPE.
 * STACK_PUSH_POS stores NULL_UCHARP as its pattern position;
 * STACK_PUSH_ABSENT and STACK_PUSH_STOP_BT push a bare type marker. */
677#define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep)
678#define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
679#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
680#define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT)
681#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
682#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
683 STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
684
/* Pushes a STK_REPEAT entry for repeat `id` at pattern position `pat`, with
 * its iteration count starting at 0. */
685#define STACK_PUSH_REPEAT(id, pat) do {\
686 STACK_ENSURE(1);\
687 stk->type = STK_REPEAT;\
688 stk->u.repeat.num = (id);\
689 stk->u.repeat.pcode = (pat);\
690 stk->u.repeat.count = 0;\
691 STACK_INC;\
692} while(0)
693
/* Pushes a STK_REPEAT_INC entry holding the stack index `sindex`. The pop
 * macros use that index to decrement the count of the STK_REPEAT entry
 * found there. */
694#define STACK_PUSH_REPEAT_INC(sindex) do {\
695 STACK_ENSURE(1);\
696 stk->type = STK_REPEAT_INC;\
697 stk->u.repeat_inc.si = (sindex);\
698 STACK_INC;\
699} while(0)
700
/* Pushes a STK_MEM_START entry for group `mnum` at string position `s`.
 * The previous mem_start_stk/mem_end_stk values are saved in the entry (the
 * pop macros restore them from there); afterwards mem_start_stk[mnum] is
 * this entry's index and mem_end_stk[mnum] is INVALID_STACK_INDEX. */
701#define STACK_PUSH_MEM_START(mnum, s) do {\
702 STACK_ENSURE(1);\
703 stk->type = STK_MEM_START;\
704 stk->u.mem.num = (mnum);\
705 stk->u.mem.pstr = (s);\
706 stk->u.mem.start = mem_start_stk[mnum];\
707 stk->u.mem.end = mem_end_stk[mnum];\
708 mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
709 mem_end_stk[mnum] = INVALID_STACK_INDEX;\
710 STACK_INC;\
711} while(0)
712
/* Pushes a STK_MEM_END entry for group `mnum` at string position `s`,
 * saving the previous mem_start_stk/mem_end_stk values in the entry and
 * making mem_end_stk[mnum] this entry's index. */
713#define STACK_PUSH_MEM_END(mnum, s) do {\
714 STACK_ENSURE(1);\
715 stk->type = STK_MEM_END;\
716 stk->u.mem.num = (mnum);\
717 stk->u.mem.pstr = (s);\
718 stk->u.mem.start = mem_start_stk[mnum];\
719 stk->u.mem.end = mem_end_stk[mnum];\
720 mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
721 STACK_INC;\
722} while(0)
723
/* Pushes a STK_MEM_END_MARK entry that only records the group number. */
724#define STACK_PUSH_MEM_END_MARK(mnum) do {\
725 STACK_ENSURE(1);\
726 stk->type = STK_MEM_END_MARK;\
727 stk->u.mem.num = (mnum);\
728 STACK_INC;\
729} while(0)
730
/* STACK_GET_MEM_START(mnum, k)
 * Scans downward from the stack top for the STK_MEM_START entry of group
 * `mnum` that is not paired with an end entry above it: each MEM_END /
 * MEM_END_MARK of the same group raises a nesting level, each MEM_START of
 * the group lowers it, and the scan stops at a MEM_START seen at level 0.
 * `k` is left on that entry. If the scan reaches stk_base without a hit, `k`
 * is left at stk_base. */
731#define STACK_GET_MEM_START(mnum, k) do {\
732 int level = 0;\
733 k = stk;\
734 while (k > stk_base) {\
735 k--;\
736 if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
737 && k->u.mem.num == (mnum)) {\
738 level++;\
739 }\
740 else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
741 if (level == 0) break;\
742 level--;\
743 }\
744 }\
745} while(0)
746
/* STACK_GET_MEM_RANGE(k, mnum, start, end)
 * Scans upward from `k` towards the stack top for the outermost
 * MEM_START/MEM_END pair of group `mnum`. `start` receives the string
 * position of the first MEM_START seen at level 0 and `end` that of the
 * MEM_END that brings the level back to 0. `k` is advanced by the macro;
 * `start`/`end` are left unchanged when no such entry is found. */
747#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
748 int level = 0;\
749 while (k < stk) {\
750 if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
751 if (level == 0) (start) = k->u.mem.pstr;\
752 level++;\
753 }\
754 else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
755 level--;\
756 if (level == 0) {\
757 (end) = k->u.mem.pstr;\
758 break;\
759 }\
760 }\
761 k++;\
762 }\
763} while(0)
764
/* Pushes a STK_NULL_CHECK_START entry for check `cnum`, remembering the
 * string position `s` that the STACK_NULL_CHECK* macros compare against. */
765#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
766 STACK_ENSURE(1);\
767 stk->type = STK_NULL_CHECK_START;\
768 stk->u.null_check.num = (cnum);\
769 stk->u.null_check.pstr = (s);\
770 STACK_INC;\
771} while(0)
772
/* Pushes a STK_NULL_CHECK_END entry for check `cnum`; the *_REC null-check
 * macros count these to track nesting. */
773#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
774 STACK_ENSURE(1);\
775 stk->type = STK_NULL_CHECK_END;\
776 stk->u.null_check.num = (cnum);\
777 STACK_INC;\
778} while(0)
779
/* Pushes a STK_CALL_FRAME entry holding the pattern address that
 * STACK_RETURN later hands back. */
780#define STACK_PUSH_CALL_FRAME(pat) do {\
781 STACK_ENSURE(1);\
782 stk->type = STK_CALL_FRAME;\
783 stk->u.call_frame.ret_addr = (pat);\
784 STACK_INC;\
785} while(0)
786
/* Pushes a bare STK_RETURN marker. */
787#define STACK_PUSH_RETURN do {\
788 STACK_ENSURE(1);\
789 stk->type = STK_RETURN;\
790 STACK_INC;\
791} while(0)
792
/* Pushes a STK_ABSENT_POS entry holding two string positions; they are read
 * back by STACK_POP_ABSENT_POS. */
793#define STACK_PUSH_ABSENT_POS(start, end) do {\
794 STACK_ENSURE(1);\
795 stk->type = STK_ABSENT_POS;\
796 stk->u.absent_pos.abs_pstr = (start);\
797 stk->u.absent_pos.end_pstr = (end);\
798 STACK_INC;\
799} while(0)
800
801
/* STACK_BASE_CHECK(p, at)
 * Debug-only underflow guard: with ONIG_DEBUG, a pointer below stk_base
 * prints the name of the macro that detected it and jumps to the caller's
 * stack_error label. In other builds it expands to nothing, so the scanning
 * macros below have no underflow protection of their own. */
802#ifdef ONIG_DEBUG
803# define STACK_BASE_CHECK(p, at) \
804 if ((p) < stk_base) {\
805 fprintf(stderr, "at %s\n", at);\
806 goto stack_error;\
807 }
808#else
809# define STACK_BASE_CHECK(p, at)
810#endif
811
/* Drops exactly one entry from the stack top. */
812#define STACK_POP_ONE do {\
813 stk--;\
814 STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
815} while(0)
816
/* STACK_POP
 * Pops entries until one whose type matches STK_MASK_POP_USED is reached;
 * `stk` is left pointing at that entry. How much bookkeeping is undone for
 * the entries skipped on the way depends on the caller's `pop_level`:
 *   - STACK_POP_LEVEL_FREE: nothing is restored;
 *   - STACK_POP_LEVEL_MEM_START: STK_MEM_START entries restore the saved
 *     mem_start_stk/mem_end_stk values;
 *   - any other level: additionally STK_REPEAT_INC entries decrement the
 *     count of the repeat entry they refer to, and STK_MEM_END entries
 *     restore the saved mem_start_stk/mem_end_stk values.
 * Every level also applies ELSE_IF_STATE_CHECK_MARK to skipped entries. */
817#define STACK_POP do {\
818 switch (pop_level) {\
819 case STACK_POP_LEVEL_FREE:\
820 while (1) {\
821 stk--;\
822 STACK_BASE_CHECK(stk, "STACK_POP"); \
823 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
824 ELSE_IF_STATE_CHECK_MARK(stk);\
825 }\
826 break;\
827 case STACK_POP_LEVEL_MEM_START:\
828 while (1) {\
829 stk--;\
830 STACK_BASE_CHECK(stk, "STACK_POP 2"); \
831 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
832 else if (stk->type == STK_MEM_START) {\
833 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
834 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
835 }\
836 ELSE_IF_STATE_CHECK_MARK(stk);\
837 }\
838 break;\
839 default:\
840 while (1) {\
841 stk--;\
842 STACK_BASE_CHECK(stk, "STACK_POP 3"); \
843 if ((stk->type & STK_MASK_POP_USED) != 0) break;\
844 else if (stk->type == STK_MEM_START) {\
845 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
846 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
847 }\
848 else if (stk->type == STK_REPEAT_INC) {\
849 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
850 }\
851 else if (stk->type == STK_MEM_END) {\
852 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
853 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
854 }\
855 ELSE_IF_STATE_CHECK_MARK(stk);\
856 }\
857 break;\
858 }\
859} while(0)
860
/* STACK_POP_TIL_POS_NOT / _LOOK_BEHIND_NOT / _ABSENT
 * The three macros below share one body and differ only in the entry type
 * that ends the scan (STK_POS_NOT, STK_LOOK_BEHIND_NOT, STK_ABSENT). Entries
 * are popped until that type is reached, with `stk` left pointing at it.
 * Independently of pop_level, skipped STK_MEM_START and STK_MEM_END entries
 * restore mem_start_stk/mem_end_stk, STK_REPEAT_INC entries decrement the
 * referenced repeat count, and ELSE_IF_STATE_CHECK_MARK is applied. */
861#define STACK_POP_TIL_POS_NOT do {\
862 while (1) {\
863 stk--;\
864 STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
865 if (stk->type == STK_POS_NOT) break;\
866 else if (stk->type == STK_MEM_START) {\
867 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
868 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
869 }\
870 else if (stk->type == STK_REPEAT_INC) {\
871 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
872 }\
873 else if (stk->type == STK_MEM_END) {\
874 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
875 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
876 }\
877 ELSE_IF_STATE_CHECK_MARK(stk);\
878 }\
879} while(0)
880
881#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
882 while (1) {\
883 stk--;\
884 STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
885 if (stk->type == STK_LOOK_BEHIND_NOT) break;\
886 else if (stk->type == STK_MEM_START) {\
887 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
888 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
889 }\
890 else if (stk->type == STK_REPEAT_INC) {\
891 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
892 }\
893 else if (stk->type == STK_MEM_END) {\
894 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
895 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
896 }\
897 ELSE_IF_STATE_CHECK_MARK(stk);\
898 }\
899} while(0)
900
901#define STACK_POP_TIL_ABSENT do {\
902 while (1) {\
903 stk--;\
904 STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
905 if (stk->type == STK_ABSENT) break;\
906 else if (stk->type == STK_MEM_START) {\
907 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
908 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
909 }\
910 else if (stk->type == STK_REPEAT_INC) {\
911 STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
912 }\
913 else if (stk->type == STK_MEM_END) {\
914 mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
915 mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
916 }\
917 ELSE_IF_STATE_CHECK_MARK(stk);\
918 }\
919} while(0)
920
/* Pops one entry and reads its absent_pos fields into `start` and `end`.
 * The entry's type is not checked, so the top entry is expected to be the
 * STK_ABSENT_POS pushed by STACK_PUSH_ABSENT_POS. */
921#define STACK_POP_ABSENT_POS(start, end) do {\
922 stk--;\
923 STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
924 (start) = stk->u.absent_pos.abs_pstr;\
925 (end) = stk->u.absent_pos.end_pstr;\
926} while(0)
927
/* STACK_POS_END(k)
 * Walks down from the stack top to the nearest STK_POS entry. Every entry on
 * the way that matches IS_TO_VOID_TARGET, and the STK_POS entry itself, is
 * turned into STK_VOID; other entries are left as they are. The stack
 * pointer does not move. `k` is a caller-supplied cursor variable and ends
 * on the former STK_POS entry. */
928#define STACK_POS_END(k) do {\
929 k = stk;\
930 while (1) {\
931 k--;\
932 STACK_BASE_CHECK(k, "STACK_POS_END"); \
933 if (IS_TO_VOID_TARGET(k)) {\
934 k->type = STK_VOID;\
935 }\
936 else if (k->type == STK_POS) {\
937 k->type = STK_VOID;\
938 break;\
939 }\
940 }\
941} while(0)
942
/* STACK_STOP_BT_END
 * Same walk as STACK_POS_END, but it ends at the nearest STK_STOP_BT entry
 * and uses its own local cursor. */
943#define STACK_STOP_BT_END do {\
944 OnigStackType *k = stk;\
945 while (1) {\
946 k--;\
947 STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
948 if (IS_TO_VOID_TARGET(k)) {\
949 k->type = STK_VOID;\
950 }\
951 else if (k->type == STK_STOP_BT) {\
952 k->type = STK_VOID;\
953 break;\
954 }\
955 }\
956} while(0)
957
/* STACK_NULL_CHECK(isnull, id, s)
 * Finds the nearest STK_NULL_CHECK_START entry for check `id` below the
 * stack top and sets `isnull` to whether the string position recorded there
 * equals `s`. */
958#define STACK_NULL_CHECK(isnull,id,s) do {\
959 OnigStackType* k = stk;\
960 while (1) {\
961 k--;\
962 STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
963 if (k->type == STK_NULL_CHECK_START) {\
964 if (k->u.null_check.num == (id)) {\
965 (isnull) = (k->u.null_check.pstr == (s));\
966 break;\
967 }\
968 }\
969 }\
970} while(0)
971
/* STACK_NULL_CHECK_REC(isnull, id, s)
 * Nesting-aware version of STACK_NULL_CHECK: STK_NULL_CHECK_END entries raise
 * a level, START entries of `id` lower it, and the comparison is made on the
 * START entry of `id` seen at level 0.
 * NOTE(review): the level is raised for every STK_NULL_CHECK_END regardless
 * of its number but lowered only for START entries of `id`, whereas
 * STACK_NULL_CHECK_MEMST_REC raises it only for END entries of `id`.
 * Confirm whether this asymmetry is intended. */
972#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
973 int level = 0;\
974 OnigStackType* k = stk;\
975 while (1) {\
976 k--;\
977 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
978 if (k->type == STK_NULL_CHECK_START) {\
979 if (k->u.null_check.num == (id)) {\
980 if (level == 0) {\
981 (isnull) = (k->u.null_check.pstr == (s));\
982 break;\
983 }\
984 else level--;\
985 }\
986 }\
987 else if (k->type == STK_NULL_CHECK_END) {\
988 level++;\
989 }\
990 }\
991} while(0)
992
/* STACK_NULL_CHECK_MEMST(isnull, id, s, reg)
 * Null check that also looks at capture state. After locating the
 * STK_NULL_CHECK_START entry of `id`:
 *   - isnull = 0 if the recorded string position differs from `s`;
 *   - otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that entry and the stack top is examined:
 *       * saved end index is INVALID_STACK_INDEX -> isnull = 0, stop;
 *       * saved start and end positions differ  -> isnull = 0, stop;
 *       * they are equal but not at `s`         -> isnull = -1 and the scan
 *         continues ("empty, but position changed").
 * The saved end value is used as a stack index when the group's bit is set
 * in reg->bt_mem_end and is otherwise cast directly to a string pointer; the
 * saved start value is always used as a stack index here. */
993#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
994 OnigStackType* k = stk;\
995 while (1) {\
996 k--;\
997 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
998 if (k->type == STK_NULL_CHECK_START) {\
999 if (k->u.null_check.num == (id)) {\
1000 if (k->u.null_check.pstr != (s)) {\
1001 (isnull) = 0;\
1002 break;\
1003 }\
1004 else {\
1005 UChar* endp;\
1006 (isnull) = 1;\
1007 while (k < stk) {\
1008 if (k->type == STK_MEM_START) {\
1009 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1010 (isnull) = 0; break;\
1011 }\
1012 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1013 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1014 else\
1015 endp = (UChar* )k->u.mem.end;\
1016 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1017 (isnull) = 0; break;\
1018 }\
1019 else if (endp != s) {\
1020 (isnull) = -1; /* empty, but position changed */ \
1021 }\
1022 }\
1023 k++;\
1024 }\
1025 break;\
1026 }\
1027 }\
1028 }\
1029 }\
1030} while(0)
1031
/* STACK_NULL_CHECK_MEMST_REC(isnull, id, s, reg)
 * Nesting-aware version of STACK_NULL_CHECK_MEMST: STK_NULL_CHECK_END entries
 * of `id` raise a level, START entries of `id` seen at a non-zero level
 * lower it, and the capture-aware check described above is made on the START
 * entry of `id` seen at level 0. */
1032#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1033 int level = 0;\
1034 OnigStackType* k = stk;\
1035 while (1) {\
1036 k--;\
1037 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1038 if (k->type == STK_NULL_CHECK_START) {\
1039 if (k->u.null_check.num == (id)) {\
1040 if (level == 0) {\
1041 if (k->u.null_check.pstr != (s)) {\
1042 (isnull) = 0;\
1043 break;\
1044 }\
1045 else {\
1046 UChar* endp;\
1047 (isnull) = 1;\
1048 while (k < stk) {\
1049 if (k->type == STK_MEM_START) {\
1050 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1051 (isnull) = 0; break;\
1052 }\
1053 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1054 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1055 else\
1056 endp = (UChar* )k->u.mem.end;\
1057 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1058 (isnull) = 0; break;\
1059 }\
1060 else if (endp != s) {\
1061 (isnull) = -1; /* empty, but position changed */ \
1062 }\
1063 }\
1064 k++;\
1065 }\
1066 break;\
1067 }\
1068 }\
1069 else {\
1070 level--;\
1071 }\
1072 }\
1073 }\
1074 else if (k->type == STK_NULL_CHECK_END) {\
1075 if (k->u.null_check.num == (id)) level++;\
1076 }\
1077 }\
1078} while(0)
1079
/* STACK_GET_REPEAT(id, k)
 * Scans downward from the stack top and leaves `k` on the STK_REPEAT entry
 * of repeat `id` found while the call nesting level is 0. STK_RETURN
 * entries raise the level and STK_CALL_FRAME entries lower it. */
1080#define STACK_GET_REPEAT(id, k) do {\
1081 int level = 0;\
1082 k = stk;\
1083 while (1) {\
1084 k--;\
1085 STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
1086 if (k->type == STK_REPEAT) {\
1087 if (level == 0) {\
1088 if (k->u.repeat.num == (id)) {\
1089 break;\
1090 }\
1091 }\
1092 }\
1093 else if (k->type == STK_CALL_FRAME) level--;\
1094 else if (k->type == STK_RETURN) level++;\
1095 }\
1096} while(0)
1097
/* STACK_RETURN(addr)
 * Scans downward from the stack top for the STK_CALL_FRAME entry at nesting
 * level 0 and stores its return address in `addr`. STK_RETURN entries raise
 * the level; call frames seen at a non-zero level lower it. The stack is not
 * modified. */
1098#define STACK_RETURN(addr) do {\
1099 int level = 0;\
1100 OnigStackType* k = stk;\
1101 while (1) {\
1102 k--;\
1103 STACK_BASE_CHECK(k, "STACK_RETURN"); \
1104 if (k->type == STK_CALL_FRAME) {\
1105 if (level == 0) {\
1106 (addr) = k->u.call_frame.ret_addr;\
1107 break;\
1108 }\
1109 else level--;\
1110 }\
1111 else if (k->type == STK_RETURN)\
1112 level++;\
1113 }\
1114} while(0)
1115
1116
/* STRING_CMP(s1, s2, len)
 * Compares `len` bytes and jumps to the caller's `fail` label on the first
 * difference. All three arguments are modified: the pointers are advanced
 * and `len` is counted down, so they must be plain variables. */
1117#define STRING_CMP(s1,s2,len) do {\
1118 while (len-- > 0) {\
1119 if (*s1++ != *s2++) goto fail;\
1120 }\
1121} while(0)
1122
/* STRING_CMP_IC(case_fold_flag, s1, ps2, len, text_end)
 * Case-insensitive counterpart built on string_cmp_ic(), using the caller's
 * `encode` variable; jumps to `fail` when the comparison reports a mismatch.
 * `ps2` is a pointer to the second string pointer. */
1123#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
1124 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1125 goto fail; \
1126} while(0)
1127
1128static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1129 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1130{
1131 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1132 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1133 UChar *p1, *p2, *end1, *s2;
1134 int len1, len2;
1135
1136 s2 = *ps2;
1137 end1 = s1 + mblen;
1138 while (s1 < end1) {
1139 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1140 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1141 if (len1 != len2) return 0;
1142 p1 = buf1;
1143 p2 = buf2;
1144 while (len1-- > 0) {
1145 if (*p1 != *p2) return 0;
1146 p1++;
1147 p2++;
1148 }
1149 }
1150
1151 *ps2 = s2;
1152 return 1;
1153}
1154
/* STRING_CMP_VALUE(s1, s2, len, is_fail)
 * Like STRING_CMP, but reports the outcome in `is_fail` (1 on the first
 * difference, 0 otherwise) instead of jumping. The pointers and `len` are
 * modified by the macro. */
1155#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
1156 is_fail = 0;\
1157 while (len-- > 0) {\
1158 if (*s1++ != *s2++) {\
1159 is_fail = 1; break;\
1160 }\
1161 }\
1162} while(0)
1163
/* STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, text_end, is_fail)
 * Like STRING_CMP_IC, but reports the outcome in `is_fail` (1 when
 * string_cmp_ic() returns 0, otherwise 0) instead of jumping. */
1164#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
1165 if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
1166 is_fail = 1; \
1167 else \
1168 is_fail = 0; \
1169} while(0)
1170
1171
/*
 * Position predicates.  They rely on the `str` (start of subject) and `end`
 * (end of subject) variables of the expansion site.
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/*
 * DATA_ENSURE family: check that at least (n) more bytes are available at
 * the current position `s` before the limit (right_range or end, depending
 * on USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE).
 *
 *   DATA_ENSURE_CHECK1       - true when at least one byte is left
 *   DATA_ENSURE_CHECK(n)     - true when n bytes are left
 *   DATA_ENSURE(n)           - goto fail unless n bytes are left
 *   DATA_ENSURE_CONTINUE(n)  - continue unless n bytes are left
 *   ABSENT_END_POS           - the limit itself
 *
 * The requested length is compared with the remaining length (limit - s)
 * instead of computing s + (n): forming a pointer beyond one-past-the-end
 * of the buffer is undefined behaviour, while the subtraction is always
 * within the same object.  Both forms accept exactly the same (s, n) pairs.
 *
 * DATA_ENSURE and DATA_ENSURE_CONTINUE deliberately expand to a bare `if`
 * statement: DATA_ENSURE_CONTINUE has to `continue` the caller's loop and
 * therefore cannot be wrapped in do { } while (0).
 */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_CHECK1        (s < right_range)
# define DATA_ENSURE_CHECK(n)      ((n) <= right_range - s)
# define DATA_ENSURE(n)            if ((n) > right_range - s) goto fail
# define DATA_ENSURE_CONTINUE(n)   if ((n) > right_range - s) continue
# define ABSENT_END_POS            right_range
#else
# define DATA_ENSURE_CHECK1        (s < end)
# define DATA_ENSURE_CHECK(n)      ((n) <= end - s)
# define DATA_ENSURE(n)            if ((n) > end - s) goto fail
# define DATA_ENSURE_CONTINUE(n)   if ((n) > end - s) continue
# define ABSENT_END_POS            end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1188
1189
1190#ifdef USE_CAPTURE_HISTORY
1191static int
1192make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1193 OnigStackType* stk_top, UChar* str, regex_t* reg)
1194{
1195 int n, r;
1196 OnigCaptureTreeNode* child;
1197 OnigStackType* k = *kp;
1198
1199 while (k < stk_top) {
1200 if (k->type == STK_MEM_START) {
1201 n = k->u.mem.num;
1202 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1203 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1204 child = history_node_new();
1205 CHECK_NULL_RETURN_MEMERR(child);
1206 child->group = n;
1207 child->beg = k->u.mem.pstr - str;
1208 r = history_tree_add_child(node, child);
1209 if (r != 0) {
1210 history_tree_free(child);
1211 return r;
1212 }
1213 *kp = (k + 1);
1214 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1215 if (r != 0) return r;
1216
1217 k = *kp;
1218 child->end = k->u.mem.pstr - str;
1219 }
1220 }
1221 else if (k->type == STK_MEM_END) {
1222 if (k->u.mem.num == node->group) {
1223 node->end = k->u.mem.pstr - str;
1224 *kp = k;
1225 return 0;
1226 }
1227 }
1228 k++;
1229 }
1230
1231 return 1; /* 1: root node ending. */
1232}
1233#endif /* USE_CAPTURE_HISTORY */
1234
1235#ifdef USE_BACKREF_WITH_LEVEL
1236static int mem_is_in_memp(int mem, int num, UChar* memp)
1237{
1238 int i;
1239 MemNumType m;
1240
1241 for (i = 0; i < num; i++) {
1242 GET_MEMNUM_INC(m, memp);
1243 if (mem == (int )m) return 1;
1244 }
1245 return 0;
1246}
1247
1248static int backref_match_at_nested_level(regex_t* reg,
1249 OnigStackType* top, OnigStackType* stk_base,
1250 int ignore_case, int case_fold_flag,
1251 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1252{
1253 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1254 int level;
1255 OnigStackType* k;
1256
1257 level = 0;
1258 k = top;
1259 k--;
1260 while (k >= stk_base) {
1261 if (k->type == STK_CALL_FRAME) {
1262 level--;
1263 }
1264 else if (k->type == STK_RETURN) {
1265 level++;
1266 }
1267 else if (level == nest) {
1268 if (k->type == STK_MEM_START) {
1269 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1270 pstart = k->u.mem.pstr;
1271 if (pend != NULL_UCHARP) {
1272 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1273 p = pstart;
1274 ss = *s;
1275
1276 if (ignore_case != 0) {
1277 if (string_cmp_ic(reg->enc, case_fold_flag,
1278 pstart, &ss, pend - pstart, send) == 0)
1279 return 0; /* or goto next_mem; */
1280 }
1281 else {
1282 while (p < pend) {
1283 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1284 }
1285 }
1286
1287 *s = ss;
1288 return 1;
1289 }
1290 }
1291 }
1292 else if (k->type == STK_MEM_END) {
1293 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1294 pend = k->u.mem.pstr;
1295 }
1296 }
1297 }
1298 k--;
1299 }
1300
1301 return 0;
1302}
1303#endif /* USE_BACKREF_WITH_LEVEL */
1304
1305
1306#ifdef ONIG_DEBUG_STATISTICS
1307
/*
 * Timer backend for the opcode statistics.  Each branch defines the ts/te
 * time stamps, GETTIME(t) to sample the clock into t, and TIMEDIFF(te,ts)
 * to compute the difference between two samples.
 */
# if defined(_WIN32)
#  include <windows.h>
static LARGE_INTEGER ts, te, freq;
#  define GETTIME(t)         QueryPerformanceCounter(&(t))
#  define TIMEDIFF(te,ts)    (unsigned long )(((te).QuadPart - (ts).QuadPart) \
                             * 1000000 / freq.QuadPart)
# else /* _WIN32 */

#  define USE_TIMEOFDAY

#  if defined(USE_TIMEOFDAY)
#   if defined(HAVE_SYS_TIME_H)
#    include <sys/time.h>
#   endif
#   if defined(HAVE_UNISTD_H)
#    include <unistd.h>
#   endif
static struct timeval ts, te;
#   define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
/* seconds difference scaled by 1000000, plus the tv_usec difference */
#   define TIMEDIFF(te,ts)   ((((te).tv_sec - (ts).tv_sec)*1000000) + \
                              ((te).tv_usec - (ts).tv_usec))
#  else /* USE_TIMEOFDAY */
#   if defined(HAVE_SYS_TIMES_H)
#    include <sys/times.h>
#   endif
static struct tms ts, te;
#   define GETTIME(t)        times(&(t))
#   define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
#  endif /* USE_TIMEOFDAY */

# endif /* _WIN32 */
1339
1340static int OpCounter[256];
1341static int OpPrevCounter[256];
1342static unsigned long OpTime[256];
1343static int OpCurr = OP_FINISH;
1344static int OpPrevTarget = OP_FAIL;
1345static int MaxStackDepth = 0;
1346
1347# define MOP_IN(opcode) do {\
1348 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1349 OpCurr = opcode;\
1350 OpCounter[opcode]++;\
1351 GETTIME(ts);\
1352} while(0)
1353
1354# define MOP_OUT do {\
1355 GETTIME(te);\
1356 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1357} while(0)
1358
1359extern void
1360onig_statistics_init(void)
1361{
1362 int i;
1363 for (i = 0; i < 256; i++) {
1364 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1365 }
1366 MaxStackDepth = 0;
1367# ifdef _WIN32
1368 QueryPerformanceFrequency(&freq);
1369# endif
1370}
1371
1372extern void
1373onig_print_statistics(FILE* f)
1374{
1375 int i;
1376 fprintf(f, " count prev time\n");
1377 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1378 fprintf(f, "%8d: %8d: %10lu: %s\n",
1379 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1380 }
1381 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1382}
1383
/*
 * STACK_INC (statistics build): advance the stack pointer and remember the
 * largest distance from stk_base seen so far in MaxStackDepth.
 */
# define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) \
    MaxStackDepth = stk - stk_base;\
} while(0)
1389
1390#else /* ONIG_DEBUG_STATISTICS */
/* Without ONIG_DEBUG_STATISTICS, STACK_INC only advances the stack pointer. */
# define STACK_INC     stk++

/* Without ONIG_DEBUG_STATISTICS, the opcode profiling hooks expand to nothing. */
# define MOP_IN(opcode)
# define MOP_OUT
1395#endif /* ONIG_DEBUG_STATISTICS */
1396
1397
1398#ifdef ONIG_DEBUG_MATCH
1399static char *
1400stack_type_str(int stack_type)
1401{
1402 switch (stack_type) {
1403 case STK_ALT: return "Alt ";
1404 case STK_LOOK_BEHIND_NOT: return "LBNot ";
1405 case STK_POS_NOT: return "PosNot";
1406 case STK_MEM_START: return "MemS ";
1407 case STK_MEM_END: return "MemE ";
1408 case STK_REPEAT_INC: return "RepInc";
1409 case STK_STATE_CHECK_MARK: return "StChMk";
1410 case STK_NULL_CHECK_START: return "NulChS";
1411 case STK_NULL_CHECK_END: return "NulChE";
1412 case STK_MEM_END_MARK: return "MemEMk";
1413 case STK_POS: return "Pos ";
1414 case STK_STOP_BT: return "StopBt";
1415 case STK_REPEAT: return "Rep ";
1416 case STK_CALL_FRAME: return "Call ";
1417 case STK_RETURN: return "Ret ";
1418 case STK_VOID: return "Void ";
1419 case STK_ABSENT_POS: return "AbsPos";
1420 case STK_ABSENT: return "Absent";
1421 default: return " ";
1422 }
1423}
1424#endif
1425
1426/* match data(str - end) from position (sstart). */
1427/* if sstart == str then set sprev to NULL. */
1428static OnigPosition
1429match_at(regex_t* reg, const UChar* str, const UChar* end,
1430#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1431 const UChar* right_range,
1432#endif
1433 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1434{
1435 static const UChar FinishCode[] = { OP_FINISH };
1436
1437 int i, num_mem, pop_level;
1438 ptrdiff_t n, best_len;
1439 LengthType tlen, tlen2;
1440 MemNumType mem;
1441 RelAddrType addr;
1442 OnigOptionType option = reg->options;
1443 OnigEncoding encode = reg->enc;
1444 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1445 UChar *s, *q, *sbegin;
1446 UChar *p = reg->p;
1447 UChar *pkeep;
1448 char *alloca_base;
1449 char *xmalloc_base = NULL;
1450 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1451 OnigStackType *stkp; /* used as any purpose. */
1452 OnigStackIndex si;
1453 OnigStackIndex *repeat_stk;
1454 OnigStackIndex *mem_start_stk, *mem_end_stk;
1455#ifdef USE_COMBINATION_EXPLOSION_CHECK
1456 int scv;
1457 unsigned char* state_check_buff = msa->state_check_buff;
1458 int num_comb_exp_check = reg->num_comb_exp_check;
1459#endif
1460
1461#if USE_TOKEN_THREADED_VM
1462# define OP_OFFSET 1
1463# define VM_LOOP JUMP;
1464# define VM_LOOP_END
1465# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1466# define DEFAULT L_DEFAULT:
1467# define NEXT sprev = sbegin; JUMP
1468# define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
1469
1470 RB_GNUC_EXTENSION static const void *oplabels[] = {
1471 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
1472 &&L_OP_END, /* pattern code terminator (success end) */
1473
1474 &&L_OP_EXACT1, /* single byte, N = 1 */
1475 &&L_OP_EXACT2, /* single byte, N = 2 */
1476 &&L_OP_EXACT3, /* single byte, N = 3 */
1477 &&L_OP_EXACT4, /* single byte, N = 4 */
1478 &&L_OP_EXACT5, /* single byte, N = 5 */
1479 &&L_OP_EXACTN, /* single byte */
1480 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
1481 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
1482 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
1483 &&L_OP_EXACTMB2N, /* mb-length = 2 */
1484 &&L_OP_EXACTMB3N, /* mb-length = 3 */
1485 &&L_OP_EXACTMBN, /* other length */
1486
1487 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
1488 &&L_OP_EXACTN_IC, /* single byte, ignore case */
1489
1490 &&L_OP_CCLASS,
1491 &&L_OP_CCLASS_MB,
1492 &&L_OP_CCLASS_MIX,
1493 &&L_OP_CCLASS_NOT,
1494 &&L_OP_CCLASS_MB_NOT,
1495 &&L_OP_CCLASS_MIX_NOT,
1496
1497 &&L_OP_ANYCHAR, /* "." */
1498 &&L_OP_ANYCHAR_ML, /* "." multi-line */
1499 &&L_OP_ANYCHAR_STAR, /* ".*" */
1500 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
1501 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1502 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1503
1504 &&L_OP_WORD,
1505 &&L_OP_NOT_WORD,
1506 &&L_OP_WORD_BOUND,
1507 &&L_OP_NOT_WORD_BOUND,
1508# ifdef USE_WORD_BEGIN_END
1509 &&L_OP_WORD_BEGIN,
1510 &&L_OP_WORD_END,
1511# else
1512 &&L_DEFAULT,
1513 &&L_DEFAULT,
1514# endif
1515 &&L_OP_ASCII_WORD,
1516 &&L_OP_NOT_ASCII_WORD,
1517 &&L_OP_ASCII_WORD_BOUND,
1518 &&L_OP_NOT_ASCII_WORD_BOUND,
1519# ifdef USE_WORD_BEGIN_END
1520 &&L_OP_ASCII_WORD_BEGIN,
1521 &&L_OP_ASCII_WORD_END,
1522# else
1523 &&L_DEFAULT,
1524 &&L_DEFAULT,
1525# endif
1526
1527 &&L_OP_BEGIN_BUF,
1528 &&L_OP_END_BUF,
1529 &&L_OP_BEGIN_LINE,
1530 &&L_OP_END_LINE,
1531 &&L_OP_SEMI_END_BUF,
1532 &&L_OP_BEGIN_POSITION,
1533
1534 &&L_OP_BACKREF1,
1535 &&L_OP_BACKREF2,
1536 &&L_OP_BACKREFN,
1537 &&L_OP_BACKREFN_IC,
1538 &&L_OP_BACKREF_MULTI,
1539 &&L_OP_BACKREF_MULTI_IC,
1540# ifdef USE_BACKREF_WITH_LEVEL
1541 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
1542# else
1543 &&L_DEFAULT,
1544# endif
1545 &&L_OP_MEMORY_START,
1546 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
1547 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
1548# ifdef USE_SUBEXP_CALL
1549 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
1550# else
1551 &&L_DEFAULT,
1552# endif
1553 &&L_OP_MEMORY_END,
1554# ifdef USE_SUBEXP_CALL
1555 &&L_OP_MEMORY_END_REC, /* push marker to stack */
1556# else
1557 &&L_DEFAULT,
1558# endif
1559
1560 &&L_OP_KEEP,
1561
1562 &&L_OP_FAIL, /* pop stack and move */
1563 &&L_OP_JUMP,
1564 &&L_OP_PUSH,
1565 &&L_OP_POP,
1566# ifdef USE_OP_PUSH_OR_JUMP_EXACT
1567 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
1568# else
1569 &&L_DEFAULT,
1570# endif
1571 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
1572 &&L_OP_REPEAT, /* {n,m} */
1573 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
1574 &&L_OP_REPEAT_INC,
1575 &&L_OP_REPEAT_INC_NG, /* non greedy */
1576 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
1577 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
1578 &&L_OP_NULL_CHECK_START, /* null loop checker start */
1579 &&L_OP_NULL_CHECK_END, /* null loop checker end */
1580# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1581 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
1582# else
1583 &&L_DEFAULT,
1584# endif
1585# ifdef USE_SUBEXP_CALL
1586 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
1587# else
1588 &&L_DEFAULT,
1589# endif
1590
1591 &&L_OP_PUSH_POS, /* (?=...) start */
1592 &&L_OP_POP_POS, /* (?=...) end */
1593 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
1594 &&L_OP_FAIL_POS, /* (?!...) end */
1595 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
1596 &&L_OP_POP_STOP_BT, /* (?>...) end */
1597 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
1598 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
1599 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
1600 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
1601 &&L_OP_ABSENT, /* (?~...) start of inner loop */
1602 &&L_OP_ABSENT_END, /* (?~...) end */
1603
1604# ifdef USE_SUBEXP_CALL
1605 &&L_OP_CALL, /* \g<name> */
1606 &&L_OP_RETURN,
1607# else
1608 &&L_DEFAULT,
1609 &&L_DEFAULT,
1610# endif
1611 &&L_OP_CONDITION,
1612
1613# ifdef USE_COMBINATION_EXPLOSION_CHECK
1614 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
1615 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
1616 &&L_OP_STATE_CHECK, /* check only */
1617# else
1618 &&L_DEFAULT,
1619 &&L_DEFAULT,
1620 &&L_DEFAULT,
1621# endif
1622# ifdef USE_COMBINATION_EXPLOSION_CHECK
1623 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1624 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1625# else
1626 &&L_DEFAULT,
1627 &&L_DEFAULT,
1628# endif
1629 /* no need: IS_DYNAMIC_OPTION() == 0 */
1630# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1631 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
1632 &&L_OP_SET_OPTION /* set option */
1633# else
1634 &&L_DEFAULT,
1635 &&L_DEFAULT
1636# endif
1637 };
1638#else /* USE_TOKEN_THREADED_VM */
1639
1640# define OP_OFFSET 0
1641# define VM_LOOP \
1642 while (1) { \
1643 OPCODE_EXEC_HOOK; \
1644 sbegin = s; \
1645 switch (*p++) {
1646# define VM_LOOP_END } sprev = sbegin; }
1647# define CASE(x) case x:
1648# define DEFAULT default:
1649# define NEXT break
1650# define JUMP continue; break
1651#endif /* USE_TOKEN_THREADED_VM */
1652
1653
1654#ifdef USE_SUBEXP_CALL
1655/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1656 etc. Additional space is required. */
1657# define ADD_NUMMEM 1
1658#else
/* Stack #0 is not used. */
1660# define ADD_NUMMEM 0
1661#endif
1662
1663 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1664
1665 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1666 pop_level = reg->stack_pop_level;
1667 num_mem = reg->num_mem;
1668 repeat_stk = (OnigStackIndex* )alloca_base;
1669
1670 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1671 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1672 {
1673 OnigStackIndex *pp = mem_start_stk;
1674 for (; pp < repeat_stk + n; pp += 2) {
1675 pp[0] = INVALID_STACK_INDEX;
1676 pp[1] = INVALID_STACK_INDEX;
1677 }
1678 }
1679#ifndef USE_SUBEXP_CALL
1680 mem_start_stk--; /* for index start from 1,
1681 mem_start_stk[1]..mem_start_stk[num_mem] */
1682 mem_end_stk--; /* for index start from 1,
1683 mem_end_stk[1]..mem_end_stk[num_mem] */
1684#endif
1685
1686#ifdef ONIG_DEBUG_MATCH
1687 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
1688 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1689 fprintf(stderr, "size: %d, start offset: %d\n",
1690 (int )(end - str), (int )(sstart - str));
1691 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
1692#endif
1693
1694 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
1695 best_len = ONIG_MISMATCH;
1696 s = (UChar* )sstart;
1697 pkeep = (UChar* )sstart;
1698
1699
1700#ifdef ONIG_DEBUG_MATCH
1701# define OPCODE_EXEC_HOOK \
1702 if (s) { \
1703 UChar *op, *q, *bp, buf[50]; \
1704 int len; \
1705 op = p - OP_OFFSET; \
1706 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1707 bp = buf; \
1708 q = s; \
1709 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1710 for (i = 0; i < 7 && q < end; i++) { \
1711 len = enclen(encode, q, end); \
1712 while (len-- > 0) *bp++ = *q++; \
1713 } \
1714 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1715 } \
1716 xmemcpy(bp, "\"", 1); bp += 1; \
1717 *bp = 0; \
1718 fputs((char* )buf, stderr); \
1719 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1720 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1721 stk - stk_base - 1, \
1722 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1723 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1724 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1725 fprintf(stderr, "\n"); \
1726 }
1727#else
1728# define OPCODE_EXEC_HOOK ((void) 0)
1729#endif
1730
1731
1732 VM_LOOP {
1733 CASE(OP_END) MOP_IN(OP_END);
1734 n = s - sstart;
1735 if (n > best_len) {
1736 OnigRegion* region;
1737#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1738 if (IS_FIND_LONGEST(option)) {
1739 if (n > msa->best_len) {
1740 msa->best_len = n;
1741 msa->best_s = (UChar* )sstart;
1742 }
1743 else
1744 goto end_best_len;
1745 }
1746#endif
1747 best_len = n;
1748 region = msa->region;
1749 if (region) {
1750 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1751 region->end[0] = s - str;
1752 for (i = 1; i <= num_mem; i++) {
1753 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1754 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1755 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1756 else
1757 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1758
1759 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1760 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1761 : (UChar* )((void* )mem_end_stk[i])) - str;
1762 }
1763 else {
1764 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1765 }
1766 }
1767
1768#ifdef USE_CAPTURE_HISTORY
1769 if (reg->capture_history != 0) {
1770 int r;
1771 OnigCaptureTreeNode* node;
1772
1773 if (IS_NULL(region->history_root)) {
1774 region->history_root = node = history_node_new();
1775 CHECK_NULL_RETURN_MEMERR(node);
1776 }
1777 else {
1778 node = region->history_root;
1779 history_tree_clear(node);
1780 }
1781
1782 node->group = 0;
1783 node->beg = ((pkeep > s) ? s : pkeep) - str;
1784 node->end = s - str;
1785
1786 stkp = stk_base;
1787 r = make_capture_history_tree(region->history_root, &stkp,
1788 stk, (UChar* )str, reg);
1789 if (r < 0) {
1790 best_len = r; /* error code */
1791 goto finish;
1792 }
1793 }
1794#endif /* USE_CAPTURE_HISTORY */
1795 } /* if (region) */
1796 } /* n > best_len */
1797
1798#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1799 end_best_len:
1800#endif
1801 MOP_OUT;
1802
1803 if (IS_FIND_CONDITION(option)) {
1804 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1805 best_len = ONIG_MISMATCH;
1806 goto fail; /* for retry */
1807 }
1808 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1809 goto fail; /* for retry */
1810 }
1811 }
1812
1813 /* default behavior: return first-matching result. */
1814 goto finish;
1815 NEXT;
1816
1817 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1818 DATA_ENSURE(1);
1819 if (*p != *s) goto fail;
1820 p++; s++;
1821 MOP_OUT;
1822 NEXT;
1823
1824 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1825 {
1826 int len;
1827 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1828
1829 DATA_ENSURE(1);
1830 len = ONIGENC_MBC_CASE_FOLD(encode,
1831 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1832 case_fold_flag,
1833 &s, end, lowbuf);
1834 DATA_ENSURE(0);
1835 q = lowbuf;
1836 while (len-- > 0) {
1837 if (*p != *q) {
1838 goto fail;
1839 }
1840 p++; q++;
1841 }
1842 }
1843 MOP_OUT;
1844 NEXT;
1845
1846 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1847 DATA_ENSURE(2);
1848 if (*p != *s) goto fail;
1849 p++; s++;
1850 if (*p != *s) goto fail;
1851 sprev = s;
1852 p++; s++;
1853 MOP_OUT;
1854 JUMP;
1855
1856 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1857 DATA_ENSURE(3);
1858 if (*p != *s) goto fail;
1859 p++; s++;
1860 if (*p != *s) goto fail;
1861 p++; s++;
1862 if (*p != *s) goto fail;
1863 sprev = s;
1864 p++; s++;
1865 MOP_OUT;
1866 JUMP;
1867
1868 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1869 DATA_ENSURE(4);
1870 if (*p != *s) goto fail;
1871 p++; s++;
1872 if (*p != *s) goto fail;
1873 p++; s++;
1874 if (*p != *s) goto fail;
1875 p++; s++;
1876 if (*p != *s) goto fail;
1877 sprev = s;
1878 p++; s++;
1879 MOP_OUT;
1880 JUMP;
1881
1882 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1883 DATA_ENSURE(5);
1884 if (*p != *s) goto fail;
1885 p++; s++;
1886 if (*p != *s) goto fail;
1887 p++; s++;
1888 if (*p != *s) goto fail;
1889 p++; s++;
1890 if (*p != *s) goto fail;
1891 p++; s++;
1892 if (*p != *s) goto fail;
1893 sprev = s;
1894 p++; s++;
1895 MOP_OUT;
1896 JUMP;
1897
1898 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1899 GET_LENGTH_INC(tlen, p);
1900 DATA_ENSURE(tlen);
1901 while (tlen-- > 0) {
1902 if (*p++ != *s++) goto fail;
1903 }
1904 sprev = s - 1;
1905 MOP_OUT;
1906 JUMP;
1907
1908 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1909 {
1910 int len;
1911 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1912
1913 GET_LENGTH_INC(tlen, p);
1914 endp = p + tlen;
1915
1916 while (p < endp) {
1917 sprev = s;
1918 DATA_ENSURE(1);
1919 len = ONIGENC_MBC_CASE_FOLD(encode,
1920 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1921 case_fold_flag,
1922 &s, end, lowbuf);
1923 DATA_ENSURE(0);
1924 q = lowbuf;
1925 while (len-- > 0) {
1926 if (*p != *q) goto fail;
1927 p++; q++;
1928 }
1929 }
1930 }
1931
1932 MOP_OUT;
1933 JUMP;
1934
1935 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1936 DATA_ENSURE(2);
1937 if (*p != *s) goto fail;
1938 p++; s++;
1939 if (*p != *s) goto fail;
1940 p++; s++;
1941 MOP_OUT;
1942 NEXT;
1943
1944 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1945 DATA_ENSURE(4);
1946 if (*p != *s) goto fail;
1947 p++; s++;
1948 if (*p != *s) goto fail;
1949 p++; s++;
1950 sprev = s;
1951 if (*p != *s) goto fail;
1952 p++; s++;
1953 if (*p != *s) goto fail;
1954 p++; s++;
1955 MOP_OUT;
1956 JUMP;
1957
1958 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1959 DATA_ENSURE(6);
1960 if (*p != *s) goto fail;
1961 p++; s++;
1962 if (*p != *s) goto fail;
1963 p++; s++;
1964 if (*p != *s) goto fail;
1965 p++; s++;
1966 if (*p != *s) goto fail;
1967 p++; s++;
1968 sprev = s;
1969 if (*p != *s) goto fail;
1970 p++; s++;
1971 if (*p != *s) goto fail;
1972 p++; s++;
1973 MOP_OUT;
1974 JUMP;
1975
1976 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1977 GET_LENGTH_INC(tlen, p);
1978 DATA_ENSURE(tlen * 2);
1979 while (tlen-- > 0) {
1980 if (*p != *s) goto fail;
1981 p++; s++;
1982 if (*p != *s) goto fail;
1983 p++; s++;
1984 }
1985 sprev = s - 2;
1986 MOP_OUT;
1987 JUMP;
1988
1989 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1990 GET_LENGTH_INC(tlen, p);
1991 DATA_ENSURE(tlen * 3);
1992 while (tlen-- > 0) {
1993 if (*p != *s) goto fail;
1994 p++; s++;
1995 if (*p != *s) goto fail;
1996 p++; s++;
1997 if (*p != *s) goto fail;
1998 p++; s++;
1999 }
2000 sprev = s - 3;
2001 MOP_OUT;
2002 JUMP;
2003
2004 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2005 GET_LENGTH_INC(tlen, p); /* mb-len */
2006 GET_LENGTH_INC(tlen2, p); /* string len */
2007 tlen2 *= tlen;
2008 DATA_ENSURE(tlen2);
2009 while (tlen2-- > 0) {
2010 if (*p != *s) goto fail;
2011 p++; s++;
2012 }
2013 sprev = s - tlen;
2014 MOP_OUT;
2015 JUMP;
2016
2017 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2018 DATA_ENSURE(1);
2019 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2020 p += SIZE_BITSET;
2021 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2022 MOP_OUT;
2023 NEXT;
2024
2025 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2026 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2027
2028 cclass_mb:
2029 GET_LENGTH_INC(tlen, p);
2030 {
2031 OnigCodePoint code;
2032 UChar *ss;
2033 int mb_len;
2034
2035 DATA_ENSURE(1);
2036 mb_len = enclen(encode, s, end);
2037 DATA_ENSURE(mb_len);
2038 ss = s;
2039 s += mb_len;
2040 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2041
2042#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2043 if (! onig_is_in_code_range(p, code)) goto fail;
2044#else
2045 q = p;
2046 ALIGNMENT_RIGHT(q);
2047 if (! onig_is_in_code_range(q, code)) goto fail;
2048#endif
2049 }
2050 p += tlen;
2051 MOP_OUT;
2052 NEXT;
2053
2054 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2055 DATA_ENSURE(1);
2056 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2057 p += SIZE_BITSET;
2058 goto cclass_mb;
2059 }
2060 else {
2061 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2062 goto fail;
2063
2064 p += SIZE_BITSET;
2065 GET_LENGTH_INC(tlen, p);
2066 p += tlen;
2067 s++;
2068 }
2069 MOP_OUT;
2070 NEXT;
2071
2072 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2073 DATA_ENSURE(1);
2074 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2075 p += SIZE_BITSET;
2076 s += enclen(encode, s, end);
2077 MOP_OUT;
2078 NEXT;
2079
2080 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2081 DATA_ENSURE(1);
2082 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2083 s++;
2084 GET_LENGTH_INC(tlen, p);
2085 p += tlen;
2086 goto cc_mb_not_success;
2087 }
2088
2089 cclass_mb_not:
2090 GET_LENGTH_INC(tlen, p);
2091 {
2092 OnigCodePoint code;
2093 UChar *ss;
2094 int mb_len = enclen(encode, s, end);
2095
2096 if (! DATA_ENSURE_CHECK(mb_len)) {
2097 DATA_ENSURE(1);
2098 s = (UChar* )end;
2099 p += tlen;
2100 goto cc_mb_not_success;
2101 }
2102
2103 ss = s;
2104 s += mb_len;
2105 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2106
2107#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2108 if (onig_is_in_code_range(p, code)) goto fail;
2109#else
2110 q = p;
2111 ALIGNMENT_RIGHT(q);
2112 if (onig_is_in_code_range(q, code)) goto fail;
2113#endif
2114 }
2115 p += tlen;
2116
2117 cc_mb_not_success:
2118 MOP_OUT;
2119 NEXT;
2120
2121 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2122 DATA_ENSURE(1);
2123 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2124 p += SIZE_BITSET;
2125 goto cclass_mb_not;
2126 }
2127 else {
2128 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2129 goto fail;
2130
2131 p += SIZE_BITSET;
2132 GET_LENGTH_INC(tlen, p);
2133 p += tlen;
2134 s++;
2135 }
2136 MOP_OUT;
2137 NEXT;
2138
2139 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2140 DATA_ENSURE(1);
2141 n = enclen(encode, s, end);
2142 DATA_ENSURE(n);
2143 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2144 s += n;
2145 MOP_OUT;
2146 NEXT;
2147
2148 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2149 DATA_ENSURE(1);
2150 n = enclen(encode, s, end);
2151 DATA_ENSURE(n);
2152 s += n;
2153 MOP_OUT;
2154 NEXT;
2155
2156 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2157 while (DATA_ENSURE_CHECK1) {
2158 STACK_PUSH_ALT(p, s, sprev, pkeep);
2159 n = enclen(encode, s, end);
2160 DATA_ENSURE(n);
2161 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2162 sprev = s;
2163 s += n;
2164 }
2165 MOP_OUT;
2166 JUMP;
2167
2168 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2169 while (DATA_ENSURE_CHECK1) {
2170 STACK_PUSH_ALT(p, s, sprev, pkeep);
2171 n = enclen(encode, s, end);
2172 if (n > 1) {
2173 DATA_ENSURE(n);
2174 sprev = s;
2175 s += n;
2176 }
2177 else {
2178 sprev = s;
2179 s++;
2180 }
2181 }
2182 MOP_OUT;
2183 JUMP;
2184
2185 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2186 while (DATA_ENSURE_CHECK1) {
2187 if (*p == *s) {
2188 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2189 }
2190 n = enclen(encode, s, end);
2191 DATA_ENSURE(n);
2192 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2193 sprev = s;
2194 s += n;
2195 }
2196 p++;
2197 MOP_OUT;
2198 NEXT;
2199
2200 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2201 while (DATA_ENSURE_CHECK1) {
2202 if (*p == *s) {
2203 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2204 }
2205 n = enclen(encode, s, end);
2206 if (n > 1) {
2207 DATA_ENSURE(n);
2208 sprev = s;
2209 s += n;
2210 }
2211 else {
2212 sprev = s;
2213 s++;
2214 }
2215 }
2216 p++;
2217 MOP_OUT;
2218 NEXT;
2219
2220#ifdef USE_COMBINATION_EXPLOSION_CHECK
2221 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2222 GET_STATE_CHECK_NUM_INC(mem, p);
2223 while (DATA_ENSURE_CHECK1) {
2224 STATE_CHECK_VAL(scv, mem);
2225 if (scv) goto fail;
2226
2227 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2228 n = enclen(encode, s, end);
2229 DATA_ENSURE(n);
2230 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2231 sprev = s;
2232 s += n;
2233 }
2234 MOP_OUT;
2235 NEXT;
2236
2237 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2238 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2239
2240 GET_STATE_CHECK_NUM_INC(mem, p);
2241 while (DATA_ENSURE_CHECK1) {
2242 STATE_CHECK_VAL(scv, mem);
2243 if (scv) goto fail;
2244
2245 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2246 n = enclen(encode, s, end);
2247 if (n > 1) {
2248 DATA_ENSURE(n);
2249 sprev = s;
2250 s += n;
2251 }
2252 else {
2253 sprev = s;
2254 s++;
2255 }
2256 }
2257 MOP_OUT;
2258 NEXT;
2259#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2260
2261 CASE(OP_WORD) MOP_IN(OP_WORD);
2262 DATA_ENSURE(1);
2263 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2264 goto fail;
2265
2266 s += enclen(encode, s, end);
2267 MOP_OUT;
2268 NEXT;
2269
2270 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2271 DATA_ENSURE(1);
2272 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2273 goto fail;
2274
2275 s += enclen(encode, s, end);
2276 MOP_OUT;
2277 NEXT;
2278
2279 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2280 DATA_ENSURE(1);
2281 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2282 goto fail;
2283
2284 s += enclen(encode, s, end);
2285 MOP_OUT;
2286 NEXT;
2287
2288 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2289 DATA_ENSURE(1);
2290 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2291 goto fail;
2292
2293 s += enclen(encode, s, end);
2294 MOP_OUT;
2295 NEXT;
2296
2297 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2298 if (ON_STR_BEGIN(s)) {
2299 DATA_ENSURE(1);
2300 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2301 goto fail;
2302 }
2303 else if (ON_STR_END(s)) {
2304 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2305 goto fail;
2306 }
2307 else {
2308 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2309 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2310 goto fail;
2311 }
2312 MOP_OUT;
2313 JUMP;
2314
2315 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2316 if (ON_STR_BEGIN(s)) {
2317 DATA_ENSURE(1);
2318 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2319 goto fail;
2320 }
2321 else if (ON_STR_END(s)) {
2322 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2323 goto fail;
2324 }
2325 else {
2326 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2327 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2328 goto fail;
2329 }
2330 MOP_OUT;
2331 JUMP;
2332
2333 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2334 if (ON_STR_BEGIN(s)) {
2335 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2336 goto fail;
2337 }
2338 else if (ON_STR_END(s)) {
2339 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2340 goto fail;
2341 }
2342 else {
2343 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2344 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2345 goto fail;
2346 }
2347 MOP_OUT;
2348 JUMP;
2349
2350 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2351 if (ON_STR_BEGIN(s)) {
2352 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2353 goto fail;
2354 }
2355 else if (ON_STR_END(s)) {
2356 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2357 goto fail;
2358 }
2359 else {
2360 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2361 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2362 goto fail;
2363 }
2364 MOP_OUT;
2365 JUMP;
2366
2367#ifdef USE_WORD_BEGIN_END
2368 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2369 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2370 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2371 MOP_OUT;
2372 JUMP;
2373 }
2374 }
2375 goto fail;
2376 NEXT;
2377
2378 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2379 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2380 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2381 MOP_OUT;
2382 JUMP;
2383 }
2384 }
2385 goto fail;
2386 NEXT;
2387
2388 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2389 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2390 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2391 MOP_OUT;
2392 JUMP;
2393 }
2394 }
2395 goto fail;
2396 NEXT;
2397
2398 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2399 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2400 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2401 MOP_OUT;
2402 JUMP;
2403 }
2404 }
2405 goto fail;
2406 NEXT;
2407#endif
2408
2409 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2410 if (! ON_STR_BEGIN(s)) goto fail;
2411 if (IS_NOTBOS(msa->options)) goto fail;
2412
2413 MOP_OUT;
2414 JUMP;
2415
2416 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2417 if (! ON_STR_END(s)) goto fail;
2418 if (IS_NOTEOS(msa->options)) goto fail;
2419
2420 MOP_OUT;
2421 JUMP;
2422
2423 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2424 if (ON_STR_BEGIN(s)) {
2425 if (IS_NOTBOL(msa->options)) goto fail;
2426 MOP_OUT;
2427 JUMP;
2428 }
2429 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2430#ifdef USE_CRNL_AS_LINE_TERMINATOR
2431 && !(IS_NEWLINE_CRLF(option)
2432 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2433#endif
2434 && !ON_STR_END(s)) {
2435 MOP_OUT;
2436 JUMP;
2437 }
2438 goto fail;
2439 NEXT;
2440
2441 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2442 if (ON_STR_END(s)) {
2443#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2444 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2445#endif
2446 if (IS_NOTEOL(msa->options)) goto fail;
2447 MOP_OUT;
2448 JUMP;
2449#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2450 }
2451#endif
2452 }
2453 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2454 MOP_OUT;
2455 JUMP;
2456 }
2457 goto fail;
2458 NEXT;
2459
2460 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2461 if (ON_STR_END(s)) {
2462#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2463 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2464#endif
2465 if (IS_NOTEOL(msa->options)) goto fail;
2466 MOP_OUT;
2467 JUMP;
2468#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2469 }
2470#endif
2471 }
2472 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2473 UChar* ss = s + enclen(encode, s, end);
2474 if (ON_STR_END(ss)) {
2475 MOP_OUT;
2476 JUMP;
2477 }
2478#ifdef USE_CRNL_AS_LINE_TERMINATOR
2479 else if (IS_NEWLINE_CRLF(option)
2480 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2481 ss += enclen(encode, ss, end);
2482 if (ON_STR_END(ss)) {
2483 MOP_OUT;
2484 JUMP;
2485 }
2486 }
2487#endif
2488 }
2489 goto fail;
2490 NEXT;
2491
2492 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2493 if (s != msa->gpos)
2494 goto fail;
2495
2496 MOP_OUT;
2497 JUMP;
2498
2499 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2500 GET_MEMNUM_INC(mem, p);
2501 STACK_PUSH_MEM_START(mem, s);
2502 MOP_OUT;
2503 JUMP;
2504
2505 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2506 GET_MEMNUM_INC(mem, p);
2507 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2508 mem_end_stk[mem] = INVALID_STACK_INDEX;
2509 MOP_OUT;
2510 JUMP;
2511
2512 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2513 GET_MEMNUM_INC(mem, p);
2514 STACK_PUSH_MEM_END(mem, s);
2515 MOP_OUT;
2516 JUMP;
2517
2518 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2519 GET_MEMNUM_INC(mem, p);
2520 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2521 MOP_OUT;
2522 JUMP;
2523
2524 CASE(OP_KEEP) MOP_IN(OP_KEEP);
2525 pkeep = s;
2526 MOP_OUT;
2527 JUMP;
2528
2529#ifdef USE_SUBEXP_CALL
2530 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2531 GET_MEMNUM_INC(mem, p);
2532 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2533 STACK_PUSH_MEM_END(mem, s);
2534 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2535 MOP_OUT;
2536 JUMP;
2537
2538 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2539 GET_MEMNUM_INC(mem, p);
2540 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2541 STACK_GET_MEM_START(mem, stkp);
2542
2543 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2544 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2545 else
2546 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2547
2548 STACK_PUSH_MEM_END_MARK(mem);
2549 MOP_OUT;
2550 JUMP;
2551#endif
2552
2553 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2554 mem = 1;
2555 goto backref;
2556 NEXT;
2557
2558 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2559 mem = 2;
2560 goto backref;
2561 NEXT;
2562
2563 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2564 GET_MEMNUM_INC(mem, p);
2565 backref:
2566 {
2567 int len;
2568 UChar *pstart, *pend;
2569
2570 /* if you want to remove following line,
2571 you should check in parse and compile time. */
2572 if (mem > num_mem) goto fail;
2573 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2574 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2575
2576 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2577 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2578 else
2579 pstart = (UChar* )((void* )mem_start_stk[mem]);
2580
2581 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2582 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2583 : (UChar* )((void* )mem_end_stk[mem]));
2584 n = pend - pstart;
2585 DATA_ENSURE(n);
2586 sprev = s;
2587 STRING_CMP(pstart, s, n);
2588 while (sprev + (len = enclen(encode, sprev, end)) < s)
2589 sprev += len;
2590
2591 MOP_OUT;
2592 JUMP;
2593 }
2594
2595 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2596 GET_MEMNUM_INC(mem, p);
2597 {
2598 int len;
2599 UChar *pstart, *pend;
2600
2601 /* if you want to remove following line,
2602 you should check in parse and compile time. */
2603 if (mem > num_mem) goto fail;
2604 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2605 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2606
2607 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2608 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2609 else
2610 pstart = (UChar* )((void* )mem_start_stk[mem]);
2611
2612 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2613 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2614 : (UChar* )((void* )mem_end_stk[mem]));
2615 n = pend - pstart;
2616 DATA_ENSURE(n);
2617 sprev = s;
2618 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2619 while (sprev + (len = enclen(encode, sprev, end)) < s)
2620 sprev += len;
2621
2622 MOP_OUT;
2623 JUMP;
2624 }
2625 NEXT;
2626
2627 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2628 {
2629 int len, is_fail;
2630 UChar *pstart, *pend, *swork;
2631
2632 GET_LENGTH_INC(tlen, p);
2633 for (i = 0; i < tlen; i++) {
2634 GET_MEMNUM_INC(mem, p);
2635
2636 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2637 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2638
2639 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2640 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2641 else
2642 pstart = (UChar* )((void* )mem_start_stk[mem]);
2643
2644 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2645 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2646 : (UChar* )((void* )mem_end_stk[mem]));
2647 n = pend - pstart;
2648 DATA_ENSURE_CONTINUE(n);
2649 sprev = s;
2650 swork = s;
2651 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2652 if (is_fail) continue;
2653 s = swork;
2654 while (sprev + (len = enclen(encode, sprev, end)) < s)
2655 sprev += len;
2656
2657 p += (SIZE_MEMNUM * (tlen - i - 1));
2658 break; /* success */
2659 }
2660 if (i == tlen) goto fail;
2661 MOP_OUT;
2662 JUMP;
2663 }
2664 NEXT;
2665
2666 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2667 {
2668 int len, is_fail;
2669 UChar *pstart, *pend, *swork;
2670
2671 GET_LENGTH_INC(tlen, p);
2672 for (i = 0; i < tlen; i++) {
2673 GET_MEMNUM_INC(mem, p);
2674
2675 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2676 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2677
2678 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2679 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2680 else
2681 pstart = (UChar* )((void* )mem_start_stk[mem]);
2682
2683 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2684 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2685 : (UChar* )((void* )mem_end_stk[mem]));
2686 n = pend - pstart;
2687 DATA_ENSURE_CONTINUE(n);
2688 sprev = s;
2689 swork = s;
2690 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2691 if (is_fail) continue;
2692 s = swork;
2693 while (sprev + (len = enclen(encode, sprev, end)) < s)
2694 sprev += len;
2695
2696 p += (SIZE_MEMNUM * (tlen - i - 1));
2697 break; /* success */
2698 }
2699 if (i == tlen) goto fail;
2700 MOP_OUT;
2701 JUMP;
2702 }
2703
2704#ifdef USE_BACKREF_WITH_LEVEL
2705 CASE(OP_BACKREF_WITH_LEVEL)
2706 {
2707 int len;
2708 OnigOptionType ic;
2709 LengthType level;
2710
2711 GET_OPTION_INC(ic, p);
2712 GET_LENGTH_INC(level, p);
2713 GET_LENGTH_INC(tlen, p);
2714
2715 sprev = s;
2716 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2717 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2718 while (sprev + (len = enclen(encode, sprev, end)) < s)
2719 sprev += len;
2720
2721 p += (SIZE_MEMNUM * tlen);
2722 }
2723 else
2724 goto fail;
2725
2726 MOP_OUT;
2727 JUMP;
2728 }
2729
2730#endif
2731
2732#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2733 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2734 GET_OPTION_INC(option, p);
2735 STACK_PUSH_ALT(p, s, sprev, pkeep);
2736 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2737 MOP_OUT;
2738 JUMP;
2739
2740 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2741 GET_OPTION_INC(option, p);
2742 MOP_OUT;
2743 JUMP;
2744#endif
2745
2746 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2747 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2748 STACK_PUSH_NULL_CHECK_START(mem, s);
2749 MOP_OUT;
2750 JUMP;
2751
2752 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2753 {
2754 int isnull;
2755
2756 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2757 STACK_NULL_CHECK(isnull, mem, s);
2758 if (isnull) {
2759#ifdef ONIG_DEBUG_MATCH
2760 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2761 (int )mem, (uintptr_t )s, s);
2762#endif
2763 null_check_found:
2764 /* empty loop founded, skip next instruction */
2765 switch (*p++) {
2766 case OP_JUMP:
2767 case OP_PUSH:
2768 p += SIZE_RELADDR;
2769 break;
2770 case OP_REPEAT_INC:
2771 case OP_REPEAT_INC_NG:
2772 case OP_REPEAT_INC_SG:
2773 case OP_REPEAT_INC_NG_SG:
2774 p += SIZE_MEMNUM;
2775 break;
2776 default:
2777 goto unexpected_bytecode_error;
2778 break;
2779 }
2780 }
2781 }
2782 MOP_OUT;
2783 JUMP;
2784
2785#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2786 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2787 {
2788 int isnull;
2789
2790 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2791 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2792 if (isnull) {
2793# ifdef ONIG_DEBUG_MATCH
2794 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2795 (int )mem, (uintptr_t )s, s);
2796# endif
2797 if (isnull == -1) goto fail;
2798 goto null_check_found;
2799 }
2800 }
2801 MOP_OUT;
2802 JUMP;
2803#endif
2804
2805#ifdef USE_SUBEXP_CALL
2806 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2807 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2808 {
2809 int isnull;
2810
2811 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2812# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2813 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2814# else
2815 STACK_NULL_CHECK_REC(isnull, mem, s);
2816# endif
2817 if (isnull) {
2818# ifdef ONIG_DEBUG_MATCH
2819 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2820 (int )mem, (uintptr_t )s, s);
2821# endif
2822 if (isnull == -1) goto fail;
2823 goto null_check_found;
2824 }
2825 else {
2826 STACK_PUSH_NULL_CHECK_END(mem);
2827 }
2828 }
2829 MOP_OUT;
2830 JUMP;
2831#endif
2832
2833 CASE(OP_JUMP) MOP_IN(OP_JUMP);
2834 GET_RELADDR_INC(addr, p);
2835 p += addr;
2836 MOP_OUT;
2837 CHECK_INTERRUPT_IN_MATCH_AT;
2838 JUMP;
2839
2840 CASE(OP_PUSH) MOP_IN(OP_PUSH);
2841 GET_RELADDR_INC(addr, p);
2842 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2843 MOP_OUT;
2844 JUMP;
2845
2846#ifdef USE_COMBINATION_EXPLOSION_CHECK
2847 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2848 GET_STATE_CHECK_NUM_INC(mem, p);
2849 STATE_CHECK_VAL(scv, mem);
2850 if (scv) goto fail;
2851
2852 GET_RELADDR_INC(addr, p);
2853 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2854 MOP_OUT;
2855 JUMP;
2856
2857 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2858 GET_STATE_CHECK_NUM_INC(mem, p);
2859 GET_RELADDR_INC(addr, p);
2860 STATE_CHECK_VAL(scv, mem);
2861 if (scv) {
2862 p += addr;
2863 }
2864 else {
2865 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2866 }
2867 MOP_OUT;
2868 JUMP;
2869
2870 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2871 GET_STATE_CHECK_NUM_INC(mem, p);
2872 STATE_CHECK_VAL(scv, mem);
2873 if (scv) goto fail;
2874
2875 STACK_PUSH_STATE_CHECK(s, mem);
2876 MOP_OUT;
2877 JUMP;
2878#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2879
2880 CASE(OP_POP) MOP_IN(OP_POP);
2881 STACK_POP_ONE;
2882 MOP_OUT;
2883 JUMP;
2884
2885#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2886 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2887 GET_RELADDR_INC(addr, p);
2888 if (*p == *s && DATA_ENSURE_CHECK1) {
2889 p++;
2890 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2891 MOP_OUT;
2892 JUMP;
2893 }
2894 p += (addr + 1);
2895 MOP_OUT;
2896 JUMP;
2897#endif
2898
2899 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2900 GET_RELADDR_INC(addr, p);
2901 if (*p == *s) {
2902 p++;
2903 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2904 MOP_OUT;
2905 JUMP;
2906 }
2907 p++;
2908 MOP_OUT;
2909 JUMP;
2910
2911 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2912 {
2913 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2914 GET_RELADDR_INC(addr, p);
2915
2916 STACK_ENSURE(1);
2917 repeat_stk[mem] = GET_STACK_INDEX(stk);
2918 STACK_PUSH_REPEAT(mem, p);
2919
2920 if (reg->repeat_range[mem].lower == 0) {
2921 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2922 }
2923 }
2924 MOP_OUT;
2925 JUMP;
2926
2927 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2928 {
2929 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2930 GET_RELADDR_INC(addr, p);
2931
2932 STACK_ENSURE(1);
2933 repeat_stk[mem] = GET_STACK_INDEX(stk);
2934 STACK_PUSH_REPEAT(mem, p);
2935
2936 if (reg->repeat_range[mem].lower == 0) {
2937 STACK_PUSH_ALT(p, s, sprev, pkeep);
2938 p += addr;
2939 }
2940 }
2941 MOP_OUT;
2942 JUMP;
2943
2944 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2945 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2946 si = repeat_stk[mem];
2947 stkp = STACK_AT(si);
2948
2949 repeat_inc:
2950 stkp->u.repeat.count++;
2951 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2952 /* end of repeat. Nothing to do. */
2953 }
2954 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2955 STACK_PUSH_ALT(p, s, sprev, pkeep);
2956 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2957 }
2958 else {
2959 p = stkp->u.repeat.pcode;
2960 }
2961 STACK_PUSH_REPEAT_INC(si);
2962 MOP_OUT;
2963 CHECK_INTERRUPT_IN_MATCH_AT;
2964 JUMP;
2965
2966 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2967 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2968 STACK_GET_REPEAT(mem, stkp);
2969 si = GET_STACK_INDEX(stkp);
2970 goto repeat_inc;
2971 NEXT;
2972
2973 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2974 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2975 si = repeat_stk[mem];
2976 stkp = STACK_AT(si);
2977
2978 repeat_inc_ng:
2979 stkp->u.repeat.count++;
2980 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2981 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2982 UChar* pcode = stkp->u.repeat.pcode;
2983
2984 STACK_PUSH_REPEAT_INC(si);
2985 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2986 }
2987 else {
2988 p = stkp->u.repeat.pcode;
2989 STACK_PUSH_REPEAT_INC(si);
2990 }
2991 }
2992 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2993 STACK_PUSH_REPEAT_INC(si);
2994 }
2995 MOP_OUT;
2996 CHECK_INTERRUPT_IN_MATCH_AT;
2997 JUMP;
2998
2999 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3000 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3001 STACK_GET_REPEAT(mem, stkp);
3002 si = GET_STACK_INDEX(stkp);
3003 goto repeat_inc_ng;
3004 NEXT;
3005
3006 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3007 STACK_PUSH_POS(s, sprev, pkeep);
3008 MOP_OUT;
3009 JUMP;
3010
3011 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3012 {
3013 STACK_POS_END(stkp);
3014 s = stkp->u.state.pstr;
3015 sprev = stkp->u.state.pstr_prev;
3016 }
3017 MOP_OUT;
3018 JUMP;
3019
3020 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3021 GET_RELADDR_INC(addr, p);
3022 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3023 MOP_OUT;
3024 JUMP;
3025
3026 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3027 STACK_POP_TIL_POS_NOT;
3028 goto fail;
3029 NEXT;
3030
3031 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3032 STACK_PUSH_STOP_BT;
3033 MOP_OUT;
3034 JUMP;
3035
3036 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3037 STACK_STOP_BT_END;
3038 MOP_OUT;
3039 JUMP;
3040
3041 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3042 GET_LENGTH_INC(tlen, p);
3043 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3044 if (IS_NULL(s)) goto fail;
3045 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3046 MOP_OUT;
3047 JUMP;
3048
3049 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3050 GET_RELADDR_INC(addr, p);
3051 GET_LENGTH_INC(tlen, p);
3052 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3053 if (IS_NULL(q)) {
3054 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3055 If you want to change to fail, replace following line. */
3056 p += addr;
3057 /* goto fail; */
3058 }
3059 else {
3060 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3061 s = q;
3062 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3063 }
3064 MOP_OUT;
3065 JUMP;
3066
3067 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3068 STACK_POP_TIL_LOOK_BEHIND_NOT;
3069 goto fail;
3070 NEXT;
3071
3072 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3073 /* Save the absent-start-pos and the original end-pos. */
3074 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3075 MOP_OUT;
3076 JUMP;
3077
3078 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3079 {
3080 const UChar* aend = ABSENT_END_POS;
3081 UChar* absent;
3082 UChar* selfp = p - 1;
3083
3084 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3085 GET_RELADDR_INC(addr, p);
3086#ifdef ONIG_DEBUG_MATCH
3087 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3088#endif
3089 if ((absent > aend) && (s > absent)) {
3090 /* An empty match occurred in (?~...) at the start point.
3091 * Never match. */
3092 STACK_POP;
3093 goto fail;
3094 }
3095 else if ((s >= aend) && (s > absent)) {
3096 if (s > aend) {
3097 /* Only one (or less) character matched in the last iteration.
3098 * This is not a possible point. */
3099 goto fail;
3100 }
3101 /* All possible points were found. Try matching after (?~...). */
3102 DATA_ENSURE(0);
3103 p += addr;
3104 }
3105 else {
3106 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3107 n = enclen(encode, s, end);
3108 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3109 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3110 STACK_PUSH_ABSENT;
3111 ABSENT_END_POS = aend;
3112 }
3113 }
3114 MOP_OUT;
3115 JUMP;
3116
3117 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3118 /* The pattern inside (?~...) was matched.
3119 * Set the end-pos temporary and go to next iteration. */
3120 if (sprev < ABSENT_END_POS)
3121 ABSENT_END_POS = sprev;
3122#ifdef ONIG_DEBUG_MATCH
3123 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3124#endif
3125 STACK_POP_TIL_ABSENT;
3126 goto fail;
3127 NEXT;
3128
3129#ifdef USE_SUBEXP_CALL
3130 CASE(OP_CALL) MOP_IN(OP_CALL);
3131 GET_ABSADDR_INC(addr, p);
3132 STACK_PUSH_CALL_FRAME(p);
3133 p = reg->p + addr;
3134 MOP_OUT;
3135 JUMP;
3136
3137 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3138 STACK_RETURN(p);
3139 STACK_PUSH_RETURN;
3140 MOP_OUT;
3141 JUMP;
3142#endif
3143
3144 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3145 GET_MEMNUM_INC(mem, p);
3146 GET_RELADDR_INC(addr, p);
3147 if ((mem > num_mem) ||
3148 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3149 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3150 p += addr;
3151 }
3152 MOP_OUT;
3153 JUMP;
3154
3155 CASE(OP_FINISH)
3156 goto finish;
3157 NEXT;
3158
3159 CASE(OP_FAIL)
3160 if (0) {
3161 /* fall */
3162 fail:
3163 MOP_OUT;
3164 }
3165 MOP_IN(OP_FAIL);
3166 STACK_POP;
3167 p = stk->u.state.pcode;
3168 s = stk->u.state.pstr;
3169 sprev = stk->u.state.pstr_prev;
3170 pkeep = stk->u.state.pkeep;
3171
3172#ifdef USE_COMBINATION_EXPLOSION_CHECK
3173 if (stk->u.state.state_check != 0) {
3174 stk->type = STK_STATE_CHECK_MARK;
3175 stk++;
3176 }
3177#endif
3178
3179 MOP_OUT;
3180 JUMP;
3181
3182 DEFAULT
3183 goto bytecode_error;
3184 } VM_LOOP_END
3185
3186 finish:
3187 STACK_SAVE;
3188 if (xmalloc_base) xfree(xmalloc_base);
3189 return best_len;
3190
3191#ifdef ONIG_DEBUG
3192 stack_error:
3193 STACK_SAVE;
3194 if (xmalloc_base) xfree(xmalloc_base);
3195 return ONIGERR_STACK_BUG;
3196#endif
3197
3198 bytecode_error:
3199 STACK_SAVE;
3200 if (xmalloc_base) xfree(xmalloc_base);
3201 return ONIGERR_UNDEFINED_BYTECODE;
3202
3203 unexpected_bytecode_error:
3204 STACK_SAVE;
3205 if (xmalloc_base) xfree(xmalloc_base);
3206 return ONIGERR_UNEXPECTED_BYTECODE;
3207}
3208
3209
3210static UChar*
3211slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3212 const UChar* text, const UChar* text_end, UChar* text_range)
3213{
3214 UChar *t, *p, *s, *end;
3215
3216 end = (UChar* )text_end;
3217 end -= target_end - target - 1;
3218 if (end > text_range)
3219 end = text_range;
3220
3221 s = (UChar* )text;
3222
3223 if (enc->max_enc_len == enc->min_enc_len) {
3224 int n = enc->max_enc_len;
3225
3226 while (s < end) {
3227 if (*s == *target) {
3228 p = s + 1;
3229 t = target + 1;
3230 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3231 return s;
3232 }
3233 s += n;
3234 }
3235 return (UChar* )NULL;
3236 }
3237 while (s < end) {
3238 if (*s == *target) {
3239 p = s + 1;
3240 t = target + 1;
3241 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3242 return s;
3243 }
3244 s += enclen(enc, s, text_end);
3245 }
3246
3247 return (UChar* )NULL;
3248}
3249
3250static int
3251str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3252 const UChar* t, const UChar* tend,
3253 const UChar* p, const UChar* end)
3254{
3255 int lowlen;
3256 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3257
3258 while (t < tend) {
3259 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3260 q = lowbuf;
3261 while (lowlen > 0) {
3262 if (*t++ != *q++) return 0;
3263 lowlen--;
3264 }
3265 }
3266
3267 return 1;
3268}
3269
3270static UChar*
3271slow_search_ic(OnigEncoding enc, int case_fold_flag,
3272 UChar* target, UChar* target_end,
3273 const UChar* text, const UChar* text_end, UChar* text_range)
3274{
3275 UChar *s, *end;
3276
3277 end = (UChar* )text_end;
3278 end -= target_end - target - 1;
3279 if (end > text_range)
3280 end = text_range;
3281
3282 s = (UChar* )text;
3283
3284 while (s < end) {
3285 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3286 s, text_end))
3287 return s;
3288
3289 s += enclen(enc, s, text_end);
3290 }
3291
3292 return (UChar* )NULL;
3293}
3294
3295static UChar*
3296slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3297 const UChar* text, const UChar* adjust_text,
3298 const UChar* text_end, const UChar* text_start)
3299{
3300 UChar *t, *p, *s;
3301
3302 s = (UChar* )text_end;
3303 s -= (target_end - target);
3304 if (s > text_start)
3305 s = (UChar* )text_start;
3306 else
3307 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3308
3309 while (s >= text) {
3310 if (*s == *target) {
3311 p = s + 1;
3312 t = target + 1;
3313 while (t < target_end) {
3314 if (*t != *p++)
3315 break;
3316 t++;
3317 }
3318 if (t == target_end)
3319 return s;
3320 }
3321 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3322 }
3323
3324 return (UChar* )NULL;
3325}
3326
3327static UChar*
3328slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3329 UChar* target, UChar* target_end,
3330 const UChar* text, const UChar* adjust_text,
3331 const UChar* text_end, const UChar* text_start)
3332{
3333 UChar *s;
3334
3335 s = (UChar* )text_end;
3336 s -= (target_end - target);
3337 if (s > text_start)
3338 s = (UChar* )text_start;
3339 else
3340 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3341
3342 while (s >= text) {
3343 if (str_lower_case_match(enc, case_fold_flag,
3344 target, target_end, s, text_end))
3345 return s;
3346
3347 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3348 }
3349
3350 return (UChar* )NULL;
3351}
3352
3353#ifndef USE_SUNDAY_QUICK_SEARCH
3354/* Boyer-Moore-Horspool search applied to a multibyte string */
3355static UChar*
3356bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3357 const UChar* text, const UChar* text_end,
3358 const UChar* text_range)
3359{
3360 const UChar *s, *se, *t, *p, *end;
3361 const UChar *tail;
3362 ptrdiff_t skip, tlen1;
3363
3364# ifdef ONIG_DEBUG_SEARCH
3365 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3366 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3367# endif
3368
3369 tail = target_end - 1;
3370 tlen1 = tail - target;
3371 end = text_range;
3372 if (end + tlen1 > text_end)
3373 end = text_end - tlen1;
3374
3375 s = text;
3376
3377 if (IS_NULL(reg->int_map)) {
3378 while (s < end) {
3379 p = se = s + tlen1;
3380 t = tail;
3381 while (*p == *t) {
3382 if (t == target) return (UChar* )s;
3383 p--; t--;
3384 }
3385 skip = reg->map[*se];
3386 t = s;
3387 do {
3388 s += enclen(reg->enc, s, end);
3389 } while ((s - t) < skip && s < end);
3390 }
3391 }
3392 else {
3393# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3394 while (s < end) {
3395 p = se = s + tlen1;
3396 t = tail;
3397 while (*p == *t) {
3398 if (t == target) return (UChar* )s;
3399 p--; t--;
3400 }
3401 skip = reg->int_map[*se];
3402 t = s;
3403 do {
3404 s += enclen(reg->enc, s, end);
3405 } while ((s - t) < skip && s < end);
3406 }
3407# endif
3408 }
3409
3410 return (UChar* )NULL;
3411}
3412
3413/* Boyer-Moore-Horspool search */
3414static UChar*
3415bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3416 const UChar* text, const UChar* text_end, const UChar* text_range)
3417{
3418 const UChar *s, *t, *p, *end;
3419 const UChar *tail;
3420
3421# ifdef ONIG_DEBUG_SEARCH
3422 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3423 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3424# endif
3425
3426 end = text_range + (target_end - target) - 1;
3427 if (end > text_end)
3428 end = text_end;
3429
3430 tail = target_end - 1;
3431 s = text + (target_end - target) - 1;
3432 if (IS_NULL(reg->int_map)) {
3433 while (s < end) {
3434 p = s;
3435 t = tail;
3436# ifdef ONIG_DEBUG_SEARCH
3437 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3438 (intptr_t )(s - text), s);
3439# endif
3440 while (*p == *t) {
3441 if (t == target) return (UChar* )p;
3442 p--; t--;
3443 }
3444 s += reg->map[*s];
3445 }
3446 }
3447 else { /* see int_map[] */
3448# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3449 while (s < end) {
3450 p = s;
3451 t = tail;
3452 while (*p == *t) {
3453 if (t == target) return (UChar* )p;
3454 p--; t--;
3455 }
3456 s += reg->int_map[*s];
3457 }
3458# endif
3459 }
3460 return (UChar* )NULL;
3461}
3462
3463/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3464static UChar*
3465bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3466 const UChar* text, const UChar* text_end,
3467 const UChar* text_range)
3468{
3469 const UChar *s, *se, *t, *end;
3470 const UChar *tail;
3471 ptrdiff_t skip, tlen1;
3472 OnigEncoding enc = reg->enc;
3473 int case_fold_flag = reg->case_fold_flag;
3474
3475# ifdef ONIG_DEBUG_SEARCH
3476 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3477 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3478# endif
3479
3480 tail = target_end - 1;
3481 tlen1 = tail - target;
3482 end = text_range;
3483 if (end + tlen1 > text_end)
3484 end = text_end - tlen1;
3485
3486 s = text;
3487
3488 if (IS_NULL(reg->int_map)) {
3489 while (s < end) {
3490 se = s + tlen1;
3491 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3492 s, se + 1))
3493 return (UChar* )s;
3494 skip = reg->map[*se];
3495 t = s;
3496 do {
3497 s += enclen(reg->enc, s, end);
3498 } while ((s - t) < skip && s < end);
3499 }
3500 }
3501 else {
3502# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3503 while (s < end) {
3504 se = s + tlen1;
3505 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3506 s, se + 1))
3507 return (UChar* )s;
3508 skip = reg->int_map[*se];
3509 t = s;
3510 do {
3511 s += enclen(reg->enc, s, end);
3512 } while ((s - t) < skip && s < end);
3513 }
3514# endif
3515 }
3516
3517 return (UChar* )NULL;
3518}
3519
3520/* Boyer-Moore-Horspool search (ignore case) */
3521static UChar*
3522bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3523 const UChar* text, const UChar* text_end, const UChar* text_range)
3524{
3525 const UChar *s, *p, *end;
3526 const UChar *tail;
3527 OnigEncoding enc = reg->enc;
3528 int case_fold_flag = reg->case_fold_flag;
3529
3530# ifdef ONIG_DEBUG_SEARCH
3531 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3532 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3533# endif
3534
3535 end = text_range + (target_end - target) - 1;
3536 if (end > text_end)
3537 end = text_end;
3538
3539 tail = target_end - 1;
3540 s = text + (target_end - target) - 1;
3541 if (IS_NULL(reg->int_map)) {
3542 while (s < end) {
3543 p = s - (target_end - target) + 1;
3544 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3545 p, s + 1))
3546 return (UChar* )p;
3547 s += reg->map[*s];
3548 }
3549 }
3550 else { /* see int_map[] */
3551# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3552 while (s < end) {
3553 p = s - (target_end - target) + 1;
3554 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3555 p, s + 1))
3556 return (UChar* )p;
3557 s += reg->int_map[*s];
3558 }
3559# endif
3560 }
3561 return (UChar* )NULL;
3562}
3563
3564#else /* USE_SUNDAY_QUICK_SEARCH */
3565
3566/* Sunday's quick search applied to a multibyte string */
3567static UChar*
3568bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3569 const UChar* text, const UChar* text_end,
3570 const UChar* text_range)
3571{
3572 const UChar *s, *se, *t, *p, *end;
3573 const UChar *tail;
3574 ptrdiff_t skip, tlen1;
3575 OnigEncoding enc = reg->enc;
3576
3577# ifdef ONIG_DEBUG_SEARCH
3578 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3579 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3580# endif
3581
3582 tail = target_end - 1;
3583 tlen1 = tail - target;
3584 end = text_range;
3585 if (end + tlen1 > text_end)
3586 end = text_end - tlen1;
3587
3588 s = text;
3589
3590 if (IS_NULL(reg->int_map)) {
3591 while (s < end) {
3592 p = se = s + tlen1;
3593 t = tail;
3594 while (*p == *t) {
3595 if (t == target) return (UChar* )s;
3596 p--; t--;
3597 }
3598 if (s + 1 >= end) break;
3599 skip = reg->map[se[1]];
3600 t = s;
3601 do {
3602 s += enclen(enc, s, end);
3603 } while ((s - t) < skip && s < end);
3604 }
3605 }
3606 else {
3607# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3608 while (s < end) {
3609 p = se = s + tlen1;
3610 t = tail;
3611 while (*p == *t) {
3612 if (t == target) return (UChar* )s;
3613 p--; t--;
3614 }
3615 if (s + 1 >= end) break;
3616 skip = reg->int_map[se[1]];
3617 t = s;
3618 do {
3619 s += enclen(enc, s, end);
3620 } while ((s - t) < skip && s < end);
3621 }
3622# endif
3623 }
3624
3625 return (UChar* )NULL;
3626}
3627
3628/* Sunday's quick search */
3629static UChar*
3630bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3631 const UChar* text, const UChar* text_end, const UChar* text_range)
3632{
3633 const UChar *s, *t, *p, *end;
3634 const UChar *tail;
3635 ptrdiff_t tlen1;
3636
3637# ifdef ONIG_DEBUG_SEARCH
3638 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3639 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3640# endif
3641
3642 tail = target_end - 1;
3643 tlen1 = tail - target;
3644 end = text_range + tlen1;
3645 if (end > text_end)
3646 end = text_end;
3647
3648 s = text + tlen1;
3649 if (IS_NULL(reg->int_map)) {
3650 while (s < end) {
3651 p = s;
3652 t = tail;
3653 while (*p == *t) {
3654 if (t == target) return (UChar* )p;
3655 p--; t--;
3656 }
3657 if (s + 1 >= end) break;
3658 s += reg->map[s[1]];
3659 }
3660 }
3661 else { /* see int_map[] */
3662# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3663 while (s < end) {
3664 p = s;
3665 t = tail;
3666 while (*p == *t) {
3667 if (t == target) return (UChar* )p;
3668 p--; t--;
3669 }
3670 if (s + 1 >= end) break;
3671 s += reg->int_map[s[1]];
3672 }
3673# endif
3674 }
3675 return (UChar* )NULL;
3676}
3677
3678/* Sunday's quick search applied to a multibyte string (ignore case) */
3679static UChar*
3680bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3681 const UChar* text, const UChar* text_end,
3682 const UChar* text_range)
3683{
3684 const UChar *s, *se, *t, *end;
3685 const UChar *tail;
3686 ptrdiff_t skip, tlen1;
3687 OnigEncoding enc = reg->enc;
3688 int case_fold_flag = reg->case_fold_flag;
3689
3690# ifdef ONIG_DEBUG_SEARCH
3691 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3692 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3693# endif
3694
3695 tail = target_end - 1;
3696 tlen1 = tail - target;
3697 end = text_range;
3698 if (end + tlen1 > text_end)
3699 end = text_end - tlen1;
3700
3701 s = text;
3702
3703 if (IS_NULL(reg->int_map)) {
3704 while (s < end) {
3705 se = s + tlen1;
3706 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3707 s, se + 1))
3708 return (UChar* )s;
3709 if (s + 1 >= end) break;
3710 skip = reg->map[se[1]];
3711 t = s;
3712 do {
3713 s += enclen(enc, s, end);
3714 } while ((s - t) < skip && s < end);
3715 }
3716 }
3717 else {
3718# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3719 while (s < end) {
3720 se = s + tlen1;
3721 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3722 s, se + 1))
3723 return (UChar* )s;
3724 if (s + 1 >= end) break;
3725 skip = reg->int_map[se[1]];
3726 t = s;
3727 do {
3728 s += enclen(enc, s, end);
3729 } while ((s - t) < skip && s < end);
3730 }
3731# endif
3732 }
3733
3734 return (UChar* )NULL;
3735}
3736
3737/* Sunday's quick search (ignore case) */
3738static UChar*
3739bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3740 const UChar* text, const UChar* text_end, const UChar* text_range)
3741{
3742 const UChar *s, *p, *end;
3743 const UChar *tail;
3744 ptrdiff_t tlen1;
3745 OnigEncoding enc = reg->enc;
3746 int case_fold_flag = reg->case_fold_flag;
3747
3748# ifdef ONIG_DEBUG_SEARCH
3749 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3750 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3751# endif
3752
3753 tail = target_end - 1;
3754 tlen1 = tail - target;
3755 end = text_range + tlen1;
3756 if (end > text_end)
3757 end = text_end;
3758
3759 s = text + tlen1;
3760 if (IS_NULL(reg->int_map)) {
3761 while (s < end) {
3762 p = s - tlen1;
3763 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3764 p, s + 1))
3765 return (UChar* )p;
3766 if (s + 1 >= end) break;
3767 s += reg->map[s[1]];
3768 }
3769 }
3770 else { /* see int_map[] */
3771# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3772 while (s < end) {
3773 p = s - tlen1;
3774 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3775 p, s + 1))
3776 return (UChar* )p;
3777 if (s + 1 >= end) break;
3778 s += reg->int_map[s[1]];
3779 }
3780# endif
3781 }
3782 return (UChar* )NULL;
3783}
3784#endif /* USE_SUNDAY_QUICK_SEARCH */
3785
3786#ifdef USE_INT_MAP_BACKWARD
3787static int
3788set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3789 int** skip)
3790{
3791 int i, len;
3792
3793 if (IS_NULL(*skip)) {
3794 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3795 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3796 }
3797
3798 len = (int )(end - s);
3799 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3800 (*skip)[i] = len;
3801
3802 for (i = len - 1; i > 0; i--)
3803 (*skip)[s[i]] = i;
3804
3805 return 0;
3806}
3807
3808static UChar*
3809bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3810 const UChar* text, const UChar* adjust_text,
3811 const UChar* text_end, const UChar* text_start)
3812{
3813 const UChar *s, *t, *p;
3814
3815 s = text_end - (target_end - target);
3816 if (text_start < s)
3817 s = text_start;
3818 else
3819 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3820
3821 while (s >= text) {
3822 p = s;
3823 t = target;
3824 while (t < target_end && *p == *t) {
3825 p++; t++;
3826 }
3827 if (t == target_end)
3828 return (UChar* )s;
3829
3830 s -= reg->int_map_backward[*s];
3831 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3832 }
3833
3834 return (UChar* )NULL;
3835}
3836#endif
3837
3838static UChar*
3839map_search(OnigEncoding enc, UChar map[],
3840 const UChar* text, const UChar* text_range, const UChar* text_end)
3841{
3842 const UChar *s = text;
3843
3844 while (s < text_range) {
3845 if (map[*s]) return (UChar* )s;
3846
3847 s += enclen(enc, s, text_end);
3848 }
3849 return (UChar* )NULL;
3850}
3851
3852static UChar*
3853map_search_backward(OnigEncoding enc, UChar map[],
3854 const UChar* text, const UChar* adjust_text,
3855 const UChar* text_start, const UChar* text_end)
3856{
3857 const UChar *s = text_start;
3858
3859 while (s >= text) {
3860 if (map[*s]) return (UChar* )s;
3861
3862 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3863 }
3864 return (UChar* )NULL;
3865}
3866
3867extern OnigPosition
3868onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3869 OnigOptionType option)
3870{
3871 ptrdiff_t r;
3872 UChar *prev;
3873 OnigMatchArg msa;
3874
3875 MATCH_ARG_INIT(msa, option, region, at, at);
3876#ifdef USE_COMBINATION_EXPLOSION_CHECK
3877 {
3878 ptrdiff_t offset = at - str;
3879 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3880 }
3881#endif
3882
3883 if (region) {
3884 r = onig_region_resize_clear(region, reg->num_mem + 1);
3885 }
3886 else
3887 r = 0;
3888
3889 if (r == 0) {
3890 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3891 r = match_at(reg, str, end,
3892#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3893 end,
3894#endif
3895 at, prev, &msa);
3896 }
3897
3898 MATCH_ARG_FREE(msa);
3899 return r;
3900}
3901
3902static int
3903forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3904 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3905{
3906 UChar *p, *pprev = (UChar* )NULL;
3907
3908#ifdef ONIG_DEBUG_SEARCH
3909 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3910 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3911#endif
3912
3913 p = s;
3914 if (reg->dmin > 0) {
3915 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3916 p += reg->dmin;
3917 }
3918 else {
3919 UChar *q = p + reg->dmin;
3920
3921 if (q >= end) return 0; /* fail */
3922 while (p < q) p += enclen(reg->enc, p, end);
3923 }
3924 }
3925
3926 retry:
3927 switch (reg->optimize) {
3928 case ONIG_OPTIMIZE_EXACT:
3929 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3930 break;
3931 case ONIG_OPTIMIZE_EXACT_IC:
3932 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3933 reg->exact, reg->exact_end, p, end, range);
3934 break;
3935
3936 case ONIG_OPTIMIZE_EXACT_BM:
3937 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3938 break;
3939
3940 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3941 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3942 break;
3943
3944 case ONIG_OPTIMIZE_EXACT_BM_IC:
3945 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3946 break;
3947
3948 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3949 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3950 break;
3951
3952 case ONIG_OPTIMIZE_MAP:
3953 p = map_search(reg->enc, reg->map, p, range, end);
3954 break;
3955 }
3956
3957 if (p && p < range) {
3958 if (p - reg->dmin < s) {
3959 retry_gate:
3960 pprev = p;
3961 p += enclen(reg->enc, p, end);
3962 goto retry;
3963 }
3964
3965 if (reg->sub_anchor) {
3966 UChar* prev;
3967
3968 switch (reg->sub_anchor) {
3969 case ANCHOR_BEGIN_LINE:
3970 if (!ON_STR_BEGIN(p)) {
3971 prev = onigenc_get_prev_char_head(reg->enc,
3972 (pprev ? pprev : str), p, end);
3973 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3974 goto retry_gate;
3975 }
3976 break;
3977
3978 case ANCHOR_END_LINE:
3979 if (ON_STR_END(p)) {
3980#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3981 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3982 (pprev ? pprev : str), p);
3983 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3984 goto retry_gate;
3985#endif
3986 }
3987 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3988 goto retry_gate;
3989 break;
3990 }
3991 }
3992
3993 if (reg->dmax == 0) {
3994 *low = p;
3995 if (low_prev) {
3996 if (*low > s)
3997 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3998 else
3999 *low_prev = onigenc_get_prev_char_head(reg->enc,
4000 (pprev ? pprev : str), p, end);
4001 }
4002 }
4003 else {
4004 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4005 if (p < str + reg->dmax) {
4006 *low = (UChar* )str;
4007 if (low_prev)
4008 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4009 }
4010 else {
4011 *low = p - reg->dmax;
4012 if (*low > s) {
4013 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4014 *low, end, (const UChar** )low_prev);
4015 if (low_prev && IS_NULL(*low_prev))
4016 *low_prev = onigenc_get_prev_char_head(reg->enc,
4017 (pprev ? pprev : s), *low, end);
4018 }
4019 else {
4020 if (low_prev)
4021 *low_prev = onigenc_get_prev_char_head(reg->enc,
4022 (pprev ? pprev : str), *low, end);
4023 }
4024 }
4025 }
4026 }
4027 /* no needs to adjust *high, *high is used as range check only */
4028 *high = p - reg->dmin;
4029
4030#ifdef ONIG_DEBUG_SEARCH
4031 fprintf(stderr,
4032 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4033 *low - str, *high - str, reg->dmin, reg->dmax);
4034#endif
4035 return 1; /* success */
4036 }
4037
4038 return 0; /* fail */
4039}
4040
4041#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4042
4043static int
4044backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4045 UChar* s, const UChar* range, UChar* adjrange,
4046 UChar** low, UChar** high)
4047{
4048 UChar *p;
4049
4050 range += reg->dmin;
4051 p = s;
4052
4053 retry:
4054 switch (reg->optimize) {
4055 case ONIG_OPTIMIZE_EXACT:
4056 exact_method:
4057 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4058 range, adjrange, end, p);
4059 break;
4060
4061 case ONIG_OPTIMIZE_EXACT_IC:
4062 case ONIG_OPTIMIZE_EXACT_BM_IC:
4063 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4064 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4065 reg->exact, reg->exact_end,
4066 range, adjrange, end, p);
4067 break;
4068
4069 case ONIG_OPTIMIZE_EXACT_BM:
4070 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4071#ifdef USE_INT_MAP_BACKWARD
4072 if (IS_NULL(reg->int_map_backward)) {
4073 int r;
4074 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4075 goto exact_method;
4076
4077 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4078 &(reg->int_map_backward));
4079 if (r) return r;
4080 }
4081 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4082 end, p);
4083#else
4084 goto exact_method;
4085#endif
4086 break;
4087
4088 case ONIG_OPTIMIZE_MAP:
4089 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4090 break;
4091 }
4092
4093 if (p) {
4094 if (reg->sub_anchor) {
4095 UChar* prev;
4096
4097 switch (reg->sub_anchor) {
4098 case ANCHOR_BEGIN_LINE:
4099 if (!ON_STR_BEGIN(p)) {
4100 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4101 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4102 p = prev;
4103 goto retry;
4104 }
4105 }
4106 break;
4107
4108 case ANCHOR_END_LINE:
4109 if (ON_STR_END(p)) {
4110#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4111 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4112 if (IS_NULL(prev)) goto fail;
4113 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4114 p = prev;
4115 goto retry;
4116 }
4117#endif
4118 }
4119 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4120 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4121 if (IS_NULL(p)) goto fail;
4122 goto retry;
4123 }
4124 break;
4125 }
4126 }
4127
4128 /* no needs to adjust *high, *high is used as range check only */
4129 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4130 *low = p - reg->dmax;
4131 *high = p - reg->dmin;
4132 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4133 }
4134
4135#ifdef ONIG_DEBUG_SEARCH
4136 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4137 (int )(*low - str), (int )(*high - str));
4138#endif
4139 return 1; /* success */
4140 }
4141
4142 fail:
4143#ifdef ONIG_DEBUG_SEARCH
4144 fprintf(stderr, "backward_search_range: fail.\n");
4145#endif
4146 return 0; /* fail */
4147}
4148
4149
4150extern OnigPosition
4151onig_search(regex_t* reg, const UChar* str, const UChar* end,
4152 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4153{
4154 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4155}
4156
4157extern OnigPosition
4158onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4159 const UChar* global_pos,
4160 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4161{
4162 ptrdiff_t r;
4163 UChar *s, *prev;
4164 OnigMatchArg msa;
4165#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4166 const UChar *orig_start = start;
4167 const UChar *orig_range = range;
4168#endif
4169
4170#ifdef ONIG_DEBUG_SEARCH
4171 fprintf(stderr,
4172 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4173 (uintptr_t )str, str, end - str, start - str, range - str);
4174#endif
4175
4176 if (region) {
4177 r = onig_region_resize_clear(region, reg->num_mem + 1);
4178 if (r) goto finish_no_msa;
4179 }
4180
4181 if (start > end || start < str) goto mismatch_no_msa;
4182
4183
4184#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4185# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4186# define MATCH_AND_RETURN_CHECK(upper_range) \
4187 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4188 if (r != ONIG_MISMATCH) {\
4189 if (r >= 0) {\
4190 if (! IS_FIND_LONGEST(reg->options)) {\
4191 goto match;\
4192 }\
4193 }\
4194 else goto finish; /* error */ \
4195 }
4196# else
4197# define MATCH_AND_RETURN_CHECK(upper_range) \
4198 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4199 if (r != ONIG_MISMATCH) {\
4200 if (r >= 0) {\
4201 goto match;\
4202 }\
4203 else goto finish; /* error */ \
4204 }
4205# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4206#else
4207# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4208# define MATCH_AND_RETURN_CHECK(none) \
4209 r = match_at(reg, str, end, s, prev, &msa);\
4210 if (r != ONIG_MISMATCH) {\
4211 if (r >= 0) {\
4212 if (! IS_FIND_LONGEST(reg->options)) {\
4213 goto match;\
4214 }\
4215 }\
4216 else goto finish; /* error */ \
4217 }
4218# else
4219# define MATCH_AND_RETURN_CHECK(none) \
4220 r = match_at(reg, str, end, s, prev, &msa);\
4221 if (r != ONIG_MISMATCH) {\
4222 if (r >= 0) {\
4223 goto match;\
4224 }\
4225 else goto finish; /* error */ \
4226 }
4227# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4228#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4229
4230
4231 /* anchor optimize: resume search range */
4232 if (reg->anchor != 0 && str < end) {
4233 UChar *min_semi_end, *max_semi_end;
4234
4235 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4236 /* search start-position only */
4237 begin_position:
4238 if (range > start)
4239 {
4240 if (global_pos > start)
4241 {
4242 if (global_pos < range)
4243 range = global_pos + 1;
4244 }
4245 else
4246 range = start + 1;
4247 }
4248 else
4249 range = start;
4250 }
4251 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4252 /* search str-position only */
4253 if (range > start) {
4254 if (start != str) goto mismatch_no_msa;
4255 range = str + 1;
4256 }
4257 else {
4258 if (range <= str) {
4259 start = str;
4260 range = str;
4261 }
4262 else
4263 goto mismatch_no_msa;
4264 }
4265 }
4266 else if (reg->anchor & ANCHOR_END_BUF) {
4267 min_semi_end = max_semi_end = (UChar* )end;
4268
4269 end_buf:
4270 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
4271 goto mismatch_no_msa;
4272
4273 if (range > start) {
4274 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
4275 start = min_semi_end - reg->anchor_dmax;
4276 if (start < end)
4277 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
4278 }
4279 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
4280 range = max_semi_end - reg->anchor_dmin + 1;
4281 }
4282
4283 if (start > range) goto mismatch_no_msa;
4284 /* If start == range, match with empty at end.
4285 Backward search is used. */
4286 }
4287 else {
4288 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
4289 range = min_semi_end - reg->anchor_dmax;
4290 }
4291 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
4292 start = max_semi_end - reg->anchor_dmin;
4293 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
4294 }
4295 if (range > start) goto mismatch_no_msa;
4296 }
4297 }
4298 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
4299 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
4300
4301 max_semi_end = (UChar* )end;
4302 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
4303 min_semi_end = pre_end;
4304
4305#ifdef USE_CRNL_AS_LINE_TERMINATOR
4306 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
4307 if (IS_NOT_NULL(pre_end) &&
4308 IS_NEWLINE_CRLF(reg->options) &&
4309 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
4310 min_semi_end = pre_end;
4311 }
4312#endif
4313 if (min_semi_end > str && start <= min_semi_end) {
4314 goto end_buf;
4315 }
4316 }
4317 else {
4318 min_semi_end = (UChar* )end;
4319 goto end_buf;
4320 }
4321 }
4322 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
4323 goto begin_position;
4324 }
4325 }
4326 else if (str == end) { /* empty string */
4327 static const UChar address_for_empty_string[] = "";
4328
4329#ifdef ONIG_DEBUG_SEARCH
4330 fprintf(stderr, "onig_search: empty string.\n");
4331#endif
4332
4333 if (reg->threshold_len == 0) {
4334 start = end = str = address_for_empty_string;
4335 s = (UChar* )start;
4336 prev = (UChar* )NULL;
4337
4338 MATCH_ARG_INIT(msa, option, region, start, start);
4339#ifdef USE_COMBINATION_EXPLOSION_CHECK
4340 msa.state_check_buff = (void* )0;
4341 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
4342#endif
4343 MATCH_AND_RETURN_CHECK(end);
4344 goto mismatch;
4345 }
4346 goto mismatch_no_msa;
4347 }
4348
4349#ifdef ONIG_DEBUG_SEARCH
4350 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
4351 (int )(end - str), (int )(start - str), (int )(range - str));
4352#endif
4353
4354 MATCH_ARG_INIT(msa, option, region, start, global_pos);
4355#ifdef USE_COMBINATION_EXPLOSION_CHECK
4356 {
4357 ptrdiff_t offset = (MIN(start, range) - str);
4358 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4359 }
4360#endif
4361
4362 s = (UChar* )start;
4363 if (range > start) { /* forward search */
4364 if (s > str)
4365 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4366 else
4367 prev = (UChar* )NULL;
4368
4369 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4370 UChar *sch_range, *low, *high, *low_prev;
4371
4372 sch_range = (UChar* )range;
4373 if (reg->dmax != 0) {
4374 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4375 sch_range = (UChar* )end;
4376 else {
4377 sch_range += reg->dmax;
4378 if (sch_range > end) sch_range = (UChar* )end;
4379 }
4380 }
4381
4382 if ((end - start) < reg->threshold_len)
4383 goto mismatch;
4384
4385 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4386 do {
4387 if (! forward_search_range(reg, str, end, s, sch_range,
4388 &low, &high, &low_prev)) goto mismatch;
4389 if (s < low) {
4390 s = low;
4391 prev = low_prev;
4392 }
4393 while (s <= high) {
4394 MATCH_AND_RETURN_CHECK(orig_range);
4395 prev = s;
4396 s += enclen(reg->enc, s, end);
4397 }
4398 } while (s < range);
4399 goto mismatch;
4400 }
4401 else { /* check only. */
4402 if (! forward_search_range(reg, str, end, s, sch_range,
4403 &low, &high, (UChar** )NULL)) goto mismatch;
4404
4405 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4406 do {
4407 MATCH_AND_RETURN_CHECK(orig_range);
4408 prev = s;
4409 s += enclen(reg->enc, s, end);
4410
4411 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4412 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4413 && s < range) {
4414 prev = s;
4415 s += enclen(reg->enc, s, end);
4416 }
4417 }
4418 } while (s < range);
4419 goto mismatch;
4420 }
4421 }
4422 }
4423
4424 do {
4425 MATCH_AND_RETURN_CHECK(orig_range);
4426 prev = s;
4427 s += enclen(reg->enc, s, end);
4428 } while (s < range);
4429
4430 if (s == range) { /* because empty match with /$/. */
4431 MATCH_AND_RETURN_CHECK(orig_range);
4432 }
4433 }
4434 else { /* backward search */
4435 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4436 UChar *low, *high, *adjrange, *sch_start;
4437
4438 if (range < end)
4439 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4440 else
4441 adjrange = (UChar* )end;
4442
4443 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4444 (end - range) >= reg->threshold_len) {
4445 do {
4446 sch_start = s + reg->dmax;
4447 if (sch_start > end) sch_start = (UChar* )end;
4448 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4449 &low, &high) <= 0)
4450 goto mismatch;
4451
4452 if (s > high)
4453 s = high;
4454
4455 while (s >= low) {
4456 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4457 MATCH_AND_RETURN_CHECK(orig_start);
4458 s = prev;
4459 }
4460 } while (s >= range);
4461 goto mismatch;
4462 }
4463 else { /* check only. */
4464 if ((end - range) < reg->threshold_len) goto mismatch;
4465
4466 sch_start = s;
4467 if (reg->dmax != 0) {
4468 if (reg->dmax == ONIG_INFINITE_DISTANCE)
4469 sch_start = (UChar* )end;
4470 else {
4471 sch_start += reg->dmax;
4472 if (sch_start > end) sch_start = (UChar* )end;
4473 else
4474 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4475 start, sch_start, end);
4476 }
4477 }
4478 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4479 &low, &high) <= 0) goto mismatch;
4480 }
4481 }
4482
4483 do {
4484 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4485 MATCH_AND_RETURN_CHECK(orig_start);
4486 s = prev;
4487 } while (s >= range);
4488 }
4489
4490 mismatch:
4491#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4492 if (IS_FIND_LONGEST(reg->options)) {
4493 if (msa.best_len >= 0) {
4494 s = msa.best_s;
4495 goto match;
4496 }
4497 }
4498#endif
4499 r = ONIG_MISMATCH;
4500
4501 finish:
4502 MATCH_ARG_FREE(msa);
4503
4504 /* If result is mismatch and no FIND_NOT_EMPTY option,
4505 then the region is not set in match_at(). */
4506 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4507 onig_region_clear(region);
4508 }
4509
4510#ifdef ONIG_DEBUG
4511 if (r != ONIG_MISMATCH)
4512 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4513#endif
4514 return r;
4515
4516 mismatch_no_msa:
4517 r = ONIG_MISMATCH;
4518 finish_no_msa:
4519#ifdef ONIG_DEBUG
4520 if (r != ONIG_MISMATCH)
4521 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4522#endif
4523 return r;
4524
4525 match:
4526 MATCH_ARG_FREE(msa);
4527 return s - str;
4528}
4529
4530extern OnigPosition
4531onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4532 OnigRegion* region, OnigOptionType option,
4533 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4534 void* callback_arg)
4535{
4536 OnigPosition r;
4537 OnigPosition n;
4538 int rs;
4539 const UChar* start;
4540
4541 n = 0;
4542 start = str;
4543 while (1) {
4544 r = onig_search(reg, str, end, start, end, region, option);
4545 if (r >= 0) {
4546 rs = scan_callback(n, r, region, callback_arg);
4547 n++;
4548 if (rs != 0)
4549 return rs;
4550
4551 if (region->end[0] == start - str) {
4552 if (start >= end) break;
4553 start += enclen(reg->enc, start, end);
4554 }
4555 else
4556 start = str + region->end[0];
4557
4558 if (start > end)
4559 break;
4560 }
4561 else if (r == ONIG_MISMATCH) {
4562 break;
4563 }
4564 else { /* error */
4565 return r;
4566 }
4567 }
4568
4569 return n;
4570}
4571
4572extern OnigEncoding
4573onig_get_encoding(const regex_t* reg)
4574{
4575 return reg->enc;
4576}
4577
4578extern OnigOptionType
4579onig_get_options(const regex_t* reg)
4580{
4581 return reg->options;
4582}
4583
4584extern OnigCaseFoldType
4585onig_get_case_fold_flag(const regex_t* reg)
4586{
4587 return reg->case_fold_flag;
4588}
4589
4590extern const OnigSyntaxType*
4591onig_get_syntax(const regex_t* reg)
4592{
4593 return reg->syntax;
4594}
4595
4596extern int
4597onig_number_of_captures(const regex_t* reg)
4598{
4599 return reg->num_mem;
4600}
4601
4602extern int
4603onig_number_of_capture_histories(const regex_t* reg)
4604{
4605#ifdef USE_CAPTURE_HISTORY
4606 int i, n;
4607
4608 n = 0;
4609 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4610 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4611 n++;
4612 }
4613 return n;
4614#else
4615 return 0;
4616#endif
4617}
4618
4619extern void
4620onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
4621{
4622 *to = *from;
4623}
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition: defines.h:89
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
Definition: encoding.h:782
Definition: win32.h:696