14#include "ruby/internal/config.h"
24#include "debug_counter.h"
29#include "internal/array.h"
30#include "internal/compar.h"
31#include "internal/compilers.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
46#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_tainted_str_new_cstr
67#undef rb_usascii_str_new_cstr
68#undef rb_utf8_str_new_cstr
69#undef rb_enc_str_new_cstr
70#undef rb_external_str_new_cstr
71#undef rb_locale_str_new_cstr
72#undef rb_str_dup_frozen
73#undef rb_str_buf_new_cstr
104#define RUBY_MAX_CHAR_LEN 16
105#define STR_SHARED_ROOT FL_USER5
106#define STR_BORROWED FL_USER6
107#define STR_TMPLOCK FL_USER7
108#define STR_NOFREE FL_USER18
109#define STR_FAKESTR FL_USER19
111#define STR_SET_NOEMBED(str) do {\
112 FL_SET((str), STR_NOEMBED);\
114 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
117 STR_SET_EMBED_LEN((str), 0);\
120#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
122# define STR_SET_EMBED_LEN(str, n) do { \
123 assert(str_embed_capa(str) > (n));\
124 RSTRING(str)->as.embed.len = (n);\
127# define STR_SET_EMBED_LEN(str, n) do { \
129 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
130 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
134#define STR_SET_LEN(str, n) do { \
135 if (STR_EMBED_P(str)) {\
136 STR_SET_EMBED_LEN((str), (n));\
139 RSTRING(str)->as.heap.len = (n);\
143#define STR_DEC_LEN(str) do {\
144 if (STR_EMBED_P(str)) {\
145 long n = RSTRING_LEN(str);\
147 STR_SET_EMBED_LEN((str), n);\
150 RSTRING(str)->as.heap.len--;\
/* Expands to rb_enc_mbminlen() applied to the encoding returned by
 * rb_enc_get(str).  Throughout this file the result is stored in `termlen`
 * and used as the byte length of the string terminator. */
154#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
155#define TERM_FILL(ptr, termlen) do {\
156 char *const term_fill_ptr = (ptr);\
157 const int term_fill_len = (termlen);\
158 *term_fill_ptr = '\0';\
159 if (UNLIKELY(term_fill_len > 1))\
160 memset(term_fill_ptr, 0, term_fill_len);\
163#define RESIZE_CAPA(str,capacity) do {\
164 const int termlen = TERM_LEN(str);\
165 RESIZE_CAPA_TERM(str,capacity,termlen);\
167#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
168 if (STR_EMBED_P(str)) {\
169 if (str_embed_capa(str) < capacity + termlen) {\
170 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
171 const long tlen = RSTRING_LEN(str);\
172 memcpy(tmp, RSTRING_PTR(str), tlen);\
173 RSTRING(str)->as.heap.ptr = tmp;\
174 RSTRING(str)->as.heap.len = tlen;\
175 STR_SET_NOEMBED(str);\
176 RSTRING(str)->as.heap.aux.capa = (capacity);\
180 assert(!FL_TEST((str), STR_SHARED)); \
181 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
182 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
183 RSTRING(str)->as.heap.aux.capa = (capacity);\
187#define STR_SET_SHARED(str, shared_str) do { \
188 if (!FL_TEST(str, STR_FAKESTR)) { \
189 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
190 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
191 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
192 FL_SET((str), STR_SHARED); \
193 FL_SET((shared_str), STR_SHARED_ROOT); \
194 if (RBASIC_CLASS((shared_str)) == 0) \
195 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Pointer to the heap buffer of a non-embedded string. */
199#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Heap buffer size in bytes: recorded capacity (as.heap.aux.capa) plus the
 * terminator length.  This file passes it as the old size to
 * SIZED_REALLOC_N() and as the size to ruby_sized_xfree(). */
200#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
203#define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING defaults to 0 when not defined by the build. */
205#if !defined SHARABLE_MIDDLE_SUBSTRING
206# define SHARABLE_MIDDLE_SUBSTRING 0
/* With SHARABLE_MIDDLE_SUBSTRING == 0 a substring is sharable only when it
 * runs to `end` (beg + len == end); the second definition makes every
 * substring sharable.
 * NOTE(review): the #endif/#else lines separating these branches are not
 * visible in this extract. */
208#if !SHARABLE_MIDDLE_SUBSTRING
209#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
211#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/* Returns rb_gc_obj_slot_size(str) minus the offset of the as.embed.ary
 * member within struct RString.  Callers in this file compare the result
 * against `len + termlen` to decide whether data fits in the embedded array.
 * NOTE(review): the return type and braces are on lines not visible here. */
216str_embed_capa(VALUE str)
219 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.embed.
ary);
/* Returns the object size needed to hold `capa` bytes in the embedded array:
 * offsetof(struct RString, as.embed.ary) + capa. */
226str_embed_size(
long capa)
228 return offsetof(
struct RString, as.embed.
ary) + capa;
/* Reports whether `len` bytes of content plus a `termlen`-byte terminator can
 * be stored embedded: the decision is rb_gc_size_allocatable_p() applied to
 * str_embed_size(len + termlen). */
232STR_EMBEDDABLE_P(
long len,
long termlen)
235 return rb_gc_size_allocatable_p(str_embed_size(len + termlen));
241static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str);
242static VALUE str_new_frozen(VALUE klass, VALUE orig);
243static VALUE str_new_frozen_buffer(VALUE klass, VALUE orig,
int copy_encoding);
244static VALUE str_new_static(VALUE klass,
const char *ptr,
long len,
int encindex);
245static VALUE str_new(VALUE klass,
const char *ptr,
long len);
246static void str_make_independent_expand(VALUE str,
long len,
long expand,
const int termlen);
247static inline void str_modifiable(VALUE str);
248static VALUE rb_str_downcase(
int argc, VALUE *argv, VALUE str);
/* Makes str independent without growing it: delegates to
 * str_make_independent_expand() with an expansion of 0 and the terminator
 * length of str's encoding.
 * NOTE(review): the declaration of `len` is on a line not visible here. */
251str_make_independent(VALUE str)
254 int termlen = TERM_LEN(str);
255 str_make_independent_expand((str), len, 0L, termlen);
258static inline int str_dependent_p(VALUE str);
/* Calls str_make_independent(str) only when str_dependent_p(str) is true;
 * otherwise the visible code does nothing to str. */
261rb_str_make_independent(VALUE str)
263 if (str_dependent_p(str)) {
264 str_make_independent(str);
/* Debug aid: writes a warning to stderr that some function is about to return
 * NULL and that a crash is likely to follow.
 * NOTE(review): the argument supplied for the %s conversion is on a line not
 * visible in this extract -- presumably `func`; confirm. */
269rb_debug_rstring_null_ptr(
const char *func)
271 fprintf(stderr,
"%s is returning NULL!! "
272 "SIGSEGV is highly expected to follow immediately. "
273 "If you could reproduce, attach your debugger here, "
274 "and look at the passed string.",
279static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
/* For the generic UTF-16 and UTF-32 encoding indexes, selects a concrete
 * endianness from the leading bytes at q (a byte-order mark).
 * NOTE(review): the switch header, the assignment of q, any length checks and
 * the fall-back return are on lines not visible in this extract. */
282get_actual_encoding(
const int encidx, VALUE str)
284 const unsigned char *q;
287 case ENCINDEX_UTF_16:
/* FE FF -> big endian */
290 if (q[0] == 0xFE && q[1] == 0xFF) {
291 return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
/* FF FE -> little endian */
293 if (q[0] == 0xFF && q[1] == 0xFE) {
294 return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
297 case ENCINDEX_UTF_32:
/* 00 00 FE FF -> big endian */
300 if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
301 return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
/* FF FE 00 00 -> little endian */
303 if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
304 return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
312get_encoding(VALUE str)
318mustnot_broken(VALUE str)
320 if (is_broken_string(str)) {
326mustnot_wchar(VALUE str)
334static int fstring_cmp(VALUE a, VALUE b);
336static VALUE register_fstring(VALUE str,
bool copy);
/* True when str does not have the FL_EXIVAR flag set and its class is
 * exactly rb_cString. */
343#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
351fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
355 VALUE str = (
VALUE)*key;
361 if (rb_objspace_garbage_object_p(str)) {
386 if (STR_SHARED_P(str)) {
388 str_make_independent(str);
391 if (!BARE_STRING_P(str)) {
395 RBASIC(str)->flags |= RSTRING_FSTR;
397 *key = *value = arg->fstr = str;
411 if (
FL_TEST(str, RSTRING_FSTR))
414 bare = BARE_STRING_P(str);
416 if (STR_EMBED_P(str)) {
420 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_SHARED_ROOT|STR_SHARED) == (STR_NOEMBED|STR_SHARED_ROOT)) {
429 fstr = register_fstring(str, FALSE);
432 str_replace_shared_without_enc(str, fstr);
440register_fstring(VALUE str,
bool copy)
447 st_table *frozen_strings = rb_vm_fstring_table();
450 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
451 }
while (args.fstr ==
Qundef);
463setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
475 RBASIC_SET_CLASS_RAW((VALUE)fake_str,
rb_cString);
476 fake_str->as.heap.
len = len;
477 fake_str->as.heap.
ptr = (
char *)name;
478 fake_str->as.heap.aux.
capa = len;
479 return (VALUE)fake_str;
486rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
rb_encoding *enc)
/* Registers the bytes (ptr, len) as an fstring: wraps them with
 * setup_fake_str() using ENCINDEX_US_ASCII and hands the result to
 * register_fstring() with copy == FALSE.
 * NOTE(review): the declaration of `fake_str` is on a line not visible here. */
496MJIT_FUNC_EXPORTED VALUE
497rb_fstring_new(
const char *ptr,
long len)
500 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
507 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
/* Convenience wrapper over rb_fstring_new() for a NUL-terminated C string:
 * the length is taken with strlen(ptr), so ptr must not be NULL. */
511rb_fstring_cstr(
const char *
ptr)
513 return rb_fstring_new(
ptr, strlen(
ptr));
517fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
519 RBASIC_SET_CLASS((VALUE)key, (VALUE)arg);
/* Inequality test for two strings: the visible expression is nonzero when
 * the lengths differ or when memcmp() over alen bytes finds a difference.
 * NOTE(review): a further condition between those two operands, and the
 * assignments of alen/blen/aptr/bptr, are on lines not visible here. */
524fstring_cmp(VALUE a, VALUE b)
527 const char *aptr, *bptr;
530 return (alen != blen ||
532 memcmp(aptr, bptr, alen) != 0);
536single_byte_optimizable(VALUE str)
544 enc = STR_ENC_GET(str);
555static inline const char *
556search_nonascii(
const char *p,
const char *e)
558 const uintptr_t *s, *t;
560#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
561# if SIZEOF_UINTPTR_T == 8
562# define NONASCII_MASK UINT64_C(0x8080808080808080)
563# elif SIZEOF_UINTPTR_T == 4
564# define NONASCII_MASK UINT32_C(0x80808080)
566# error "don't know what to do."
569# if SIZEOF_UINTPTR_T == 8
570# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
571# elif SIZEOF_UINTPTR_T == 4
572# define NONASCII_MASK 0x80808080UL
574# error "don't know what to do."
578 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
579#if !UNALIGNED_WORD_ACCESS
580 if ((uintptr_t)p % SIZEOF_VOIDP) {
581 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
586 case 7:
if (p[-7]&0x80)
return p-7;
587 case 6:
if (p[-6]&0x80)
return p-6;
588 case 5:
if (p[-5]&0x80)
return p-5;
589 case 4:
if (p[-4]&0x80)
return p-4;
591 case 3:
if (p[-3]&0x80)
return p-3;
592 case 2:
if (p[-2]&0x80)
return p-2;
593 case 1:
if (p[-1]&0x80)
return p-1;
598#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
599#define aligned_ptr(value) \
600 __builtin_assume_aligned((value), sizeof(uintptr_t))
602#define aligned_ptr(value) (uintptr_t *)(value)
605 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
608 if (*s & NONASCII_MASK) {
609#ifdef WORDS_BIGENDIAN
610 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
612 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
622 case 7:
if (e[-7]&0x80)
return e-7;
623 case 6:
if (e[-6]&0x80)
return e-6;
624 case 5:
if (e[-5]&0x80)
return e-5;
625 case 4:
if (e[-4]&0x80)
return e-4;
627 case 3:
if (e[-3]&0x80)
return e-3;
628 case 2:
if (e[-2]&0x80)
return e-2;
629 case 1:
if (e[-1]&0x80)
return e-1;
637 const char *e = p +
len;
641 p = search_nonascii(p, e);
646 p = search_nonascii(p, e);
653 p = search_nonascii(p, e);
678 p = search_nonascii(p, e);
683 p = search_nonascii(p, e);
696 p = search_nonascii(p, e);
715str_enc_copy(VALUE str1, VALUE str2)
721rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
726 str_enc_copy(dest, src);
751rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
753 str_enc_copy(dest, src);
758enc_coderange_scan(VALUE str,
rb_encoding *enc,
int encidx)
770rb_enc_str_coderange_scan(VALUE str,
rb_encoding *enc)
783 cr = enc_coderange_scan(str, enc, encidx);
802str_mod_check(VALUE s,
const char *p,
long len)
805 rb_raise(rb_eRuntimeError,
"string modified");
/* Capacity of str excluding the terminator, by storage kind:
 *  - embedded: str_embed_capa(str) - termlen
 *  - STR_SHARED or STR_NOFREE set: the current heap length
 *  - otherwise: the recorded heap capacity (as.heap.aux.capa) */
810str_capacity(VALUE str,
const int termlen)
812 if (STR_EMBED_P(str)) {
814 return str_embed_capa(str) - termlen;
819 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
820 return RSTRING(str)->as.heap.len;
823 return RSTRING(str)->as.heap.aux.capa;
830 return str_capacity(str, TERM_LEN(str));
834must_not_null(
const char *
ptr)
837 rb_raise(rb_eArgError,
"NULL pointer given");
842str_alloc(VALUE klass,
size_t size)
845 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
/* Allocates a string object of class klass sized for `capa` bytes of
 * embedded array (size computed by str_embed_size) via str_alloc().
 * NOTE(review): the two asserts look like alternatives selected by a
 * preprocessor conditional whose lines are not visible here -- confirm. */
851str_alloc_embed(VALUE klass,
size_t capa)
853 size_t size = str_embed_size(
capa);
854 assert(rb_gc_size_allocatable_p(size));
856 assert(size <=
sizeof(
struct RString));
858 return str_alloc(klass, size);
/* Allocates a string object of class klass with size sizeof(struct RString)
 * via str_alloc(). */
862str_alloc_heap(VALUE klass)
864 return str_alloc(klass,
sizeof(
struct RString));
/* Creates an empty embedded string of class klass: fires the STRING create
 * DTrace hook with length 0, allocates with embedded capacity 0, then
 * zero-fills the whole embedded array (str_embed_capa(str) bytes). */
868empty_str_alloc(VALUE klass)
870 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
871 VALUE str = str_alloc_embed(klass, 0);
872 memset(
RSTRING(str)->as.embed.ary, 0, str_embed_capa(str));
877str_new0(VALUE klass,
const char *
ptr,
long len,
int termlen)
882 rb_raise(rb_eArgError,
"negative string size (or size too big)");
885 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
887 if (STR_EMBEDDABLE_P(
len, termlen)) {
888 str = str_alloc_embed(klass,
len + termlen);
894 str = str_alloc_heap(klass);
900 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
901 STR_SET_NOEMBED(str);
906 STR_SET_LEN(str,
len);
/* Shorthand for str_new0() with a terminator length of 1. */
912str_new(VALUE klass,
const char *
ptr,
long len)
914 return str_new0(klass,
ptr,
len, 1);
959 __msan_unpoison_string(
ptr);
984 rb_raise(rb_eArgError,
"wchar encoding given");
990str_new_static(VALUE klass,
const char *
ptr,
long len,
int encindex)
995 rb_raise(rb_eArgError,
"negative string size (or size too big)");
999 rb_encoding *enc = rb_enc_get_from_index(encindex);
1003 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1004 str = str_alloc_heap(klass);
1008 STR_SET_NOEMBED(str);
1009 RBASIC(str)->flags |= STR_NOFREE;
1042 rb_warn_deprecated_to_remove_at(3.2,
"rb_tainted_str_new", NULL);
1049 rb_warn_deprecated_to_remove_at(3.2,
"rb_tainted_str_new_cstr", NULL);
1053static VALUE str_cat_conv_enc_opts(VALUE newstr,
long ofs,
const char *
ptr,
long len,
1055 int ecflags, VALUE ecopts);
1062 return is_ascii_string(str);
1073 if (!to)
return str;
1075 if (from == to)
return str;
1078 if (STR_ENC_GET(str) != to) {
1087 from, to, ecflags, ecopts);
1088 if (
NIL_P(newstr)) {
1096rb_str_cat_conv_enc_opts(VALUE newstr,
long ofs,
const char *
ptr,
long len,
1102 if (ofs < -olen || olen < ofs)
1103 rb_raise(rb_eIndexError,
"index %ld out of string", ofs);
1104 if (ofs < 0) ofs += olen;
1106 STR_SET_LEN(newstr, ofs);
1111 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1119 STR_SET_LEN(str, 0);
1126str_cat_conv_enc_opts(VALUE newstr,
long ofs,
const char *
ptr,
long len,
1128 int ecflags, VALUE ecopts)
1131 rb_econv_result_t ret;
1133 VALUE econv_wrapper;
1134 const unsigned char *start, *sp;
1135 unsigned char *dest, *dp;
1136 size_t converted_output = (size_t)ofs;
1140 econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
1141 RBASIC_CLEAR_CLASS(econv_wrapper);
1143 if (!ec)
return Qnil;
1146 sp = (
unsigned char*)
ptr;
1148 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1149 (dp = dest + converted_output),
1153 size_t converted_input = sp - start;
1154 size_t rest =
len - converted_input;
1155 converted_output = dp - dest;
1157 if (converted_input && converted_output &&
1158 rest < (LONG_MAX / converted_output)) {
1159 rest = (rest * converted_output) / converted_input;
1164 olen += rest < 2 ? 2 : rest;
1205 if (!ienc || eenc == ienc) {
1219 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1220 rb_str_initialize(str,
ptr,
len, eenc);
1226rb_external_str_with_enc(VALUE str,
rb_encoding *eenc)
1293str_replace_shared_without_enc(VALUE str2, VALUE str)
1295 const int termlen = TERM_LEN(str);
1300 if (str_embed_capa(str2) >=
len + termlen) {
1301 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1302 STR_SET_EMBED(str2);
1304 STR_SET_EMBED_LEN(str2,
len);
1305 TERM_FILL(ptr2+
len, termlen);
1309 if (STR_SHARED_P(str)) {
1310 root =
RSTRING(str)->as.heap.aux.shared;
1318 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1320 rb_fatal(
"about to free a possible shared root");
1322 char *ptr2 = STR_HEAP_PTR(str2);
1324 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1327 FL_SET(str2, STR_NOEMBED);
1330 STR_SET_SHARED(str2, root);
/* Makes str2 share str's content via str_replace_shared_without_enc(), then
 * calls rb_enc_cr_str_exact_copy(str2, str), which in this file starts by
 * copying the encoding with str_enc_copy(). */
1336str_replace_shared(VALUE str2, VALUE str)
1338 str_replace_shared_without_enc(str2, str);
1339 rb_enc_cr_str_exact_copy(str2, str);
/* Allocates a fresh heap string of class klass and passes it with str to
 * str_replace_shared(), returning that call's result. */
1344str_new_shared(VALUE klass, VALUE str)
1346 return str_replace_shared(str_alloc_heap(klass), str);
1352 return str_new_shared(rb_obj_class(str), str);
1359 return str_new_frozen(rb_obj_class(orig), orig);
1363rb_str_new_frozen_String(VALUE orig)
1370rb_str_tmp_frozen_acquire(VALUE orig)
1373 return str_new_frozen_buffer(0, orig, FALSE);
1377rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
1382 if (STR_EMBED_P(tmp)) {
1395 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1396 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1401 STR_SET_EMBED_LEN(tmp, 0);
1407str_new_frozen(VALUE klass, VALUE orig)
1409 return str_new_frozen_buffer(klass, orig, TRUE);
/* Creates a new heap string of class klass to act as the shared root of
 * orig.  orig must be neither embedded nor already shared.
 * NOTE(review): the lines that copy ptr/len into str and the return statement
 * are not visible in this extract. */
1413heap_str_make_shared(VALUE klass, VALUE orig)
1415 assert(!STR_EMBED_P(orig));
1416 assert(!STR_SHARED_P(orig));
1418 VALUE str = str_alloc_heap(klass);
1419 STR_SET_NOEMBED(str);
/* the new string takes over orig's recorded capacity */
1422 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
/* move the STR_NOFREE flag, if set, from orig to the new string */
1423 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1424 RBASIC(orig)->flags &= ~STR_NOFREE;
/* orig now refers to str as its shared string */
1425 STR_SET_SHARED(orig, str);
1432str_new_frozen_buffer(VALUE klass, VALUE orig,
int copy_encoding)
1438 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, 1)) {
1440 assert(STR_EMBED_P(str));
1451 assert(!STR_EMBED_P(
shared));
1455 if ((ofs > 0) || (rest > 0) ||
1458 str = str_new_shared(klass,
shared);
1459 assert(!STR_EMBED_P(str));
1460 RSTRING(str)->as.heap.ptr += ofs;
1461 RSTRING(str)->as.heap.len -= ofs + rest;
1469 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1470 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1477 str = heap_str_make_shared(klass, orig);
1481 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1489 return str_new0(rb_obj_class(obj),
ptr,
len, TERM_LEN(obj));
1493str_new_empty_String(VALUE str)
/* Lower bound for a requested buffer capacity: code in this file raises
 * `capa` to this value when the requested capacity is smaller. */
1500#define STR_BUF_MIN_SIZE 63
1508 if (STR_EMBEDDABLE_P(
capa, 1)) {
1515 if (
capa < STR_BUF_MIN_SIZE) {
1516 capa = STR_BUF_MIN_SIZE;
1519 FL_SET(str, STR_NOEMBED);
1522 RSTRING(str)->as.heap.ptr[0] =
'\0';
1542 return str_new(0, 0,
len);
1548 if (
FL_TEST(str, RSTRING_FSTR)) {
1549 st_data_t fstr = (st_data_t)str;
1553 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1554 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1559 if (STR_EMBED_P(str)) {
1560 RB_DEBUG_COUNTER_INC(obj_str_embed);
1562 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1563 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1564 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1567 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1568 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
/* Reports STR_HEAP_SIZE(str) when, among STR_NOEMBED|STR_SHARED|STR_NOFREE,
 * only STR_NOEMBED is set -- a heap buffer that is neither shared nor marked
 * no-free.
 * NOTE(review): the result for every other case is on lines not visible in
 * this extract. */
1572RUBY_FUNC_EXPORTED
size_t
1573rb_str_memsize(VALUE str)
1575 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1576 return STR_HEAP_SIZE(str);
1586 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1589static inline void str_discard(VALUE str);
1590static void str_shared_replace(VALUE str, VALUE str2);
1595 if (str != str2) str_shared_replace(str, str2);
1599str_shared_replace(VALUE str, VALUE str2)
1606 enc = STR_ENC_GET(str2);
1611 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1620 if (STR_EMBED_P(str2)) {
1621 assert(!
FL_TEST(str2, STR_SHARED));
1623 assert(
len + termlen <= str_embed_capa(str2));
1625 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1626 memcpy(new_ptr,
RSTRING(str2)->as.embed.ary,
len + termlen);
1627 RSTRING(str2)->as.heap.ptr = new_ptr;
1630 STR_SET_NOEMBED(str2);
1634 STR_SET_NOEMBED(str);
1639 if (
FL_TEST(str2, STR_SHARED)) {
1641 STR_SET_SHARED(str,
shared);
1644 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1648 STR_SET_EMBED(str2);
1650 STR_SET_EMBED_LEN(str2, 0);
1665 return rb_obj_as_string_result(str, obj);
1668MJIT_FUNC_EXPORTED VALUE
1669rb_obj_as_string_result(VALUE str, VALUE obj)
1672 return rb_any_to_s(obj);
1677str_replace(VALUE str, VALUE str2)
1682 if (STR_SHARED_P(str2)) {
1685 STR_SET_NOEMBED(str);
1688 STR_SET_SHARED(str,
shared);
1689 rb_enc_cr_str_exact_copy(str, str2);
1692 str_replace_shared(str, str2);
1702 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1710 size_t size = str_embed_size(
capa);
1711 assert(rb_gc_size_allocatable_p(size));
1713 assert(size <=
sizeof(
struct RString));
1715 return ec_str_alloc(ec, klass, size);
1721 return ec_str_alloc(ec, klass,
sizeof(
struct RString));
1725str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
1727 const VALUE flag_mask =
1729 RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
1736 if (STR_EMBED_P(str)) {
1739 assert(str_embed_capa(dup) >=
len + 1);
1740 STR_SET_EMBED_LEN(dup,
len);
1746 root =
RSTRING(str)->as.heap.aux.shared;
1748 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1749 root = str = str_new_frozen(klass, str);
1752 assert(!STR_SHARED_P(root));
1757 if (STR_EMBED_P(root)) {
1766 flags |= RSTRING_NOEMBED | STR_SHARED;
1772 flags &= ~ENCODING_MASK;
1783 if (!USE_RVARGC ||
FL_TEST(str, STR_NOEMBED)) {
1784 dup = ec_str_alloc_heap(ec, klass);
1790 return str_duplicate_setup(klass, str, dup);
1794str_duplicate(VALUE klass, VALUE str)
1797 if (!USE_RVARGC ||
FL_TEST(str, STR_NOEMBED)) {
1798 dup = str_alloc_heap(klass);
1804 return str_duplicate_setup(klass, str, dup);
1810 return str_duplicate(rb_obj_class(str), str);
1816 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1823 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1824 return ec_str_duplicate(ec,
rb_cString, str);
1874rb_str_init(
int argc, VALUE *argv, VALUE str)
1876 static ID keyword_ids[2];
1877 VALUE orig, opt, venc, vcapa;
1882 if (!keyword_ids[0]) {
1883 keyword_ids[0] = rb_id_encoding();
1884 CONST_ID(keyword_ids[1],
"capacity");
1900 if (
capa < STR_BUF_MIN_SIZE) {
1901 capa = STR_BUF_MIN_SIZE;
1909 if (orig == str) n = 0;
1911 str_modifiable(str);
1912 if (STR_EMBED_P(str)) {
1913 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1915 assert(
RSTRING(str)->as.embed.len + 1 <= str_embed_capa(str));
1916 memcpy(new_ptr,
RSTRING(str)->as.embed.ary,
RSTRING(str)->as.embed.len + 1);
1920 RSTRING(str)->as.heap.ptr = new_ptr;
1922 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1923 const size_t size = (size_t)
capa + termlen;
1925 const size_t osize =
RSTRING(str)->as.heap.len + TERM_LEN(str);
1926 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1927 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1929 RSTRING(str)->as.heap.ptr = new_ptr;
1931 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1932 SIZED_REALLOC_N(
RSTRING(str)->as.heap.ptr,
char,
1933 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1936 TERM_FILL(&
RSTRING(str)->as.heap.ptr[
len], termlen);
1939 rb_enc_cr_str_exact_copy(str, orig);
1941 FL_SET(str, STR_NOEMBED);
/* True unless the top two bits of c are 10, i.e. for every byte that is not
 * a UTF-8 continuation byte (0x80..0xBF). */
1959#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1974static inline uintptr_t
1975count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1980 d = (d>>6) | (~d>>7);
1981 d &= NONASCII_MASK >> 7;
1984#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1986 return rb_popcount_intptr(d);
1990# if SIZEOF_VOIDP == 8
1999enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2005 long diff = (long)(e - p);
2011 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2012 const uintptr_t *s, *t;
2013 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2014 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2015 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2016 while (p < (
const char *)s) {
2017 if (is_utf8_lead_byte(*p))
len++;
2021 len += count_utf8_lead_bytes_with_word(s);
2024 p = (
const char *)s;
2027 if (is_utf8_lead_byte(*p))
len++;
2038 q = search_nonascii(p, e);
2051 q = search_nonascii(p, e);
2064 for (c=0; p<e; c++) {
2080rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2088 long diff = (long)(e - p);
2095 q = search_nonascii(p, e);
2118 for (c=0; p<e; c++) {
2143 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2144 if (!enc) enc = STR_ENC_GET(str);
2150 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2155 return enc_strlen(p, e, enc, cr);
2162 return str_strlen(str, NULL);
2182 return LONG2NUM(str_strlen(str, NULL));
2198rb_str_bytesize(VALUE str)
2216rb_str_empty(VALUE str)
2236 char *ptr1, *ptr2, *ptr3;
2241 enc = rb_enc_check_str(str1, str2);
2245 if (len1 > LONG_MAX - len2) {
2246 rb_raise(rb_eArgError,
"string size too big");
2248 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2250 memcpy(ptr3, ptr1, len1);
2251 memcpy(ptr3+len1, ptr2, len2);
2252 TERM_FILL(&ptr3[len1+len2], termlen);
2262MJIT_FUNC_EXPORTED VALUE
2263rb_str_opt_plus(VALUE str1, VALUE str2)
2268 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2277 else if (enc2 < 0) {
2280 else if (enc1 != enc2) {
2283 else if (len1 > LONG_MAX - len2) {
2321 rb_raise(rb_eArgError,
"negative argument");
2324 if (STR_EMBEDDABLE_P(
len, 1)) {
2332 STR_SET_NOEMBED(str2);
2334 STR_SET_LEN(str2,
len);
2339 rb_raise(rb_eArgError,
"argument too big");
2343 termlen = TERM_LEN(str);
2349 while (n <=
len/2) {
2350 memcpy(ptr2 + n, ptr2, n);
2353 memcpy(ptr2 + n, ptr2,
len-n);
2355 STR_SET_LEN(str2,
len);
2356 TERM_FILL(&ptr2[
len], termlen);
2357 rb_enc_cr_str_copy_for_substr(str2, str);
2381rb_str_format_m(VALUE str, VALUE arg)
/* Raises RuntimeError when str carries the STR_TMPLOCK flag (temporarily
 * locked); otherwise the visible code does nothing. */
2392rb_check_lockedtmp(VALUE str)
2394 if (
FL_TEST(str, STR_TMPLOCK)) {
2395 rb_raise(rb_eRuntimeError,
"can't modify string; temporarily locked");
/* Guard run before mutating str: starts by rejecting temporarily locked
 * strings via rb_check_lockedtmp().
 * NOTE(review): any further checks are on lines not visible in this extract. */
2400str_modifiable(VALUE str)
2402 rb_check_lockedtmp(str);
/* Tests whether str depends on storage it does not own.  The visible
 * condition singles out strings that are embedded, or that have neither
 * STR_SHARED nor STR_NOFREE set.
 * NOTE(review): the values returned by each branch are on lines not visible
 * here -- presumably 0 for this branch; confirm. */
2407str_dependent_p(VALUE str)
2409 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* Runs the str_modifiable() guard, then returns the negation of
 * str_dependent_p(str). */
2418str_independent(VALUE str)
2420 str_modifiable(str);
2421 return !str_dependent_p(str);
2425str_make_independent_expand(VALUE str,
long len,
long expand,
const int termlen)
2433 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2437 TERM_FILL(
RSTRING(str)->as.embed.ary +
len, termlen);
2438 STR_SET_EMBED_LEN(str,
len);
2445 memcpy(
ptr, oldptr,
len);
2447 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2450 STR_SET_NOEMBED(str);
2451 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2452 TERM_FILL(
ptr +
len, termlen);
2461 if (!str_independent(str))
2462 str_make_independent(str);
2469 int termlen = TERM_LEN(str);
2473 rb_raise(rb_eArgError,
"negative expanding string size");
2475 if (expand >= LONG_MAX -
len) {
2476 rb_raise(rb_eArgError,
"string size too big");
2479 if (!str_independent(str)) {
2480 str_make_independent_expand(str,
len, expand, termlen);
2482 else if (expand > 0) {
2483 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2490str_modify_keep_cr(VALUE str)
2492 if (!str_independent(str))
2493 str_make_independent(str);
/* Drops str's heap buffer: after the str_modifiable() guard, a string that
 * is not embedded and has neither STR_SHARED nor STR_NOFREE set gets its
 * buffer released with ruby_sized_xfree(), and its heap pointer and length
 * are reset to 0. */
2500str_discard(VALUE str)
2502 str_modifiable(str);
2503 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2504 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2505 RSTRING(str)->as.heap.ptr = 0;
2506 RSTRING(str)->as.heap.len = 0;
2538zero_filled(
const char *s,
int n)
2540 for (; n > 0; --n) {
2547str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2549 const char *e = s +
len;
2552 if (zero_filled(s, minlen))
return s;
2558str_fill_term(VALUE str,
char *s,
long len,
int termlen)
2563 if (str_dependent_p(str)) {
2564 if (!zero_filled(s +
len, termlen))
2565 str_make_independent_expand(str,
len, 0L, termlen);
2568 TERM_FILL(s +
len, termlen);
2575rb_str_change_terminator_length(VALUE str,
const int oldtermlen,
const int termlen)
2577 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2582 rb_check_lockedtmp(str);
2583 str_make_independent_expand(str,
len, 0L, termlen);
2585 else if (str_dependent_p(str)) {
2586 if (termlen > oldtermlen)
2587 str_make_independent_expand(str,
len, 0L, termlen);
2590 if (!STR_EMBED_P(str)) {
2592 assert(!
FL_TEST((str), STR_SHARED));
2595 if (termlen > oldtermlen) {
2604str_null_check(VALUE str,
int *w)
2613 if (str_null_char(s,
len, minlen, enc)) {
2616 return str_fill_term(str, s,
len, minlen);
2619 if (!s || memchr(s, 0,
len)) {
2623 s = str_fill_term(str, s,
len, minlen);
2629rb_str_to_cstr(VALUE str)
2632 return str_null_check(str, &w);
2640 char *s = str_null_check(str, &w);
2643 rb_raise(rb_eArgError,
"string contains null char");
2645 rb_raise(rb_eArgError,
"string contains null byte");
2651rb_str_fill_terminator(VALUE str,
const int newminlen)
2655 return str_fill_term(str, s,
len, newminlen);
2661 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2679rb_str_s_try_convert(VALUE dummy, VALUE str)
2685str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2695 const char *p2, *e2;
2698 while (p < e && 0 < nth) {
2705 p2 = search_nonascii(p, e2);
2725 while (p < e && nth--) {
2737 return str_nth_len(p, e, &nth, enc);
2741str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2746 p = str_nth_len(p, e, &nth, enc);
2755str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2757 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2758 if (!pp)
return e - p;
2766 STR_ENC_GET(str), single_byte_optimizable(str));
2771str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2774 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2775 const uintptr_t *s, *t;
2776 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2777 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2778 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2779 while (p < (
const char *)s) {
2780 if (is_utf8_lead_byte(*p)) nth--;
2784 nth -= count_utf8_lead_bytes_with_word(s);
2786 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2790 if (is_utf8_lead_byte(*p)) {
2791 if (nth == 0)
break;
2801str_utf8_offset(
const char *p,
const char *e,
long nth)
2803 const char *pp = str_utf8_nth(p, e, &nth);
2812 if (single_byte_optimizable(str) || pos < 0)
2816 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2825 if (!STR_EMBEDDABLE_P(
len, TERM_LEN(str)) &&
2829 RSTRING(str2)->as.heap.ptr += beg;
2830 olen =
RSTRING(str2)->as.heap.len;
2838 rb_enc_cr_str_copy_for_substr(str2, str);
2852 if (
len < 0)
return 0;
2856 if (single_byte_optimizable(str)) {
2857 if (beg > blen)
return 0;
2860 if (beg < 0)
return 0;
2862 if (
len > blen - beg)
2864 if (
len < 0)
return 0;
2869 if (
len > -beg)
len = -beg;
2881 slen = str_strlen(str, enc);
2883 if (beg < 0)
return 0;
2885 if (
len == 0)
goto end;
2892 if (beg > str_strlen(str, enc))
return 0;
2898 p = str_utf8_nth(s, e, &beg);
2899 if (beg > 0)
return 0;
2900 len = str_utf8_offset(p, e,
len);
2906 p = s + beg * char_sz;
2910 else if (
len * char_sz > e - p)
2915 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2916 if (beg > 0)
return 0;
2920 len = str_offset(p, e,
len, enc, 0);
2928static VALUE str_substr(VALUE str,
long beg,
long len,
int empty);
2933 return str_substr(str, beg,
len, TRUE);
2937str_substr(VALUE str,
long beg,
long len,
int empty)
2942 if (!p)
return Qnil;
2943 if (!STR_EMBEDDABLE_P(
len, TERM_LEN(str)) &&
2948 RSTRING(str2)->as.heap.ptr += ofs;
2953 if (!
len && !empty)
return Qnil;
2957 rb_enc_cr_str_copy_for_substr(str2, str);
2967 return rb_obj_freeze(str);
3000str_uminus(VALUE str)
3005 return rb_fstring(str);
3009#define rb_str_dup_frozen rb_str_new_frozen
3014 if (
FL_TEST(str, STR_TMPLOCK)) {
3015 rb_raise(rb_eRuntimeError,
"temporal locking already locked string");
3017 FL_SET(str, STR_TMPLOCK);
3024 if (!
FL_TEST(str, STR_TMPLOCK)) {
3025 rb_raise(rb_eRuntimeError,
"temporal unlocking already unlocked string");
3031RUBY_FUNC_EXPORTED VALUE
3032rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg)
3042 const int termlen = TERM_LEN(str);
3044 str_modifiable(str);
3045 if (STR_SHARED_P(str)) {
3046 rb_raise(rb_eRuntimeError,
"can't set length of shared string");
3048 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3051 STR_SET_LEN(str,
len);
3062 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3065 independent = str_independent(str);
3071 const int termlen = TERM_LEN(str);
3072 if (STR_EMBED_P(str)) {
3073 if (
len == slen)
return str;
3074 if (str_embed_capa(str) >=
len + termlen) {
3075 STR_SET_EMBED_LEN(str,
len);
3076 TERM_FILL(
RSTRING(str)->as.embed.ary +
len, termlen);
3079 str_make_independent_expand(str, slen,
len - slen, termlen);
3081 else if (str_embed_capa(str) >=
len + termlen) {
3082 char *
ptr = STR_HEAP_PTR(str);
3084 if (slen >
len) slen =
len;
3086 TERM_FILL(
RSTRING(str)->as.embed.ary +
len, termlen);
3087 STR_SET_EMBED_LEN(str,
len);
3091 else if (!independent) {
3092 if (
len == slen)
return str;
3093 str_make_independent_expand(str, slen,
len - slen, termlen);
3097 SIZED_REALLOC_N(
RSTRING(str)->as.heap.ptr,
char,
3098 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3101 else if (
len == slen)
return str;
3103 TERM_FILL(
RSTRING(str)->as.heap.ptr +
len, termlen);
3109str_buf_cat(VALUE str,
const char *
ptr,
long len)
3111 long capa, total, olen, off = -1;
3113 const int termlen = TERM_LEN(str);
3119 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3123 if (
len == 0)
return 0;
3124 if (STR_EMBED_P(str)) {
3125 capa = str_embed_capa(str) - termlen;
3126 sptr =
RSTRING(str)->as.embed.ary;
3131 sptr =
RSTRING(str)->as.heap.ptr;
3132 olen =
RSTRING(str)->as.heap.len;
3134 if (olen > LONG_MAX -
len) {
3135 rb_raise(rb_eArgError,
"string sizes too big");
3139 if (total >= LONG_MAX / 2) {
3142 while (total >
capa) {
3145 RESIZE_CAPA_TERM(str,
capa, termlen);
3151 memcpy(sptr + olen,
ptr,
len);
3152 STR_SET_LEN(str, total);
3153 TERM_FILL(sptr + total, termlen);
3158#define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
3163 if (
len == 0)
return str;
3165 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3167 return str_buf_cat(str,
ptr,
len);
3182rb_enc_cr_str_buf_cat(VALUE str,
const char *
ptr,
long len,
3183 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3192 if (str_encindex == ptr_encindex) {
3211 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3220 *ptr_cr_ret = ptr_cr;
3222 if (str_encindex != ptr_encindex &&
3231 res_encindex = str_encindex;
3236 res_encindex = str_encindex;
3240 res_encindex = ptr_encindex;
3245 res_encindex = str_encindex;
3252 res_encindex = str_encindex;
3258 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3260 str_buf_cat(str,
ptr,
len);
3265 rb_raise(rb_eEncCompatError,
"incompatible character encodings: %s and %s",
3273 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3284 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3290 unsigned int c = (
unsigned char)*
ptr;
3293 rb_enc_cr_str_buf_cat(str, buf,
len,
3323#define MIN_PRE_ALLOC_SIZE 48
3325MJIT_FUNC_EXPORTED VALUE
3326rb_str_concat_literals(
size_t num,
const VALUE *strary)
3336 if (LIKELY(
len < MIN_PRE_ALLOC_SIZE)) {
3346 for (i = s; i < num; ++i) {
3347 const VALUE v = strary[i];
3352 if (encidx != ENCINDEX_US_ASCII) {
3379rb_str_concat_multi(
int argc, VALUE *argv, VALUE str)
3381 str_modifiable(str);
3386 else if (argc > 1) {
3390 for (i = 0; i < argc; i++) {
3425 if (rb_num_to_uint(str2, &code) == 0) {
3431 rb_raise(rb_eRangeError,
"bignum out of char range");
3439 if (encidx == ENCINDEX_ASCII || encidx == ENCINDEX_US_ASCII) {
3442 buf[0] = (char)code;
3444 rb_raise(rb_eRangeError,
"%u out of char range", code);
3447 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3459 case ONIGERR_INVALID_CODE_POINT_VALUE:
3462 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3464 rb_raise(rb_eRangeError,
"%u out of char range", code);
3495rb_str_prepend_multi(
int argc, VALUE *argv, VALUE str)
3497 str_modifiable(str);
3502 else if (argc > 1) {
3506 for (i = 0; i < argc; i++) {
3529 const char *ptr1, *ptr2;
3532 return (len1 != len2 ||
3534 memcmp(ptr1, ptr2, len1) != 0);
3548rb_str_hash_m(VALUE str)
3554#define lesser(a,b) (((a)>(b))?(b):(a))
3566 if (idx1 == idx2)
return TRUE;
3585 const char *ptr1, *ptr2;
3588 if (str1 == str2)
return 0;
3591 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3600 if (len1 > len2)
return 1;
3603 if (retval > 0)
return 1;
3630 if (str1 == str2)
return Qtrue;
3635 return rb_equal(str2, str1);
3637 return rb_str_eql_internal(str1, str2);
3658MJIT_FUNC_EXPORTED VALUE
3659rb_str_eql(VALUE str1, VALUE str2)
3661 if (str1 == str2)
return Qtrue;
3663 return rb_str_eql_internal(str1, str2);
3689rb_str_cmp_m(VALUE str1, VALUE str2)
3694 return rb_invcmp(str1, str2);
3700static VALUE str_casecmp(VALUE str1, VALUE str2);
3701static VALUE str_casecmp_p(VALUE str1, VALUE str2);
3730rb_str_casecmp(VALUE str1, VALUE str2)
3736 return str_casecmp(str1, s);
3740str_casecmp(VALUE str1, VALUE str2)
3744 const char *p1, *p1end, *p2, *p2end;
3753 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3754 while (p1 < p1end && p2 < p2end) {
3756 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3757 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3759 return INT2FIX(c1 < c2 ? -1 : 1);
3766 while (p1 < p1end && p2 < p2end) {
3770 if (0 <= c1 && 0 <= c2) {
3774 return INT2FIX(c1 < c2 ? -1 : 1);
3780 len = l1 < l2 ? l1 : l2;
3781 r = memcmp(p1, p2,
len);
3783 return INT2FIX(r < 0 ? -1 : 1);
3785 return INT2FIX(l1 < l2 ? -1 : 1);
3820rb_str_casecmp_p(VALUE str1, VALUE str2)
3826 return str_casecmp_p(str1, s);
3830str_casecmp_p(VALUE str1, VALUE str2)
3833 VALUE folded_str1, folded_str2;
3834 VALUE fold_opt = sym_fold;
3841 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3842 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3844 return rb_str_eql(folded_str1, folded_str2);
3848strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3849 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3851 const char *search_start = str_ptr;
3852 long pos, search_len = str_len - offset;
3856 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3857 if (pos < 0)
return pos;
3859 if (t == search_start + pos)
break;
3860 search_len -= t - search_start;
3861 if (search_len <= 0)
return -1;
3862 offset += t - search_start;
3865 return pos + offset;
3868#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3871rb_strseq_index(VALUE str, VALUE sub,
long offset,
int in_byte)
3873 const char *str_ptr, *str_ptr_end, *sub_ptr;
3874 long str_len, sub_len;
3878 if (is_broken_string(sub))
return -1;
3886 if (str_len < sub_len)
return -1;
3889 long str_len_char, sub_len_char;
3890 int single_byte = single_byte_optimizable(str);
3891 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3892 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3894 offset += str_len_char;
3895 if (offset < 0)
return -1;
3897 if (str_len_char - offset < sub_len_char)
return -1;
3898 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3901 if (sub_len == 0)
return offset;
3904 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3947rb_str_index_m(
int argc, VALUE *argv, VALUE str)
3953 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
3960 pos += str_strlen(str, NULL);
3970 if (pos > str_strlen(str, NULL))
3987 pos = rb_str_index(str, sub, pos);
3991 if (pos == -1)
return Qnil;
3997str_rindex(VALUE str, VALUE sub,
const char *s,
long pos,
rb_encoding *enc)
3999 char *hit, *adjusted;
4001 long slen, searchlen;
4005 if (slen == 0)
return pos;
4010 searchlen = s - sbeg + 1;
4013 hit = memrchr(sbeg, c, searchlen);
4016 if (hit != adjusted) {
4017 searchlen = adjusted - sbeg;
4020 if (memcmp(hit, t, slen) == 0)
4022 searchlen = adjusted - sbeg;
4023 }
while (searchlen > 0);
4029str_rindex(VALUE str, VALUE sub,
const char *s,
long pos,
rb_encoding *enc)
4040 if (memcmp(s, t, slen) == 0) {
4043 if (pos == 0)
break;
4053rb_str_rindex(VALUE str, VALUE sub,
long pos)
4061 if (is_broken_string(sub))
return -1;
4062 singlebyte = single_byte_optimizable(str);
4063 len = singlebyte ?
RSTRING_LEN(str) : str_strlen(str, enc);
4064 slen = str_strlen(sub, enc);
4067 if (len < slen)
return -1;
4068 if (len - pos < slen) pos = len - slen;
4069 if (len == 0)
return pos;
4080 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4081 return str_rindex(str, sub, s, pos, enc);
4142rb_str_rindex_m(
int argc, VALUE *argv, VALUE str)
4147 long pos, len = str_strlen(str, enc);
4149 if (
rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4160 if (pos > len) pos = len;
4169 enc, single_byte_optimizable(str));
4180 pos = rb_str_rindex(str, sub, pos);
4181 if (pos >= 0)
return LONG2NUM(pos);
4216rb_str_match(VALUE x, VALUE y)
4218 switch (OBJ_BUILTIN_TYPE(y)) {
4220 rb_raise(rb_eTypeError,
"type mismatch: String given");
4231static VALUE get_pat(VALUE);
4271rb_str_match_m(
int argc, VALUE *argv, VALUE str)
4311rb_str_match_m_p(
int argc, VALUE *argv, VALUE str)
4315 re = get_pat(argv[0]);
4316 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4325static enum neighbor_char
4335 return NEIGHBOR_NOT_CHAR;
4339 if (!l)
return NEIGHBOR_NOT_CHAR;
4340 if (l != len)
return NEIGHBOR_WRAPPED;
4344 return NEIGHBOR_NOT_CHAR;
4346 return NEIGHBOR_FOUND;
4349 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4352 return NEIGHBOR_WRAPPED;
4353 ++((
unsigned char*)p)[i];
4358 return NEIGHBOR_FOUND;
4361 memset(p+l, 0xff, len-l);
4367 for (len2 = len-1; 0 < len2; len2--) {
4372 memset(p+len2+1, 0xff, len-(len2+1));
4377static enum neighbor_char
4386 return NEIGHBOR_NOT_CHAR;
4389 if (!c)
return NEIGHBOR_NOT_CHAR;
4392 if (!l)
return NEIGHBOR_NOT_CHAR;
4393 if (l != len)
return NEIGHBOR_WRAPPED;
4397 return NEIGHBOR_NOT_CHAR;
4399 return NEIGHBOR_FOUND;
4402 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4405 return NEIGHBOR_WRAPPED;
4406 --((
unsigned char*)p)[i];
4411 return NEIGHBOR_FOUND;
4414 memset(p+l, 0, len-l);
4420 for (len2 = len-1; 0 < len2; len2--) {
4425 memset(p+len2+1, 0, len-(len2+1));
4439static enum neighbor_char
4440enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4442 enum neighbor_char ret;
4446 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4450 const int max_gaps = 1;
4454 ctype = ONIGENC_CTYPE_DIGIT;
4456 ctype = ONIGENC_CTYPE_ALPHA;
4458 return NEIGHBOR_NOT_CHAR;
4460 MEMCPY(save, p,
char, len);
4461 for (
try = 0;
try <= max_gaps; ++
try) {
4462 ret = enc_succ_char(p, len, enc);
4463 if (ret == NEIGHBOR_FOUND) {
4466 return NEIGHBOR_FOUND;
4469 MEMCPY(p, save,
char, len);
4472 MEMCPY(save, p,
char, len);
4473 ret = enc_pred_char(p, len, enc);
4474 if (ret == NEIGHBOR_FOUND) {
4477 MEMCPY(p, save,
char, len);
4482 MEMCPY(p, save,
char, len);
4488 return NEIGHBOR_NOT_CHAR;
4491 if (ctype != ONIGENC_CTYPE_DIGIT) {
4492 MEMCPY(carry, p,
char, len);
4493 return NEIGHBOR_WRAPPED;
4496 MEMCPY(carry, p,
char, len);
4497 enc_succ_char(carry, len, enc);
4498 return NEIGHBOR_WRAPPED;
4502static VALUE str_succ(VALUE str);
4568 rb_enc_cr_str_copy_for_substr(str, orig);
4569 return str_succ(str);
4576 char *sbeg, *s, *e, *last_alnum = 0;
4577 int found_alnum = 0;
4579 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4580 long carry_pos = 0, carry_len = 1;
4581 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4584 if (slen == 0)
return str;
4586 enc = STR_ENC_GET(str);
4588 s = e = sbeg + slen;
4591 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4598 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4599 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4600 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4602 case NEIGHBOR_NOT_CHAR:
4604 case NEIGHBOR_FOUND:
4606 case NEIGHBOR_WRAPPED:
4611 carry_pos = s - sbeg;
4617 enum neighbor_char neighbor;
4618 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4620 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4621 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4623 neighbor = enc_succ_char(tmp, l, enc);
4625 case NEIGHBOR_FOUND:
4629 case NEIGHBOR_WRAPPED:
4632 case NEIGHBOR_NOT_CHAR:
4637 enc_succ_char(s, l, enc);
4640 MEMCPY(carry, s,
char, l);
4643 carry_pos = s - sbeg;
4647 RESIZE_CAPA(str, slen + carry_len);
4649 s = sbeg + carry_pos;
4650 memmove(s + carry_len, s, slen - carry_pos);
4651 memmove(s, carry, carry_len);
4653 STR_SET_LEN(str, slen);
4670rb_str_succ_bang(VALUE str)
4678all_digits_p(
const char *s,
long len)
4688str_upto_i(VALUE str, VALUE arg)
4730rb_str_upto(
int argc, VALUE *argv, VALUE beg)
4732 VALUE end, exclusive;
4736 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
4740rb_str_upto_each(VALUE beg, VALUE end,
int excl,
int (*each)(VALUE, VALUE), VALUE arg)
4742 VALUE current, after_end;
4750 ascii = (is_ascii_string(beg) && is_ascii_string(end));
4756 if (c > e || (excl && c == e))
return beg;
4759 if (!excl && c == e)
break;
4761 if (excl && c == e)
break;
4781 if (excl && bi == ei)
break;
4782 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4787 ID op = excl ?
'<' : idLE;
4788 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
4793 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
4801 if (n > 0 || (excl && n == 0))
return beg;
4809 if ((*each)(current, arg))
break;
4810 if (
NIL_P(next))
break;
4822rb_str_upto_endless_each(VALUE beg,
int (*each)(VALUE, VALUE), VALUE arg)
4831 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
4839 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
4847 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
4855 if ((*each)(current, arg))
break;
4866include_range_i(VALUE str, VALUE arg)
4868 VALUE *argp = (VALUE *)arg;
4869 if (!rb_equal(str, *argp))
return 0;
4875rb_str_include_range_p(VALUE beg, VALUE end, VALUE val, VALUE exclusive)
4898 if (b <= v && v < e)
return Qtrue;
4899 return RBOOL(!
RTEST(exclusive) && v == e);
4912 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (VALUE)&val);
4914 return RBOOL(
NIL_P(val));
4918rb_str_subpat(VALUE str, VALUE re, VALUE backref)
4929rb_str_aref(VALUE str, VALUE indx)
4937 return rb_str_subpat(str, indx,
INT2FIX(0));
4940 if (rb_str_index(str, indx, 0) != -1)
4946 long beg, len = str_strlen(str, NULL);
4958 return str_substr(str, idx, 1, FALSE);
5054rb_str_aref_m(
int argc, VALUE *argv, VALUE str)
5058 return rb_str_subpat(str, argv[0], argv[1]);
5067 return rb_str_aref(str, argv[0]);
5076 str_modifiable(str);
5077 if (len > olen) len = olen;
5079 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5081 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5083 STR_SET_EMBED_LEN(str, nlen);
5084 ptr =
RSTRING(str)->as.embed.ary;
5085 memmove(ptr, oldptr + len, nlen);
5086 if (fl == STR_NOEMBED)
xfree(oldptr);
5089 if (!STR_SHARED_P(str)) {
5090 VALUE shared = heap_str_make_shared(rb_obj_class(str), str);
5091 rb_enc_cr_str_exact_copy(shared, str);
5094 ptr =
RSTRING(str)->as.heap.ptr += len;
5095 RSTRING(str)->as.heap.len = nlen;
5103rb_str_splice_0(VALUE str,
long beg,
long len, VALUE val)
5109 if (beg == 0 && vlen == 0) {
5114 str_modify_keep_cr(str);
5118 RESIZE_CAPA(str, slen + vlen - len);
5128 memmove(sptr + beg + vlen,
5130 slen - (beg + len));
5132 if (vlen < beg && len < 0) {
5133 MEMZERO(sptr + slen,
char, -len);
5139 STR_SET_LEN(str, slen);
5140 TERM_FILL(&sptr[slen], TERM_LEN(str));
5150 int singlebyte = single_byte_optimizable(str);
5153 if (len < 0)
rb_raise(rb_eIndexError,
"negative length %ld", len);
5157 slen = str_strlen(str, enc);
5159 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5160 rb_raise(rb_eIndexError,
"index %ld out of string", beg);
5166 assert(beg <= slen);
5167 if (len > slen - beg) {
5170 str_modify_keep_cr(str);
5173 e = str_nth(p,
RSTRING_END(str), len, enc, singlebyte);
5178 rb_str_splice_0(str, beg, len, val);
5185#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
5188rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
5192 long start, end, len;
5197 rb_raise(rb_eIndexError,
"regexp not matched");
5202 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5203 rb_raise(rb_eIndexError,
"index %d out of regexp", nth);
5206 nth += regs->num_regs;
5211 rb_raise(rb_eIndexError,
"regexp group %d not matched", nth);
5216 enc = rb_enc_check_str(str, val);
5217 rb_str_splice_0(str, start, len, val);
5222rb_str_aset(VALUE str, VALUE indx, VALUE val)
5226 switch (
TYPE(indx)) {
5228 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5232 beg = rb_str_index(str, indx, 0);
5234 rb_raise(rb_eIndexError,
"string not matched");
5237 rb_str_splice(str, beg, str_strlen(indx, NULL), val);
5245 rb_str_splice(str, beg, len, val);
5253 rb_str_splice(str, idx, 1, val);
5283rb_str_aset_m(
int argc, VALUE *argv, VALUE str)
5287 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5295 return rb_str_aset(str, argv[0], argv[1]);
5317rb_str_insert(VALUE str, VALUE idx, VALUE str2)
5327 rb_str_splice(str, pos, 0, str2);
5357rb_str_slice_bang(
int argc, VALUE *argv, VALUE str)
5359 VALUE result =
Qnil;
5365 str_modify_keep_cr(str);
5373 if ((nth += regs->num_regs) <= 0)
return Qnil;
5375 else if (nth >= regs->num_regs)
return Qnil;
5377 len = END(nth) - beg;
5380 else if (argc == 2) {
5388 if (!len)
return Qnil;
5393 beg = rb_str_index(str, indx, 0);
5394 if (beg == -1)
return Qnil;
5406 if (!len)
return Qnil;
5420 rb_enc_cr_str_copy_for_substr(result, str);
5430 if (beg + len > slen)
5434 slen - (beg + len));
5436 STR_SET_LEN(str, slen);
5437 TERM_FILL(&sptr[slen], TERM_LEN(str));
5448 switch (OBJ_BUILTIN_TYPE(pat)) {
5467get_pat_quoted(VALUE pat,
int check)
5471 switch (OBJ_BUILTIN_TYPE(pat)) {
5485 if (check && is_broken_string(pat)) {
5492rb_pat_search(VALUE pat, VALUE str,
long pos,
int set_backref_str)
5495 pos = rb_strseq_index(str, pat, pos, 1);
5496 if (set_backref_str) {
5498 str = rb_str_new_frozen_String(str);
5499 rb_backref_set_string(str, pos,
RSTRING_LEN(pat));
5508 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5528rb_str_sub_bang(
int argc, VALUE *argv, VALUE str)
5530 VALUE pat, repl, hash =
Qnil;
5548 pat = get_pat_quoted(argv[0], 1);
5550 str_modifiable(str);
5551 beg = rb_pat_search(pat, str, 0, 1);
5556 VALUE match, match0 =
Qnil;
5574 if (iter || !
NIL_P(hash)) {
5584 str_mod_check(str, p, len);
5597 rb_raise(rb_eEncCompatError,
"incompatible character encodings: %s and %s",
5601 enc = STR_ENC_GET(repl);
5617 RESIZE_CAPA(str, len + rlen - plen);
5621 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
5624 memmove(p + beg0, rp, rlen);
5626 STR_SET_LEN(str, len);
5651rb_str_sub(
int argc, VALUE *argv, VALUE str)
5654 rb_str_sub_bang(argc, argv, str);
5659str_gsub(
int argc, VALUE *argv, VALUE str,
int bang)
5661 VALUE pat, val =
Qnil, repl, match, match0 =
Qnil, dest, hash =
Qnil;
5663 long beg, beg0, end0;
5664 long offset, blen, slen, len, last;
5665 enum {STR, ITER, MAP} mode = STR;
5667 int need_backref = -1;
5686 rb_error_arity(argc, 1, 2);
5689 pat = get_pat_quoted(argv[0], 1);
5690 beg = rb_pat_search(pat, str, 0, need_backref);
5692 if (bang)
return Qnil;
5702 str_enc = STR_ENC_GET(str);
5728 str_mod_check(str, sp, slen);
5730 rb_raise(rb_eRuntimeError,
"block should not cheat");
5733 else if (need_backref) {
5735 if (need_backref < 0) {
5736 need_backref = val != repl;
5743 len = beg0 - offset;
5760 offset = end0 + len;
5764 beg = rb_pat_search(pat, str, offset, need_backref);
5769 rb_pat_search(pat, str, last, 1);
5771 str_shared_replace(str, dest);
5799rb_str_gsub_bang(
int argc, VALUE *argv, VALUE str)
5801 str_modify_keep_cr(str);
5802 return str_gsub(argc, argv, str, 1);
5823rb_str_gsub(
int argc, VALUE *argv, VALUE str)
5825 return str_gsub(argc, argv, str, 0);
5843 str_modifiable(str);
5844 if (str == str2)
return str;
5848 return str_replace(str, str2);
5863rb_str_clear(VALUE str)
5867 STR_SET_EMBED_LEN(str, 0);
5888rb_str_chr(VALUE str)
5906rb_str_getbyte(VALUE str, VALUE index)
5931rb_str_setbyte(VALUE str, VALUE index, VALUE value)
5935 char *ptr, *head, *left = 0;
5939 if (pos < -len || len <= pos)
5940 rb_raise(rb_eIndexError,
"index %ld out of string", pos);
5944 VALUE v = rb_to_int(value);
5945 VALUE w = rb_int_and(v,
INT2FIX(0xff));
5946 char byte = (char)(
NUM2INT(w) & 0xFF);
5948 if (!str_independent(str))
5949 str_make_independent(str);
5950 enc = STR_ENC_GET(str);
5953 if (!STR_EMBED_P(str)) {
5986str_byte_substr(VALUE str,
long beg,
long len,
int empty)
5992 if (beg > n || len < 0)
return Qnil;
5995 if (beg < 0)
return Qnil;
6000 if (!empty)
return Qnil;
6007 if (!STR_EMBEDDABLE_P(len, TERM_LEN(str)) && SHARABLE_SUBSTRING_P(beg, len, n)) {
6010 RSTRING(str2)->as.heap.ptr += beg;
6011 RSTRING(str2)->as.heap.len = len;
6017 str_enc_copy(str2, str);
6040str_byte_aref(VALUE str, VALUE indx)
6056 return str_byte_substr(str, beg, len, TRUE);
6061 return str_byte_substr(str, idx, 1, FALSE);
6108rb_str_byteslice(
int argc, VALUE *argv, VALUE str)
6113 return str_byte_substr(str, beg, end, TRUE);
6116 return str_byte_aref(str, argv[0]);
6130rb_str_reverse(VALUE str)
6138 enc = STR_ENC_GET(str);
6145 if (single_byte_optimizable(str)) {
6173 str_enc_copy(rev, str);
6193rb_str_reverse_bang(VALUE str)
6196 if (single_byte_optimizable(str)) {
6199 str_modify_keep_cr(str);
6209 str_shared_replace(str, rb_str_reverse(str));
6213 str_modify_keep_cr(str);
6233rb_str_include(VALUE str, VALUE arg)
6238 i = rb_str_index(str, arg, 0);
6240 return RBOOL(i != -1);
6267rb_str_to_i(
int argc, VALUE *argv, VALUE str)
6272 rb_raise(rb_eArgError,
"invalid radix %d", base);
6298rb_str_to_f(VALUE str)
6300 return DBL2NUM(rb_str_to_dbl(str, FALSE));
6316rb_str_to_s(VALUE str)
6326str_cat_char(VALUE str,
unsigned int c,
rb_encoding *enc)
6328 char s[RUBY_MAX_CHAR_LEN];
6336#define CHAR_ESC_LEN 13
6339rb_str_buf_cat_escaped_char(VALUE result,
unsigned int c,
int unicode_p)
6341 char buf[CHAR_ESC_LEN + 1];
6349 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6351 else if (c < 0x10000) {
6352 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6355 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6360 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6363 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6366 l = (int)strlen(buf);
6372ruby_escaped_char(
int c)
6375 case '\0':
return "\\0";
6376 case '\n':
return "\\n";
6377 case '\r':
return "\\r";
6378 case '\t':
return "\\t";
6379 case '\f':
return "\\f";
6380 case '\013':
return "\\v";
6381 case '\010':
return "\\b";
6382 case '\007':
return "\\a";
6383 case '\033':
return "\\e";
6384 case '\x7f':
return "\\c?";
6390rb_str_escape(VALUE str)
6396 const char *prev = p;
6397 char buf[CHAR_ESC_LEN + 1];
6407 if (p > prev) str_buf_cat(result, prev, p - prev);
6410 n = (int)(pend - p);
6412 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6413 str_buf_cat(result, buf, strlen(buf));
6421 cc = ruby_escaped_char(c);
6423 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6424 str_buf_cat(result, cc, strlen(cc));
6430 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6431 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6435 if (p > prev) str_buf_cat(result, prev, p - prev);
6460 const char *p, *pend, *prev;
6461 char buf[CHAR_ESC_LEN + 1];
6470 str_buf_cat2(result,
"\"");
6474 actenc = get_actual_encoding(encidx, str);
6475 if (actenc != enc) {
6485 if (p > prev) str_buf_cat(result, prev, p - prev);
6488 n = (int)(pend - p);
6490 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6491 str_buf_cat(result, buf, strlen(buf));
6499 if ((asciicompat || unicode_p) &&
6500 (c ==
'"'|| c ==
'\\' ||
6505 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6506 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6507 str_buf_cat2(result,
"\\");
6508 if (asciicompat || enc == resenc) {
6514 case '\n': cc =
'n';
break;
6515 case '\r': cc =
'r';
break;
6516 case '\t': cc =
't';
break;
6517 case '\f': cc =
'f';
break;
6518 case '\013': cc =
'v';
break;
6519 case '\010': cc =
'b';
break;
6520 case '\007': cc =
'a';
break;
6521 case 033: cc =
'e';
break;
6522 default: cc = 0;
break;
6525 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6528 str_buf_cat(result, buf, 2);
6537 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6538 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6543 if (p > prev) str_buf_cat(result, prev, p - prev);
6544 str_buf_cat2(result,
"\"");
6549#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6572 const char *p, *pend;
6576 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6581 len += strlen(enc->name);
6587 unsigned char c = *p++;
6590 case '"':
case '\\':
6591 case '\n':
case '\r':
6592 case '\t':
case '\f':
6593 case '\013':
case '\010':
case '\007':
case '\033':
6598 clen = IS_EVSTR(p, pend) ? 2 : 1;
6606 if (u8 && c > 0x7F) {
6612 else if (cc <= 0xFFFFF)
6625 if (clen > LONG_MAX - len) {
6626 rb_raise(rb_eRuntimeError,
"string size too big");
6637 unsigned char c = *p++;
6639 if (c ==
'"' || c ==
'\\') {
6643 else if (c ==
'#') {
6644 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6647 else if (c ==
'\n') {
6651 else if (c ==
'\r') {
6655 else if (c ==
'\t') {
6659 else if (c ==
'\f') {
6663 else if (c ==
'\013') {
6667 else if (c ==
'\010') {
6671 else if (c ==
'\007') {
6675 else if (c ==
'\033') {
6690 snprintf(q, qend-q,
"u%04X", cc);
6692 snprintf(q, qend-q,
"u{%X}", cc);
6697 snprintf(q, qend-q,
"x%02X", c);
6704 snprintf(q, qend-q, nonascii_suffix, enc->name);
6714unescape_ascii(
unsigned int c)
6738undump_after_backslash(VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6740 const char *s = *ss;
6744 unsigned char buf[6];
6762 *buf = unescape_ascii(*s);
6768 rb_raise(rb_eRuntimeError,
"hex escape and Unicode escape are mixed");
6772 rb_raise(rb_eRuntimeError,
"invalid Unicode escape");
6775 if (*penc != enc_utf8) {
6783 rb_raise(rb_eRuntimeError,
"unterminated Unicode escape");
6794 if (hexlen == 0 || hexlen > 6) {
6795 rb_raise(rb_eRuntimeError,
"invalid Unicode escape");
6798 rb_raise(rb_eRuntimeError,
"invalid Unicode codepoint (too large)");
6800 if (0xd800 <= c && c <= 0xdfff) {
6801 rb_raise(rb_eRuntimeError,
"invalid Unicode codepoint");
6811 rb_raise(rb_eRuntimeError,
"invalid Unicode escape");
6813 if (0xd800 <= c && c <= 0xdfff) {
6814 rb_raise(rb_eRuntimeError,
"invalid Unicode codepoint");
6823 rb_raise(rb_eRuntimeError,
"hex escape and Unicode escape are mixed");
6827 rb_raise(rb_eRuntimeError,
"invalid hex escape");
6831 rb_raise(rb_eRuntimeError,
"invalid hex escape");
6844static VALUE rb_str_is_ascii_only_p(VALUE str);
6862str_undump(VALUE str)
6869 bool binary =
false;
6873 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
6874 rb_raise(rb_eRuntimeError,
"non-ASCII character detected");
6876 if (!str_null_check(str, &w)) {
6877 rb_raise(rb_eRuntimeError,
"string contains null byte");
6880 if (*s !=
'"')
goto invalid_format;
6887 rb_raise(rb_eRuntimeError,
"unterminated dumped string");
6898 static const char force_encoding_suffix[] =
".force_encoding(\"";
6899 static const char dup_suffix[] =
".dup";
6900 const char *encname;
6905 size =
sizeof(dup_suffix) - 1;
6906 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
6908 size =
sizeof(force_encoding_suffix) - 1;
6909 if (s_end - s <= size)
goto invalid_format;
6910 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
6914 rb_raise(rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
6918 s = memchr(s,
'"', s_end-s);
6920 if (!s)
goto invalid_format;
6921 if (s_end - s != 2)
goto invalid_format;
6922 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
6924 encidx = rb_enc_find_index2(encname, (
long)size);
6926 rb_raise(rb_eRuntimeError,
"dumped string has unknown encoding name");
6936 rb_raise(rb_eRuntimeError,
"invalid escape");
6938 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
6947 rb_raise(rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
6954 rb_raise(rb_eEncCompatError,
"incompatible encoding with this operation: %s",
6960str_true_enc(VALUE str)
6963 rb_str_check_dummy_enc(enc);
6967static OnigCaseFoldType
6968check_case_options(
int argc, VALUE *argv, OnigCaseFoldType flags)
6973 rb_raise(rb_eArgError,
"too many options");
6974 if (argv[0]==sym_turkic) {
6975 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
6977 if (argv[1]==sym_lithuanian)
6978 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
6980 rb_raise(rb_eArgError,
"invalid second option");
6983 else if (argv[0]==sym_lithuanian) {
6984 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
6986 if (argv[1]==sym_turkic)
6987 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
6989 rb_raise(rb_eArgError,
"invalid second option");
6993 rb_raise(rb_eArgError,
"too many options");
6994 else if (argv[0]==sym_ascii)
6995 flags |= ONIGENC_CASE_ASCII_ONLY;
6996 else if (argv[0]==sym_fold) {
6997 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
6998 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7000 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7003 rb_raise(rb_eArgError,
"invalid option");
7008case_option_single_p(OnigCaseFoldType flags,
rb_encoding *enc, VALUE str)
7016#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7017#ifndef CASEMAP_DEBUG
7018# define CASEMAP_DEBUG 0
7026 OnigUChar space[FLEX_ARY_LEN];
7030mapping_buffer_free(
void *p)
7034 while (current_buffer) {
7035 previous_buffer = current_buffer;
7036 current_buffer = current_buffer->next;
7037 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7043 {0, mapping_buffer_free,}
7047rb_str_casemap(VALUE source, OnigCaseFoldType *flags,
rb_encoding *enc)
7051 const OnigUChar *source_current, *source_end;
7052 int target_length = 0;
7053 VALUE buffer_anchor;
7056 size_t buffer_count = 0;
7057 int buffer_length_or_invalid;
7066 while (source_current < source_end) {
7068 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7069 if (CASEMAP_DEBUG) {
7070 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n", capa);
7073 *pre_buffer = current_buffer;
7074 pre_buffer = ¤t_buffer->next;
7075 current_buffer->next = NULL;
7076 current_buffer->capa = capa;
7077 buffer_length_or_invalid = enc->case_map(flags,
7078 &source_current, source_end,
7079 current_buffer->space,
7080 current_buffer->space+current_buffer->capa,
7082 if (buffer_length_or_invalid < 0) {
7083 current_buffer =
DATA_PTR(buffer_anchor);
7085 mapping_buffer_free(current_buffer);
7086 rb_raise(rb_eArgError,
"input string invalid");
7088 target_length += current_buffer->used = buffer_length_or_invalid;
7090 if (CASEMAP_DEBUG) {
7091 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7094 if (buffer_count==1) {
7095 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7098 char *target_current;
7102 current_buffer =
DATA_PTR(buffer_anchor);
7103 while (current_buffer) {
7104 memcpy(target_current, current_buffer->space, current_buffer->used);
7105 target_current += current_buffer->used;
7106 current_buffer = current_buffer->next;
7109 current_buffer =
DATA_PTR(buffer_anchor);
7111 mapping_buffer_free(current_buffer);
7114 str_enc_copy(target, source);
7121rb_str_ascii_casemap(VALUE source, VALUE target, OnigCaseFoldType *flags,
rb_encoding *enc)
7123 const OnigUChar *source_current, *source_end;
7124 OnigUChar *target_current, *target_end;
7126 int length_or_invalid;
7128 if (old_length == 0)
return Qnil;
7132 if (source == target) {
7133 target_current = (OnigUChar*)source_current;
7134 target_end = (OnigUChar*)source_end;
7141 length_or_invalid = onigenc_ascii_only_case_map(flags,
7142 &source_current, source_end,
7143 target_current, target_end, enc);
7144 if (length_or_invalid < 0)
7145 rb_raise(rb_eArgError,
"input string invalid");
7146 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7147 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7148 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7149 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7150 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7153 str_enc_copy(target, source);
7159upcase_single(VALUE str)
7162 bool modified =
false;
7165 unsigned int c = *(
unsigned char*)s;
7167 if (
'a' <= c && c <=
'z') {
7168 *s =
'A' + (c -
'a');
7196rb_str_upcase_bang(
int argc, VALUE *argv, VALUE str)
7199 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7201 flags = check_case_options(argc, argv, flags);
7202 str_modify_keep_cr(str);
7203 enc = str_true_enc(str);
7204 if (case_option_single_p(flags, enc, str)) {
7205 if (upcase_single(str))
7206 flags |= ONIGENC_CASE_MODIFIED;
7208 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7209 rb_str_ascii_casemap(str, str, &flags, enc);
7211 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7213 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive upcase: builds the result in `ret` instead of touching the
 * receiver.  Options are merged via check_case_options() on top of
 * ONIGENC_CASE_UPCASE.  The ASCII-only path maps str into ret with
 * rb_str_ascii_casemap(); the general path takes ret from rb_str_casemap().
 *
 * NOTE(review): in the single-byte branch only str_enc_copy(ret, str) is
 * visible here, whereas rb_str_downcase also calls downcase_single(ret) at
 * the same position.  Confirm that an upcase_single(ret) call exists in the
 * full source; without it this branch would return an unchanged copy.
 */
7235rb_str_upcase(
int argc, VALUE *argv, VALUE str)
7238 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7241 flags = check_case_options(argc, argv, flags);
7242 enc = str_true_enc(str);
7243 if (case_option_single_p(flags, enc, str)) {
7245 str_enc_copy(ret, str);
7248 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7250 rb_str_ascii_casemap(str, ret, &flags, enc);
7253 ret = rb_str_casemap(str, &flags, enc);
/*
 * Byte-wise ASCII downcase: every byte in 'A'..'Z' is rewritten in place as
 * 'a' + (c - 'A').  Bytes are read through an unsigned char cast.
 * The local `modified` flag starts out false; presumably it is the return
 * value (rb_str_downcase_bang tests the result) -- the return statement is
 * not visible in this excerpt.
 */
7260downcase_single(VALUE str)
7263 bool modified =
false;
7266 unsigned int c = *(
unsigned char*)s;
7268 if (
'A' <= c && c <=
'Z') {
7269 *s =
'a' + (c -
'A');
/*
 * In-place downcase.  Starts from ONIGENC_CASE_DOWNCASE, merges caller
 * options via check_case_options(), makes the receiver modifiable, then uses
 * downcase_single() (single-byte case, setting ONIGENC_CASE_MODIFIED when it
 * reports a change), rb_str_ascii_casemap() in place (ASCII-only option), or
 * rb_str_casemap() followed by str_shared_replace().
 * Returns str when ONIGENC_CASE_MODIFIED is set in flags; the other return
 * value is not visible in this excerpt.
 */
7298rb_str_downcase_bang(
int argc, VALUE *argv, VALUE str)
7301 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7303 flags = check_case_options(argc, argv, flags);
7304 str_modify_keep_cr(str);
7305 enc = str_true_enc(str);
7306 if (case_option_single_p(flags, enc, str)) {
7307 if (downcase_single(str))
7308 flags |= ONIGENC_CASE_MODIFIED;
7310 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7311 rb_str_ascii_casemap(str, str, &flags, enc);
7313 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7315 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive downcase: the result is built in `ret`.
 *   - single-byte case: ret receives the receiver's encoding and is then
 *     lowered byte-wise by downcase_single();
 *   - ASCII-only option: rb_str_ascii_casemap() maps str into ret;
 *   - otherwise ret comes from rb_str_casemap().
 */
7337rb_str_downcase(
int argc, VALUE *argv, VALUE str)
7340 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7343 flags = check_case_options(argc, argv, flags);
7344 enc = str_true_enc(str);
7345 if (case_option_single_p(flags, enc, str)) {
7347 str_enc_copy(ret, str);
7348 downcase_single(ret);
7350 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7352 rb_str_ascii_casemap(str, ret, &flags, enc);
7355 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place capitalize.  The base flags are
 * ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE, merged with caller options.
 * No single-byte fast path is visible here: the work goes either to
 * rb_str_ascii_casemap() in place (ASCII-only option) or to rb_str_casemap()
 * followed by str_shared_replace().
 * Returns str when ONIGENC_CASE_MODIFIED is set in flags.
 */
7383rb_str_capitalize_bang(
int argc, VALUE *argv, VALUE str)
7386 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7388 flags = check_case_options(argc, argv, flags);
7389 str_modify_keep_cr(str);
7390 enc = str_true_enc(str);
7392 if (flags&ONIGENC_CASE_ASCII_ONLY)
7393 rb_str_ascii_casemap(str, str, &flags, enc);
7395 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7397 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive capitalize: same flag setup as rb_str_capitalize_bang
 * (UPCASE | TITLECASE plus caller options), but the result is produced in
 * `ret`, either by rb_str_ascii_casemap(str, ret, ...) or by rb_str_casemap().
 */
7421rb_str_capitalize(
int argc, VALUE *argv, VALUE str)
7424 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7427 flags = check_case_options(argc, argv, flags);
7428 enc = str_true_enc(str);
7430 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7432 rb_str_ascii_casemap(str, ret, &flags, enc);
7435 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place swapcase.  Swapping is requested by setting both
 * ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE, merged with caller options.
 * The receiver is made modifiable, then mapped in place with
 * rb_str_ascii_casemap() (ASCII-only option) or replaced by the result of
 * rb_str_casemap().  Returns str when ONIGENC_CASE_MODIFIED is set in flags.
 */
7462rb_str_swapcase_bang(
int argc, VALUE *argv, VALUE str)
7465 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7467 flags = check_case_options(argc, argv, flags);
7468 str_modify_keep_cr(str);
7469 enc = str_true_enc(str);
7470 if (flags&ONIGENC_CASE_ASCII_ONLY)
7471 rb_str_ascii_casemap(str, str, &flags, enc);
7473 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7475 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive swapcase: flags are UPCASE | DOWNCASE plus caller options;
 * the result is produced in `ret` by rb_str_ascii_casemap(str, ret, ...) for
 * the ASCII-only option, otherwise by rb_str_casemap().
 */
7499rb_str_swapcase(
int argc, VALUE *argv, VALUE str)
7502 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7505 flags = check_case_options(argc, argv, flags);
7506 enc = str_true_enc(str);
7508 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7510 rb_str_ascii_casemap(str, ret, &flags, enc);
7513 ret = rb_str_casemap(str, &flags, enc);
7518typedef unsigned char *USTR;
7522 unsigned int now, max;
7534 if (t->p == t->pend)
return -1;
7535 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7540 if (
rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7542 if (t->p < t->pend) {
7546 if (t->now < 0x80 && c < 0x80) {
7548 "invalid range \"%c-%c\" in string transliteration",
7552 rb_raise(rb_eArgError,
"invalid range in string transliteration");
7563 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7564 if (t->now == t->max) {
7569 if (t->now < t->max) {
/* Forward declaration: tr_trans() calls rb_str_delete_bang() before the
 * function is defined further down in this file. */
7580static VALUE rb_str_delete_bang(
int,VALUE*,VALUE);
/*
 * Shared worker behind the tr-style methods.  The callers in this file pass
 * sflag = 0 (rb_str_tr_bang, rb_str_tr) or sflag = 1 (rb_str_tr_s_bang,
 * rb_str_tr_s).
 *
 * What the visible code establishes:
 *   - errc is (unsigned int)-1; it is the value trnext() results are compared
 *     against to stop the expansion loops, and the "no mapping" marker tested
 *     in the 256-entry trans[] table.
 *   - One early exit delegates to rb_str_delete_bang(1, &src, str).
 *   - When the replacement set runs out (trnext(&trrepl) yields errc), the
 *     replacement falls back to trrepl.now.
 *   - CHECK_IF_ASCII(c) downgrades the tracked code range `cr` from
 *     ENC_CODERANGE_7BIT to ENC_CODERANGE_VALID once a non-ASCII c is seen.
 *   - Two rewrite paths build the output in a buffer from ALLOC_N, grow it
 *     with SIZED_REALLOC_N when offset + tlen would exceed max, free the old
 *     heap storage unless the string is embedded, terminate the buffer with
 *     TERM_FILL, and install it as the string's heap ptr/len/capa.
 *     The second path over-allocates by a factor of 1.2.
 *   - A further path indexes trans[] directly with the current byte.
 *
 * Conditions selecting between these paths are not visible in this excerpt.
 */
7583tr_trans(VALUE str, VALUE src, VALUE repl,
int sflag)
7585 const unsigned int errc = -1;
7586 unsigned int trans[256];
7588 struct tr trsrc, trrepl;
7590 unsigned int c, c0, last = 0;
7591 int modify = 0, i, l;
7592 unsigned char *s, *send;
7594 int singlebyte = single_byte_optimizable(str);
7598#define CHECK_IF_ASCII(c) \
7599 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7600 (cr = ENC_CODERANGE_VALID) : 0)
7606 return rb_str_delete_bang(1, &src, str);
7621 trsrc.p + l < trsrc.pend) {
7627 trsrc.gen = trrepl.gen = 0;
7628 trsrc.now = trrepl.now = 0;
7629 trsrc.max = trrepl.max = 0;
7632 for (i=0; i<256; i++) {
7635 while ((c = trnext(&trsrc, enc)) != errc) {
7644 while ((c = trnext(&trrepl, enc)) != errc)
7647 for (i=0; i<256; i++) {
7648 if (trans[i] != errc) {
7656 for (i=0; i<256; i++) {
7659 while ((c = trnext(&trsrc, enc)) != errc) {
7660 r = trnext(&trrepl, enc);
7661 if (r == errc) r = trrepl.now;
7675 str_modify_keep_cr(str);
7681 unsigned int save = -1;
7682 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7697 if (cflag) c = last;
7700 else if (cflag) c = errc;
7706 if (c != (
unsigned int)-1) {
7718 if (enc != e1) may_modify = 1;
7720 if ((offset = t - buf) + tlen > max) {
7721 size_t MAYBE_UNUSED(old) = max + termlen;
7722 max = offset + tlen + (send - s);
7723 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7727 if (may_modify && memcmp(s, t, tlen) != 0) {
7733 if (!STR_EMBED_P(str)) {
7734 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7736 TERM_FILL((
char *)t, termlen);
7737 RSTRING(str)->as.heap.ptr = (
char *)buf;
7738 RSTRING(str)->as.heap.len = t - buf;
7739 STR_SET_NOEMBED(str);
7740 RSTRING(str)->as.heap.aux.capa = max;
7744 c = (
unsigned char)*s;
7745 if (trans[c] != errc) {
7762 long offset, max = (long)((send - s) * 1.2);
7763 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7776 if (cflag) c = last;
7779 else if (cflag) c = errc;
7783 c = cflag ? last : errc;
7791 if (enc != e1) may_modify = 1;
7793 if ((offset = t - buf) + tlen > max) {
7794 size_t MAYBE_UNUSED(old) = max + termlen;
7795 max = offset + tlen + (long)((send - s) * 1.2);
7796 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7801 if (may_modify && memcmp(s, t, tlen) != 0) {
7809 if (!STR_EMBED_P(str)) {
7810 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7812 TERM_FILL((
char *)t, termlen);
7813 RSTRING(str)->as.heap.ptr = (
char *)buf;
7814 RSTRING(str)->as.heap.len = t - buf;
7815 STR_SET_NOEMBED(str);
7816 RSTRING(str)->as.heap.aux.capa = max;
/* In-place tr: forwards to tr_trans() with sflag = 0 and returns its result. */
7839rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
7841 return tr_trans(str, src, repl, 0);
/* Non-destructive tr: runs tr_trans() with sflag = 0 and ignores its return
 * value.  Presumably `str` has been rebound to a copy of the receiver before
 * this call -- that statement is not visible in this excerpt. */
7881rb_str_tr(VALUE str, VALUE src, VALUE repl)
7884 tr_trans(str, src, repl, 0);
/* TR_TABLE_MAX: number of distinct unsigned char values (UCHAR_MAX+1).
 * TR_TABLE_SIZE: one slot more than that; tr_setup_table() and tr_find() use
 * the extra slot at index TR_TABLE_MAX to carry the cflag state. */
7888#define TR_TABLE_MAX (UCHAR_MAX+1)
7889#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Builds or refines the byte lookup table `stable` from the character set
 * in `str`.
 *
 * stable has TR_TABLE_SIZE entries: indexes below TR_TABLE_MAX are per-byte
 * flags, and stable[TR_TABLE_MAX] stores cflag.  Code points yielded by
 * trnext() that are below TR_TABLE_MAX are recorded in the local buf[] as
 * !cflag; afterwards every stable[i] is AND-combined with buf[i], so repeated
 * calls narrow the table.  `first` marks the first call of a series (the
 * callers pass i==0 / TRUE).  `table` and `ptable` are presumably used for
 * code points at or above TR_TABLE_MAX -- that handling is not visible in
 * this excerpt.
 */
7891tr_setup_table(VALUE str,
char stable[TR_TABLE_SIZE],
int first,
7894 const unsigned int errc = -1;
7895 char buf[TR_TABLE_MAX];
7898 VALUE table = 0, ptable = 0;
7899 int i, l, cflag = 0;
7902 tr.gen =
tr.now =
tr.max = 0;
7909 for (i=0; i<TR_TABLE_MAX; i++) {
7912 stable[TR_TABLE_MAX] = cflag;
7914 else if (stable[TR_TABLE_MAX] && !cflag) {
7915 stable[TR_TABLE_MAX] = 0;
7917 for (i=0; i<TR_TABLE_MAX; i++) {
7921 while ((c = trnext(&
tr, enc)) != errc) {
7922 if (c < TR_TABLE_MAX) {
7923 buf[(
unsigned char)c] = !cflag;
7928 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
7945 for (i=0; i<TR_TABLE_MAX; i++) {
7946 stable[i] = stable[i] && buf[i];
7948 if (!table && !cflag) {
/*
 * Tests whether code point `c` is selected by a table built with
 * tr_setup_table().  Code points below TR_TABLE_MAX are answered straight
 * from table[c].  The last visible fallback returns TRUE/FALSE according to
 * the extra slot table[TR_TABLE_MAX].  How `del` and `nodel` are consulted
 * for larger code points is not visible in this excerpt.
 */
7955tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
7957 if (c < TR_TABLE_MAX) {
7958 return table[c] != 0;
7972 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * In-place delete.  Every argument is folded into the `squeez` table with
 * tr_setup_table() (first == true only for argument 0), then the receiver is
 * made modifiable and scanned: bytes below 0x80 are read directly when the
 * encoding is ASCII-compatible, and tr_find() decides whether a code point is
 * selected.  The result is terminated with TERM_FILL.
 * Returns str when `modify` is set; the other return value is not visible in
 * this excerpt.
 */
7985rb_str_delete_bang(
int argc, VALUE *argv, VALUE str)
7987 char squeez[TR_TABLE_SIZE];
7990 VALUE del = 0, nodel = 0;
7992 int i, ascompat, cr;
7996 for (i=0; i<argc; i++) {
8001 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8004 str_modify_keep_cr(str);
8013 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8026 if (tr_find(c, squeez, del, nodel)) {
8037 TERM_FILL(t, TERM_LEN(str));
8041 if (modify)
return str;
/* Non-destructive delete: calls rb_str_delete_bang() and ignores its return
 * value.  Presumably `str` is a copy of the receiver at this point -- the
 * copying statement is not visible in this excerpt. */
8061rb_str_delete(
int argc, VALUE *argv, VALUE str)
8064 rb_str_delete_bang(argc, argv, str);
/*
 * In-place squeeze.  With arguments, each one is folded into the `squeez`
 * table via tr_setup_table(); the single-byte fast path is kept only while
 * every argument is itself single-byte optimizable.
 *
 * In all three scanning variants a character is acted on when it differs
 * from the previously seen one (`save`), or when arguments were given and
 * the character is not in the selected set (squeez[c] for bytes, tr_find()
 * for wider code points).  What is done with such a character is not visible
 * in this excerpt.
 * Returns str when `modify` is set.
 */
8078rb_str_squeeze_bang(
int argc, VALUE *argv, VALUE str)
8080 char squeez[TR_TABLE_SIZE];
8082 VALUE del = 0, nodel = 0;
8083 unsigned char *s, *send, *t;
8085 int ascompat, singlebyte = single_byte_optimizable(str);
8089 enc = STR_ENC_GET(str);
8092 for (i=0; i<argc; i++) {
8097 if (singlebyte && !single_byte_optimizable(s))
8099 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8103 str_modify_keep_cr(str);
8112 unsigned int c = *s++;
8113 if (c != save || (argc > 0 && !squeez[c])) {
8123 if (ascompat && (c = *s) < 0x80) {
8124 if (c != save || (argc > 0 && !squeez[c])) {
8132 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8142 TERM_FILL((
char *)t, TERM_LEN(str));
8148 if (modify)
return str;
/* Non-destructive squeeze: calls rb_str_squeeze_bang() and ignores its
 * return value.  Presumably `str` is a copy of the receiver at this point --
 * the copying statement is not visible in this excerpt. */
8169rb_str_squeeze(
int argc, VALUE *argv, VALUE str)
8172 rb_str_squeeze_bang(argc, argv, str);
/* In-place tr_s: forwards to tr_trans() with sflag = 1 and returns its result. */
8186rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
8188 return tr_trans(str, src, repl, 1);
/* Non-destructive tr_s: runs tr_trans() with sflag = 1 and ignores its return
 * value.  Presumably `str` is a copy of the receiver at this point -- the
 * copying statement is not visible in this excerpt. */
8206rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
8209 tr_trans(str, src, repl, 1);
/*
 * Counts characters of str selected by the argument sets.
 *
 * A fast path (guarded, among other conditions, by
 * ONIGENC_IS_ALLOWED_REVERSE_MATCH on the first byte of the pattern and by
 * the receiver not being a broken string) compares each byte against a
 * single value c and bumps n on a match.
 * Otherwise the first argument initialises `table` (first = TRUE), each
 * further argument narrows it (first = FALSE), and the receiver is scanned
 * with a direct byte read for values below 0x80 in ASCII-compatible
 * encodings and tr_find() as the membership test.
 */
8242rb_str_count(
int argc, VALUE *argv, VALUE str)
8244 char table[TR_TABLE_SIZE];
8246 VALUE del = 0, nodel = 0, tstr;
8261 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8262 !is_broken_string(str)) {
8270 if (*(
unsigned char*)s++ == c) n++;
8276 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8277 for (i=1; i<argc; i++) {
8281 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8291 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8300 if (tr_find(c, table, del, nodel)) {
/* Validates a field-separator value.  nil yields 0; callers in this file
 * treat a zero result as "not acceptable" (rb_str_split_m raises TypeError).
 * Handling of non-nil values is not visible in this excerpt. */
8311rb_fs_check(VALUE val)
8315 if (
NIL_P(val))
return 0;
/* Lookup table indexed by byte value; the entry is 1 for the byte values
 * 9..13 (TAB, LF, VT, FF, CR) and 32 (space), and 0 for everything else,
 * including all bytes >= 0x80.  Read through the ascii_isspace() macro. */
8320static const char isspacetable[256] = {
8321 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8323 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8335 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Table-driven whitespace test.  The argument is cast to unsigned char before
 * indexing, so a negative char value cannot index outside isspacetable. */
8339#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
/*
 * Emits one split field and maintains the count of pending empty fields.
 *
 * When empty_count is non-negative and the field is empty (len == 0), the
 * field is not emitted; the function just returns empty_count + 1.
 * Otherwise, if empty fields are pending (empty_count > 0), one empty string
 * per pending field is emitted first -- the second visible loop yields them
 * to the block with rb_yield(); the first loop presumably appends to `result`
 * (its body is not visible in this excerpt).
 * A negative empty_count therefore disables the deferral of empty fields.
 */
8342split_string(VALUE result, VALUE str,
long beg,
long len,
long empty_count)
8344 if (empty_count >= 0 && len == 0) {
8345 return empty_count + 1;
8347 if (empty_count > 0) {
8352 }
while (--empty_count > 0);
8356 rb_yield(str_new_empty_String(str));
8357 }
while (--empty_count > 0);
8371 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
/*
 * Classifies a literal split pattern.
 *   - SPLIT_TYPE_CHARS is returned on one early path (its condition is not
 *     visible in this excerpt).
 *   - A pattern that is exactly one space -- tested byte-wise (len == 1 and
 *     ptr[0] == ' ') on one path and via rb_enc_ascget() on another -- is
 *     SPLIT_TYPE_AWK.
 *   - Anything else gets the caller-supplied default_type.
 */
8375literal_split_pattern(VALUE spat, split_type_t default_type)
8383 return SPLIT_TYPE_CHARS;
8386 if (len == 1 && ptr[0] ==
' ') {
8387 return SPLIT_TYPE_AWK;
8392 if (
rb_enc_ascget(ptr, ptr + len, &l, enc) ==
' ' && len == l) {
8393 return SPLIT_TYPE_AWK;
8396 return default_type;
/*
 * Implementation of split with an optional pattern and an optional limit
 * (rb_scan_args format "02").
 *
 * Limit handling visible here: a limit <= 0 is treated like no limit
 * (limit is reset to Qnil); when limit is nil and lim is 0, empty_count is
 * set to 0, which switches on the deferral of empty fields performed by
 * split_string(); otherwise empty_count stays at -1.
 *
 * Pattern handling: the split type starts as SPLIT_TYPE_REGEXP; the pattern
 * is normalised with get_pat_quoted(); a value rejected by rb_fs_check()
 * raises TypeError ("value of $; must be String or Regexp");
 * literal_split_pattern() may reclassify the pattern as AWK, STRING or CHARS.
 *
 * One loop per split type follows:
 *   - AWK: splits on runs of whitespace, with a byte-wise variant using
 *     ascii_isspace() for ASCII strings and a second variant for others;
 *   - STRING: repeated rb_memsearch() for the separator, skipping hits that
 *     do not start on a character boundary (t != ptr + end);
 *   - CHARS: emits successive pieces of n bytes (presumably one character
 *     each);
 *   - REGEXP: emits the text between matches and then every capture group
 *     whose BEG(idx) is not -1.
 * Every loop stops early once the limit counter i reaches lim.
 * SPLIT_STR() is the emit helper; it updates empty_count from split_string().
 *
 * Returns `result` when it is non-zero, otherwise str.
 */
8455rb_str_split_m(
int argc, VALUE *argv, VALUE str)
8460 split_type_t split_type;
8461 long beg, end, i = 0, empty_count = -1;
8466 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8468 if (lim <= 0) limit =
Qnil;
8469 else if (lim == 1) {
8481 if (
NIL_P(limit) && !lim) empty_count = 0;
8483 enc = STR_ENC_GET(str);
8484 split_type = SPLIT_TYPE_REGEXP;
8486 spat = get_pat_quoted(spat, 0);
8489 split_type = SPLIT_TYPE_AWK;
8491 else if (!(spat = rb_fs_check(spat))) {
8492 rb_raise(rb_eTypeError,
"value of $; must be String or Regexp");
8497 if (split_type != SPLIT_TYPE_AWK) {
8502 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8503 if (split_type == SPLIT_TYPE_AWK) {
8505 split_type = SPLIT_TYPE_STRING;
8510 mustnot_broken(spat);
8511 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
8519#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8525 if (split_type == SPLIT_TYPE_AWK) {
8531 if (is_ascii_string(str)) {
8532 while (ptr < eptr) {
8533 c = (
unsigned char)*ptr++;
8535 if (ascii_isspace(c)) {
8541 if (!
NIL_P(limit) && lim <= i)
break;
8544 else if (ascii_isspace(c)) {
8545 SPLIT_STR(beg, end-beg);
8548 if (!
NIL_P(limit)) ++i;
8556 while (ptr < eptr) {
8568 if (!
NIL_P(limit) && lim <= i)
break;
8572 SPLIT_STR(beg, end-beg);
8575 if (!
NIL_P(limit)) ++i;
8583 else if (split_type == SPLIT_TYPE_STRING) {
8584 char *str_start = ptr;
8585 char *substr_start = ptr;
8589 mustnot_broken(str);
8591 while (ptr < eptr &&
8592 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8595 if (t != ptr + end) {
8599 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8602 if (!
NIL_P(limit) && lim <= ++i)
break;
8604 beg = ptr - str_start;
8606 else if (split_type == SPLIT_TYPE_CHARS) {
8607 char *str_start = ptr;
8610 mustnot_broken(str);
8612 while (ptr < eptr &&
8614 SPLIT_STR(ptr - str_start, n);
8616 if (!
NIL_P(limit) && lim <= ++i)
break;
8618 beg = ptr - str_start;
8629 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (
void)0)) {
8634 if (start == end && BEG(0) == END(0)) {
8639 else if (last_null == 1) {
8653 SPLIT_STR(beg, end-beg);
8654 beg = start = END(0);
8658 for (idx=1; idx < regs->num_regs; idx++) {
8659 if (BEG(idx) == -1)
continue;
8660 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8662 if (!
NIL_P(limit) && lim <= ++i)
break;
8664 if (match) rb_match_unbusy(match);
8670 return result ? result : str;
8680 return rb_str_split_m(1, &sep, str);
/* Evaluates to a new array with capacity `size` when no block is given, and
 * to 0 when a block is given.  The first argument `m` (callers pass the
 * method name) is not used by the expansion. */
8683#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8686enumerator_element(VALUE ary, VALUE e)
8698#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8701chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8726#define rb_rs get_rs()
/*
 * Line enumeration worker shared by rb_str_each_line (ary = 0) and
 * rb_str_lines (ary from WANTARRAY).  Lines are handed to ENUM_ELEM(ary, e).
 *
 * Visible structure:
 *   - `chomp` defaults to Qfalse and `opts` to Qnil; a static `keywords`
 *     table holds one keyword ID (presumably :chomp -- its initialisation is
 *     not visible in this excerpt).
 *   - One path emits the whole string as a single element.
 *   - One path tracks `eol` positions while advancing `subend`; the emitted
 *     length adds rslen unless chomp is set.
 *   - The general path searches the separator with rb_memsearch(), rejects
 *     hits that are not aligned (hit != adjusted), and can strip the line end
 *     with chomp_newline().
 *   - After each ENUM_ELEM() that returns true, str_mod_check() guards
 *     against the receiver having been modified during the callback.
 *   - A trailing remainder (subptr != pend) is emitted at the end, with its
 *     line end removed on the visible chomp paths.
 */
8729rb_str_enumerate_lines(
int argc, VALUE *argv, VALUE str, VALUE ary)
8732 VALUE line, rs, orig = str, opts =
Qnil, chomp =
Qfalse;
8733 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8734 long pos, len, rslen;
8740 static ID keywords[1];
8749 if (!ENUM_ELEM(ary, str)) {
8773 const char *eol = NULL;
8775 while (subend < pend) {
8781 if (eol == subend)
break;
8783 if (subptr) eol = subend;
8786 if (!subptr) subptr = subend;
8790 }
while (subend < pend);
8793 subend - subptr + (chomp ? 0 : rslen));
8794 if (ENUM_ELEM(ary, line)) {
8795 str_mod_check(str, ptr, len);
8797 subptr = eol = NULL;
8816 while (subptr < pend) {
8817 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
8821 if (hit != adjusted) {
8825 subend = hit += rslen;
8828 subend = chomp_newline(subptr, subend, enc);
8835 if (ENUM_ELEM(ary, line)) {
8836 str_mod_check(str, ptr, len);
8841 if (subptr != pend) {
8844 pend = chomp_newline(subptr, pend, enc);
8846 else if (pend - subptr >= rslen &&
8847 memcmp(pend - rslen, rsptr, rslen) == 0) {
8852 ENUM_ELEM(ary, line);
/* each_line: delegates to rb_str_enumerate_lines() with ary = 0, i.e. without
 * a result array. */
8911rb_str_each_line(
int argc, VALUE *argv, VALUE str)
8914 return rb_str_enumerate_lines(argc, argv, str, 0);
/* lines: obtains the collector from WANTARRAY (a new array with capacity 0
 * when no block is given, 0 otherwise) and delegates to
 * rb_str_enumerate_lines(). */
8937rb_str_lines(
int argc, VALUE *argv, VALUE str)
8939 VALUE ary = WANTARRAY(
"lines", 0);
8940 return rb_str_enumerate_lines(argc, argv, str, ary);
8944rb_str_each_byte_size(VALUE str, VALUE args, VALUE eobj)
8950rb_str_enumerate_bytes(VALUE str, VALUE ary)
8979rb_str_each_byte(VALUE str)
8982 return rb_str_enumerate_bytes(str, 0);
8997rb_str_bytes(VALUE str)
9000 return rb_str_enumerate_bytes(str, ary);
9004rb_str_each_char_size(VALUE str, VALUE args, VALUE eobj)
9010rb_str_enumerate_chars(VALUE str, VALUE ary)
9023 for (i = 0; i < len; i += n) {
9029 for (i = 0; i < len; i += n) {
9057rb_str_each_char(VALUE str)
9060 return rb_str_enumerate_chars(str, 0);
9075rb_str_chars(VALUE str)
9078 return rb_str_enumerate_chars(str, ary);
/* Code point enumeration worker.  For single-byte optimizable strings every
 * code point is one byte, so the work is handed to rb_str_enumerate_bytes().
 * Otherwise the string's encoding is fetched for decoding; the decoding loop
 * is not visible in this excerpt. */
9082rb_str_enumerate_codepoints(VALUE str, VALUE ary)
9087 const char *ptr, *end;
9090 if (single_byte_optimizable(str))
9091 return rb_str_enumerate_bytes(str, ary);
9096 enc = STR_ENC_GET(str);
9131rb_str_each_codepoint(VALUE str)
9134 return rb_str_enumerate_codepoints(str, 0);
9150rb_str_codepoints(VALUE str)
9153 return rb_str_enumerate_codepoints(str, ary);
9160 regex_t *reg_grapheme_cluster = NULL;
9161 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9165 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
9167 if (!reg_grapheme_cluster) {
9168 const OnigUChar source_ascii[] =
"\\X";
9170 const OnigUChar *source = source_ascii;
9171 size_t source_len =
sizeof(source_ascii) - 1;
9173#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9174#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9175#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9176#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9177#define CASE_UTF(e) \
9178 case ENCINDEX_UTF_##e: { \
9179 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9180 source = source_UTF_##e; \
9181 source_len = sizeof(source_UTF_##e); \
9184 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9191 int r = onig_new(®_grapheme_cluster, source, source + source_len,
9192 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9194 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9195 onig_error_code_to_str(message, r, &einfo);
9196 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9199 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
9202 return reg_grapheme_cluster;
/*
 * Size function for the grapheme-cluster enumerator: counts clusters by
 * repeatedly matching the regexp from get_reg_grapheme_cluster(enc) at the
 * current position with onig_match().  A match length <= 0 ends the count.
 * The count is returned as a Ruby integer via SIZET2NUM.
 */
9206rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
9208 size_t grapheme_cluster_count = 0;
9209 regex_t *reg_grapheme_cluster = NULL;
9211 const char *ptr, *end;
9217 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9222 OnigPosition len = onig_match(reg_grapheme_cluster,
9223 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9224 (
const OnigUChar *)ptr, NULL, 0);
9225 if (len <= 0)
break;
9226 grapheme_cluster_count++;
9230 return SIZET2NUM(grapheme_cluster_count);
/*
 * Grapheme-cluster enumeration worker.  One early path falls back to
 * rb_str_enumerate_chars() (its condition is not visible in this excerpt).
 * Otherwise clusters are found by matching the regexp from
 * get_reg_grapheme_cluster(enc) at the current position with onig_match();
 * a match length <= 0 ends the loop.
 */
9234rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
9237 regex_t *reg_grapheme_cluster = NULL;
9239 const char *ptr0, *ptr, *end;
9242 return rb_str_enumerate_chars(str, ary);
9246 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9251 OnigPosition len = onig_match(reg_grapheme_cluster,
9252 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9253 (
const OnigUChar *)ptr, NULL, 0);
9254 if (len <= 0)
break;
9281rb_str_each_grapheme_cluster(VALUE str)
9284 return rb_str_enumerate_grapheme_clusters(str, 0);
/* grapheme_clusters: when no block is given the collector array is
 * pre-sized with rb_str_strlen(str) (the character count); the work is done
 * by rb_str_enumerate_grapheme_clusters(). */
9299rb_str_grapheme_clusters(VALUE str)
9301 VALUE ary = WANTARRAY(
"grapheme_clusters",
rb_str_strlen(str));
9302 return rb_str_enumerate_grapheme_clusters(str, ary);
9306chopped_length(VALUE str)
9309 const char *p, *p2, *beg, *end;
9313 if (beg >= end)
return 0;
/* In-place chop: makes the receiver modifiable, asks chopped_length() for the
 * new length and applies it with STR_SET_LEN.  Guards and the return value
 * are not visible in this excerpt. */
9333rb_str_chop_bang(VALUE str)
9335 str_modify_keep_cr(str);
9338 len = chopped_length(str);
9339 STR_SET_LEN(str, len);
9369rb_str_chop(VALUE str)
9375smart_chomp(VALUE str,
const char *e,
const char *p)
9394 if (--e > p && *(e-1) ==
'\r') {
/*
 * Computes the length str would have after chomping the record separator rs.
 *
 * Visible rules:
 *   - an empty string yields 0;
 *   - several paths delegate to smart_chomp(), including the one taken when
 *     the separator's last byte is '\n';
 *   - one path strips trailing '\n' bytes in a loop, also looking for a
 *     preceding '\r';
 *   - a separator longer than the string (rslen > len) leaves the length
 *     unchanged;
 *   - otherwise the string must end with the separator's last byte
 *     (`newline`) and the tail is compared against rs with memcmp().
 * The conditions selecting between these paths are only partly visible.
 */
9407chompped_length(VALUE str, VALUE rs)
9411 char *pp, *e, *rsptr;
9416 if (len == 0)
return 0;
9419 return smart_chomp(str, e, p);
9440 while (e > p && *(e-1) ==
'\n') {
9442 if (e > p && *(e-1) ==
'\r')
9448 if (rslen > len)
return len;
9451 newline = rsptr[rslen-1];
9454 if (newline ==
'\n')
9455 return smart_chomp(str, e, p);
9459 return smart_chomp(str, e, p);
9464 if (is_broken_string(rs)) {
9468 if (p[len-1] == newline &&
9470 memcmp(rsptr, pp, rslen) == 0)) {
9484chomp_rs(
int argc,
const VALUE *argv)
/* Chomps rs from str in place.  Returns Qnil when chompped_length() reports
 * nothing to remove (len >= olen).  Otherwise the receiver is made modifiable
 * and truncated to len with STR_SET_LEN.  Note the modifiability check only
 * happens on the path that actually shortens the string. */
9498rb_str_chomp_string(VALUE str, VALUE rs)
9501 long len = chompped_length(str, rs);
9502 if (len >= olen)
return Qnil;
9503 str_modify_keep_cr(str);
9504 STR_SET_LEN(str, len);
/* chomp!: checks up front that the receiver may be modified
 * (str_modifiable), resolves the record separator from the arguments with
 * chomp_rs(), and delegates to rb_str_chomp_string(). */
9522rb_str_chomp_bang(
int argc, VALUE *argv, VALUE str)
9525 str_modifiable(str);
9527 rs = chomp_rs(argc, argv);
9529 return rb_str_chomp_string(str, rs);
9556rb_str_chomp(
int argc, VALUE *argv, VALUE str)
9558 VALUE rs = chomp_rs(argc, argv);
/*
 * Returns how many leading bytes of [s, e) would be stripped.
 * A NULL or empty range yields 0.  In the single-byte case the scan skips
 * NUL bytes and bytes for which ascii_isspace() is true.  The multibyte scan
 * and the final return are not visible in this excerpt; `start` remembers
 * the original position, presumably to compute the offset.
 */
9564lstrip_offset(VALUE str,
const char *s,
const char *e,
rb_encoding *enc)
9566 const char *
const start = s;
9568 if (!s || s >= e)
return 0;
9571 if (single_byte_optimizable(str)) {
9572 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
/* lstrip!: computes the leading offset with lstrip_offset(), then shifts the
 * remaining olen - loffset bytes to the start of the buffer with memmove()
 * (the regions overlap) and shrinks the length with STR_SET_LEN. */
9602rb_str_lstrip_bang(VALUE str)
9608 str_modify_keep_cr(str);
9609 enc = STR_ENC_GET(str);
9611 loffset = lstrip_offset(str, start, start+olen, enc);
9613 long len = olen-loffset;
9614 s = start + loffset;
9615 memmove(start, s, len);
9616 STR_SET_LEN(str, len);
/* lstrip: when lstrip_offset() finds nothing to strip (loffset <= 0) a plain
 * duplicate of the receiver is returned.  Construction of the stripped
 * result is not visible in this excerpt. */
9638rb_str_lstrip(VALUE str)
9643 loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
9644 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
/*
 * Returns how many trailing bytes of [s, e) would be stripped.
 * Dummy encodings are rejected first by rb_str_check_dummy_enc().  A NULL or
 * empty range yields 0.  In the single-byte case the scan walks backwards
 * from the end over NUL bytes and bytes for which ascii_isspace() is true.
 * The multibyte scan and the final return are not visible in this excerpt.
 */
9649rstrip_offset(VALUE str,
const char *s,
const char *e,
rb_encoding *enc)
9653 rb_str_check_dummy_enc(enc);
9654 if (!s || s >= e)
return 0;
9658 if (single_byte_optimizable(str)) {
9660 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
/* rstrip!: computes the trailing offset with rstrip_offset() and truncates
 * the receiver to olen - roffset with STR_SET_LEN; no bytes need to move. */
9690rb_str_rstrip_bang(VALUE str)
9696 str_modify_keep_cr(str);
9697 enc = STR_ENC_GET(str);
9699 roffset = rstrip_offset(str, start, start+olen, enc);
9701 long len = olen - roffset;
9703 STR_SET_LEN(str, len);
/* rstrip: when rstrip_offset() finds nothing to strip (roffset <= 0) a plain
 * duplicate of the receiver is returned.  Construction of the stripped
 * result is not visible in this excerpt. */
9725rb_str_rstrip(VALUE str)
9731 enc = STR_ENC_GET(str);
9733 roffset = rstrip_offset(str, start, start+olen, enc);
9735 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
/*
 * strip!: the leading offset is computed first; the trailing offset is then
 * computed on the range that starts after the leading whitespace, so an
 * all-whitespace string is not counted twice.  When either offset is
 * positive the new length starts as olen - roffset, the kept bytes are moved
 * to the front with memmove(), and the length is applied with STR_SET_LEN.
 */
9754rb_str_strip_bang(VALUE str)
9757 long olen, loffset, roffset;
9760 str_modify_keep_cr(str);
9761 enc = STR_ENC_GET(str);
9763 loffset = lstrip_offset(str, start, start+olen, enc);
9764 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9766 if (loffset > 0 || roffset > 0) {
9767 long len = olen-roffset;
9770 memmove(start, start + loffset, len);
9772 STR_SET_LEN(str, len);
/* strip: computes both offsets like rb_str_strip_bang; when neither side has
 * anything to strip a plain duplicate of the receiver is returned.
 * Construction of the stripped result is not visible in this excerpt. */
9796rb_str_strip(VALUE str)
9799 long olen, loffset, roffset;
9803 loffset = lstrip_offset(str, start, start+olen, enc);
9804 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9806 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9811scan_once(VALUE str, VALUE pat,
long *start,
int set_backref_str)
9813 VALUE result, match;
9816 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9842 if (!regs || regs->num_regs == 1) {
9847 for (i=1; i < regs->num_regs; i++) {
/*
 * scan: the pattern is normalised with get_pat_quoted(pat, 1) and the
 * receiver must not be a broken string.  Two loops call scan_once() until it
 * returns nil: the first passes set_backref_str = 0, the second passes 1 and
 * checks with str_mod_check() that the receiver was not modified while
 * iterating.  After each loop, when `last` is non-negative, the pattern is
 * searched once more at that position with set_backref_str = 1 (presumably
 * to leave the last-match data pointing at the final match).
 */
9893rb_str_scan(VALUE str, VALUE pat)
9897 long last = -1, prev = 0;
9900 pat = get_pat_quoted(pat, 1);
9901 mustnot_broken(str);
9905 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
9910 if (last >= 0) rb_pat_search(pat, str, last, 1);
9915 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
9919 str_mod_check(str, p, len);
9921 if (last >= 0) rb_pat_search(pat, str, last, 1);
9941rb_str_hex(VALUE str)
9965rb_str_oct(VALUE str)
9975 rb_atomic_t initialized;
/* Teardown for the crypt() lock; registered with atexit() by
 * crypt_mutex_initialize().  Resets crypt_mutex.initialized to 0. */
9980crypt_mutex_destroy(
void)
9984 crypt_mutex.initialized = 0;
/* Sets up the crypt() lock and registers crypt_mutex_destroy() with
 * atexit().  An unexpected transition of crypt_mutex.initialized is treated
 * as an interpreter bug (rb_bug). */
9988crypt_mutex_initialize(
void)
9995 atexit(crypt_mutex_destroy);
10002 rb_bug(
"crypt_mutex.initialized: %d->%d", i, crypt_mutex.initialized);
/*
 * crypt: hashes str with the given salt.
 *
 * - CRYPT_END() is defined twice: as ALLOCV_END(databuf) on the crypt_r()
 *   build and as an unlock of crypt_mutex.lock on the plain crypt() build
 *   (the #if lines selecting between them are not visible in this excerpt).
 * - Both str and salt are rejected by mustnot_wchar() if unsuitable.
 * - The salt must be at least 2 bytes long and neither of its first two
 *   bytes may be NUL; otherwise ArgumentError
 *   ("salt too short (need >=2 bytes)") is raised.
 * - If either of the first two salt bytes is not ASCII, a local 3-byte copy
 *   with the high bit masked off (& 0x7f) is used as the salt instead.
 * - crypt_r() gets a data block whose `initialized` field is zeroed when the
 *   struct has that member; the crypt() variant first makes sure the mutex
 *   is initialised.
 */
10068rb_str_crypt(VALUE str, VALUE salt)
10073# define CRYPT_END() ALLOCV_END(databuf)
10075 extern char *crypt(
const char *,
const char *);
10076# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10079 const char *s, *saltp;
10082 char salt_8bit_clean[3];
10086 mustnot_wchar(str);
10087 mustnot_wchar(salt);
10090 if (
RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10091 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10095 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10096 salt_8bit_clean[0] = saltp[0] & 0x7f;
10097 salt_8bit_clean[1] = saltp[1] & 0x7f;
10098 salt_8bit_clean[2] =
'\0';
10099 saltp = salt_8bit_clean;
10104# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10105 data->initialized = 0;
10107 res = crypt_r(s, saltp, data);
10109 crypt_mutex_initialize();
10111 res = crypt(s, saltp);
/*
 * sum: adds up the bytes of str, each read as unsigned char, into the
 * unsigned long accumulator sum0.  str_mod_check() guards against the
 * receiver changing during the computation.  When `bits` is smaller than the
 * bit width of long, the sum is reduced to its low `bits` bits with a mask;
 * the bits < width test also keeps the shift count in range.
 * Handling of wider sums is not visible in this excerpt.
 */
10153rb_str_sum(
int argc, VALUE *argv, VALUE str)
10156 char *ptr, *p, *pend;
10159 unsigned long sum0 = 0;
10171 str_mod_check(str, ptr, len);
10174 sum0 += (
unsigned char)*p;
10185 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10186 sum0 &= (((
unsigned long)1)<<bits)-1;
/*
 * Shared worker for ljust / rjust / center; jflag is 'l', 'r' or 'c'
 * (see the three wrappers below this function).
 *
 * - The pad string defaults to a single space (f = " ", flen = fclen = 1).
 *   A pad string with zero bytes or zero characters raises ArgumentError
 *   ("zero width padding").
 * - If width is negative or the receiver already has at least `width`
 *   characters, a plain duplicate is returned.
 * - llen, the left-hand padding in characters, is 0 for 'l', n for 'r' and
 *   n/2 otherwise (n is presumably width - len; its assignment is not
 *   visible in this excerpt).
 * - llen2 / rlen2 are byte offsets into the pad string for the partial
 *   repetition left over after whole copies (llen % fclen, rlen % fclen).
 * - The result size is accumulated step by step with comparisons against
 *   LONG_MAX before each multiplication/addition, raising ArgumentError
 *   ("argument too big") instead of overflowing.
 * - Padding is written with memset() on one path and with whole-pad copies
 *   followed by a memcpy() of the partial remainder on the other; the
 *   result is terminated with TERM_FILL.
 */
10206rb_str_justify(
int argc, VALUE *argv, VALUE str,
char jflag)
10210 long width, len, flen = 1, fclen = 1;
10213 const char *f =
" ";
10214 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10216 int singlebyte = 1, cr;
10220 enc = STR_ENC_GET(str);
10228 fclen = str_strlen(pad, enc);
10229 singlebyte = single_byte_optimizable(pad);
10230 if (flen == 0 || fclen == 0) {
10231 rb_raise(rb_eArgError,
"zero width padding");
10234 len = str_strlen(str, enc);
10235 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10237 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10241 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10242 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10245 if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10246 (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10247 (len += llen2 + rlen2) >= LONG_MAX - size) {
10248 rb_raise(rb_eArgError,
"argument too big");
10251 res = str_new0(
rb_cString, 0, len, termlen);
10254 memset(p, *f, llen);
10258 while (llen >= fclen) {
10264 memcpy(p, f, llen2);
10271 memset(p, *f, rlen);
10275 while (rlen >= fclen) {
10281 memcpy(p, f, rlen2);
10285 TERM_FILL(p, termlen);
/* ljust: rb_str_justify() with jflag 'l' (no padding on the left side). */
10312rb_str_ljust(
int argc, VALUE *argv, VALUE str)
10314 return rb_str_justify(argc, argv, str,
'l');
/* rjust: rb_str_justify() with jflag 'r' (all padding on the left side). */
10332rb_str_rjust(
int argc, VALUE *argv, VALUE str)
10334 return rb_str_justify(argc, argv, str,
'r');
/* center: rb_str_justify() with jflag 'c'; any flag other than 'l' and 'r'
 * puts n/2 of the padding on the left side. */
10352rb_str_center(
int argc, VALUE *argv, VALUE str)
10354 return rb_str_justify(argc, argv, str,
'c');
/* partition: the separator is normalised with get_pat_quoted().  On the
 * string-separator path visible here, rb_str_index() locates the separator
 * from position 0; a negative result jumps to `failed`, which returns
 * [copy of str, "", ""]. */
10373rb_str_partition(VALUE str, VALUE sep)
10377 sep = get_pat_quoted(sep, 0);
10389 pos = rb_str_index(str, sep, 0);
10390 if (pos < 0)
goto failed;
10398 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
/* rpartition: the separator is normalised with get_pat_quoted() and, on the
 * path visible here, searched backwards with rb_str_rindex().  The visible
 * fallback return is ["", "", copy of str], the mirror image of
 * rb_str_partition's. */
10431rb_str_rpartition(VALUE str, VALUE sep)
10435 sep = get_pat_quoted(sep, 0);
10448 pos = rb_str_rindex(str, sep, pos);
10460 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10479rb_str_start_with(
int argc, VALUE *argv, VALUE str)
10483 for (i=0; i<argc; i++) {
10484 VALUE tmp = argv[i];
10486 if (rb_reg_start_with_p(tmp, str))
10514rb_str_end_with(
int argc, VALUE *argv, VALUE str)
10520 for (i=0; i<argc; i++) {
10521 VALUE tmp = argv[i];
/*
 * Returns the number of bytes delete_prefix would remove, or 0 when nothing
 * is to be removed: the prefix is a broken string, it is empty, it is longer
 * than str, or the leading bytes of str differ from it (memcmp).
 * The final return and any character-boundary check are not visible in this
 * excerpt.
 */
10548deleted_prefix_length(VALUE str, VALUE prefix)
10550 char *strptr, *prefixptr;
10551 long olen, prefixlen;
10554 if (is_broken_string(prefix))
return 0;
10559 if (prefixlen <= 0)
return 0;
10561 if (olen < prefixlen)
return 0;
10564 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
/* delete_prefix!: makes the receiver modifiable before measuring the prefix
 * with deleted_prefix_length(); returns Qnil when nothing would be removed.
 * The removal itself is not visible in this excerpt. */
10581rb_str_delete_prefix_bang(VALUE str, VALUE prefix)
10584 str_modify_keep_cr(str);
10586 prefixlen = deleted_prefix_length(str, prefix);
10587 if (prefixlen <= 0)
return Qnil;
/* delete_prefix: returns a plain duplicate of the receiver when
 * deleted_prefix_length() reports nothing to remove.  Construction of the
 * shortened result is not visible in this excerpt. */
10603rb_str_delete_prefix(VALUE str, VALUE prefix)
10607 prefixlen = deleted_prefix_length(str, prefix);
10608 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
/*
 * Returns the number of bytes delete_suffix would remove, or 0 when nothing
 * is to be removed: the suffix is a broken string, it is empty, it is longer
 * than str, or the trailing bytes of str (starting at
 * strptr + olen - suffixlen) differ from it (memcmp).
 * The final return and any character-boundary check are not visible in this
 * excerpt.
 */
10623deleted_suffix_length(VALUE str, VALUE suffix)
10625 char *strptr, *suffixptr, *s;
10626 long olen, suffixlen;
10630 if (is_broken_string(suffix))
return 0;
10635 if (suffixlen <= 0)
return 0;
10637 if (olen < suffixlen)
return 0;
10640 s = strptr + olen - suffixlen;
10641 if (memcmp(s, suffixptr, suffixlen) != 0)
return 0;
/*
 * delete_suffix!: checks that the receiver may be modified (str_modifiable)
 * before measuring, returns Qnil when deleted_suffix_length() reports
 * nothing to remove, and only then prepares the buffer for writing with
 * str_modify_keep_cr().  The string is truncated to olen - suffixlen and
 * re-terminated at the new end with TERM_FILL.
 */
10659rb_str_delete_suffix_bang(VALUE str, VALUE suffix)
10661 long olen, suffixlen, len;
10662 str_modifiable(str);
10664 suffixlen = deleted_suffix_length(str, suffix);
10665 if (suffixlen <= 0)
return Qnil;
10668 str_modify_keep_cr(str);
10669 len = olen - suffixlen;
10670 STR_SET_LEN(str, len);
10671 TERM_FILL(&
RSTRING_PTR(str)[len], TERM_LEN(str));
/* delete_suffix: returns a plain duplicate of the receiver when
 * deleted_suffix_length() reports nothing to remove.  Construction of the
 * shortened result is not visible in this excerpt. */
10689rb_str_delete_suffix(VALUE str, VALUE suffix)
10693 suffixlen = deleted_suffix_length(str, suffix);
10694 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10703 rb_raise(rb_eTypeError,
"value of %"PRIsVALUE
" must be String",
rb_id2str(
id));
/* Setter hook for the field-separator variable.  The value is validated with
 * rb_fs_check(); an error message of the form
 * "value of <name> must be String or Regexp" is used for rejected values, and
 * a deprecation warning for `$;' is issued via rb_warn_deprecated().  The
 * conditions guarding these two steps are not visible in this excerpt. */
10709rb_fs_setter(VALUE val, ID
id, VALUE *var)
10711 val = rb_fs_check(val);
10714 "value of %"PRIsVALUE
" must be String or Regexp",
10718 rb_warn_deprecated(
"`$;'", NULL);
10732rb_str_force_encoding(VALUE str, VALUE enc)
10734 str_modifiable(str);
10751 if (
FL_TEST(str, STR_NOEMBED)) {
10757 str_replace_shared_without_enc(str2, str);
10774rb_str_valid_encoding_p(VALUE str)
10792rb_str_is_ascii_only_p(VALUE str)
10802 static const char ellipsis[] =
"...";
10803 const long ellipsislen =
sizeof(ellipsis) - 1;
10806 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
10807 VALUE estr, ret = 0;
10809 if (len < 0)
rb_raise(rb_eIndexError,
"negative length %ld", len);
10811 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
10814 else if (len <= ellipsislen ||
10843 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
10848 rb_raise(rb_eEncCompatError,
"incompatible character encodings: %s and %s",
10855static VALUE enc_str_scrub(
rb_encoding *enc, VALUE str, VALUE repl,
int cr);
10865rb_enc_str_scrub(
rb_encoding *enc, VALUE str, VALUE repl)
10868 if (enc == STR_ENC_GET(str)) {
10873 return enc_str_scrub(enc, str, repl, cr);
/*
 * Scrub worker: replaces invalid byte sequences of str (interpreted in enc).
 *
 * - Passing both a block and a replacement raises ArgumentError
 *   ("both of block and replacement given").
 * - A non-nil replacement is validated with str_compat_and_valid().
 * - DEFAULT_REPLACE_CHAR(str) installs a static replacement byte sequence and
 *   its length.  The defaults visible here are "\xEF\xBF\xBD" or "?" on the
 *   first path, and on the second path the UTF-16BE/LE and UTF-32BE/LE
 *   encodings of the same replacement character, or "?".
 * - The first path skips ahead with search_nonascii() and shrinks an invalid
 *   run one byte at a time (clen--); the second path works in units of
 *   mbminlen bytes (clen -= mbminlen).  Both clamp clen to the bytes left.
 * - Where a replacement comes from a callback, str_mod_check() verifies the
 *   receiver was not modified and the returned value is validated again with
 *   str_compat_and_valid().
 *
 * The conditions selecting the two paths are not visible in this excerpt.
 */
10877enc_str_scrub(
rb_encoding *enc, VALUE str, VALUE repl,
int cr)
10881 const char *rep, *p, *e, *p1, *sp;
10887 rb_raise(rb_eArgError,
"both of block and replacement given");
10894 if (!
NIL_P(repl)) {
10895 repl = str_compat_and_valid(repl, enc);
10903#define DEFAULT_REPLACE_CHAR(str) do { \
10904 static const char replace[sizeof(str)-1] = str; \
10905 rep = replace; replen = (int)sizeof(replace); \
10920 else if (!
NIL_P(repl)) {
10926 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
10930 DEFAULT_REPLACE_CHAR(
"?");
10935 p = search_nonascii(p, e);
10959 if (e - p < clen) clen = e - p;
10966 for (; clen > 1; clen--) {
10979 str_mod_check(str, sp, slen);
10980 repl = str_compat_and_valid(repl, enc);
10987 p = search_nonascii(p, e);
11014 str_mod_check(str, sp, slen);
11015 repl = str_compat_and_valid(repl, enc);
11028 else if (!
NIL_P(repl)) {
11032 else if (encidx == ENCINDEX_UTF_16BE) {
11033 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11035 else if (encidx == ENCINDEX_UTF_16LE) {
11036 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11038 else if (encidx == ENCINDEX_UTF_32BE) {
11039 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11041 else if (encidx == ENCINDEX_UTF_32LE) {
11042 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11045 DEFAULT_REPLACE_CHAR(
"?");
11062 if (e - p < clen) clen = e - p;
11063 if (clen <= mbminlen * 2) {
11068 for (; clen > mbminlen; clen-=mbminlen) {
11080 str_mod_check(str, sp, slen);
11081 repl = str_compat_and_valid(repl, enc);
11107 str_mod_check(str, sp, slen);
11108 repl = str_compat_and_valid(repl, enc);
11133str_scrub(
int argc, VALUE *argv, VALUE str)
11155str_scrub_bang(
int argc, VALUE *argv, VALUE str)
11163static ID id_normalize;
11164static ID id_normalized_p;
11165static VALUE mUnicodeNormalize;
/*
 * Shared dispatcher for the unicode_normalize family.  On first use it loads
 * "unicode_normalize/normalize.rb" and remembers that in a function-local
 * static flag, so rb_require() runs only once through this path.  The call
 * is then forwarded to mUnicodeNormalize as method `id` with argc+1
 * arguments (argv2 presumably holds str followed by the caller's arguments;
 * its construction is not visible in this excerpt).
 */
11168unicode_normalize_common(
int argc, VALUE *argv, VALUE str, ID
id)
11170 static int UnicodeNormalizeRequired = 0;
11173 if (!UnicodeNormalizeRequired) {
11174 rb_require(
"unicode_normalize/normalize.rb");
11175 UnicodeNormalizeRequired = 1;
11179 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
/* unicode_normalize: dispatches to unicode_normalize_common() with
 * id_normalize and returns its result. */
11205rb_str_unicode_normalize(
int argc, VALUE *argv, VALUE str)
11207 return unicode_normalize_common(argc, argv, str, id_normalize);
/* unicode_normalize!: computes the normalized string like
 * rb_str_unicode_normalize and then replaces the receiver's contents with it
 * via rb_str_replace(). */
11218rb_str_unicode_normalize_bang(
int argc, VALUE *argv, VALUE str)
11220 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
/* unicode_normalized?: dispatches to unicode_normalize_common() with
 * id_normalized_p and returns its result. */
11241rb_str_unicode_normalized_p(
int argc, VALUE *argv, VALUE str)
11243 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11376#define sym_equal rb_obj_equal
11379sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11395rb_str_symname_p(VALUE sym)
11403 enc = STR_ENC_GET(sym);
11406 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (
long)strlen(ptr) ||
11414rb_str_quote_unprintable(VALUE str)
11424 enc = STR_ENC_GET(str);
11427 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11428 !sym_printable(ptr, ptr + len, enc)) {
11429 return rb_str_escape(str);
11434MJIT_FUNC_EXPORTED VALUE
11435rb_id_quote_unprintable(ID
id)
11438 if (!rb_str_symname_p(str)) {
11439 return rb_str_escape(str);
11454sym_inspect(VALUE sym)
11461 if (!rb_str_symname_p(str)) {
11466 memmove(dest + 1, dest, len);
11473 memcpy(dest + 1, ptr, len);
11533sym_to_sym(VALUE sym)
11538MJIT_FUNC_EXPORTED VALUE
11539rb_sym_proc_call(ID mid,
int argc,
const VALUE *argv,
int kw_splat, VALUE passed_proc)
11544 rb_raise(rb_eArgError,
"no receiver given");
11561rb_sym_to_proc(VALUE sym)
11595sym_cmp(VALUE sym, VALUE other)
11630sym_casecmp(VALUE sym, VALUE other)
11666sym_casecmp_p(VALUE sym, VALUE other)
11682sym_match(VALUE sym, VALUE other)
11684 return rb_str_match(
rb_sym2str(sym), other);
11696sym_match_m(
int argc, VALUE *argv, VALUE sym)
11698 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11710sym_match_m_p(
int argc, VALUE *argv, VALUE sym)
11712 return rb_str_match_m_p(argc, argv, sym);
11726sym_aref(
int argc, VALUE *argv, VALUE sym)
11728 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11740sym_length(VALUE sym)
11753sym_empty(VALUE sym)
11769sym_upcase(
int argc, VALUE *argv, VALUE sym)
11787sym_downcase(
int argc, VALUE *argv, VALUE sym)
11803sym_capitalize(
int argc, VALUE *argv, VALUE sym)
11819sym_swapcase(
int argc, VALUE *argv, VALUE sym)
11840sym_start_with(
int argc, VALUE *argv, VALUE sym)
11842 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11859sym_end_with(
int argc, VALUE *argv, VALUE sym)
11861 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11872sym_encoding(VALUE sym)
11878string_for_symbol(VALUE name)
11883 rb_raise(rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol",
11897 name = string_for_symbol(name);
11907 name = string_for_symbol(name);
11928sym_all_symbols(VALUE
_)
11936 return rb_fstring(str);
11943 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
11955 if (UNLIKELY(rb_enc_autoload_p(enc))) {
11956 rb_enc_autoload(enc);
11960 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
12366 assert(rb_vm_fstring_table());
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ATOMIC_CAS(var, oldval, newval)
Atomic compare-and-swap.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t cat, const char *fmt,...)
Identical to rb_category_warning(), except it reports always regardless of runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_ensure(VALUE(*b_proc)(VALUE), VALUE data1, VALUE(*e_proc)(VALUE), VALUE data2)
An equivalent to ensure clause.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
VALUE rb_cSymbol
Symbol class.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes requested to represent the passed code point using the passed encoding.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
static const char * rb_enc_name(rb_encoding *enc)
Queries the (canonical) name of the passed encoding.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itsel...
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding ...
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
static char * rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the previous (left) character.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
static bool rb_enc_asciicompat(rb_encoding *enc)
Queries if the passed encoding is in some sense compatible with ASCII.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
Identical to rb_enc_uint_chr(), except it writes back to the passed buffer instead of allocating one.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed ...
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding it...
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_new(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_obj_encoding(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcallv(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcall(), except it takes the method arguments as a C array.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
void rb_gc_register_address(VALUE *valptr)
Inform the garbage collector that valptr points to a live Ruby object that should not be moved.
VALUE rb_check_array_type(VALUE obj)
Try converting an object to its array representation using its to_ary method, if any.
VALUE rb_ary_new(void)
Allocates a new, empty array.
VALUE rb_ary_push(VALUE ary, VALUE elem)
Special case of rb_ary_cat() that adds only one element.
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Identical to rb_cstr2inum(), except it takes Ruby's strings instead of C's.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_check_hash_type(VALUE obj)
Try converting an object to its hash representation using its to_hash method, if any.
VALUE rb_hash_aref(VALUE hash, VALUE key)
Queries the given key in the given hash table.
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Inserts or replaces ("upsert"s) the objects into the given hash table.
VALUE rb_hash_lookup(VALUE hash, VALUE key)
Identical to rb_hash_aref(), except it always returns RUBY_Qnil for misshits.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_rs
The record separator character for inputs, or the $/.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby's string instead of C's.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
VALUE rb_utf8_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_utf8_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "UTF-8" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
VALUE rb_str_buf_cat(VALUE, const char *, long)
Just another name of rb_str_cat.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
VALUE rb_usascii_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
VALUE rb_usascii_str_new_cstr(const char *ptr)
Identical to rb_str_new_cstr(), except it generates a string of "US ASCII" encoding.
VALUE rb_str_buf_cat2(VALUE, const char *)
Just another name of rb_str_cat_cstr.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_str_buf_new_cstr(const char *ptr)
This is a rb_str_buf_new() + rb_str_buf_cat() combo.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
VALUE rb_tainted_str_new(const char *ptr, long len)
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_new(const char *ptr, long len)
Allocates an instance of rb_cString.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
VALUE rb_str_dup_frozen(VALUE)
Just another name of rb_str_new_frozen.
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
VALUE rb_locale_str_new_cstr(const char *ptr)
Identical to rb_locale_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_new_cstr(const char *ptr)
Identical to rb_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_tainted_str_new_cstr(const char *ptr)
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_external_str_new_cstr(const char *ptr)
Identical to rb_external_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_cat_cstr(VALUE dst, const char *src)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
ID rb_intern(const char *name)
Finds or creates a symbol of the given name.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
VALUE rb_id2str(ID id)
Identical to rb_id2name(), except it returns a Ruby's String instead of C's.
void rb_define_hooked_variable(const char *name, VALUE *var, rb_gvar_getter_t *getter, rb_gvar_setter_t *setter)
Identical to rb_define_virtual_variable(), but can also specify a storage.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
@ RSTRING_EMBED_LEN_MAX
Max possible number of characters that can be embedded.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static long RSTRING_EMBED_LEN(VALUE str)
Queries the length of the string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except that the return type differs.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into the default external encoding.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks the contents for viability as a C string.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
char ary[RSTRING_EMBED_LEN_MAX+1]
When a string is short enough, it uses this area to store the contents themselves.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds the necessary info for a wrapped C struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
void rb_nativethread_lock_initialize(rb_nativethread_lock_t *lock)
Fills the passed lock with an initial value.
void rb_nativethread_lock_destroy(rb_nativethread_lock_t *lock)
Destroys the passed lock.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises an exception when the predicate fails.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
void ruby_xfree(void *ptr)
Deallocates a storage instance.