14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/numeric.h"
35#include "internal/object.h"
36#include "internal/proc.h"
37#include "internal/re.h"
38#include "internal/sanitizers.h"
39#include "internal/string.h"
40#include "internal/transcode.h"
45#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
/* Shorthand for entry `no` of the beg[] / end[] arrays reachable through a
 * pointer named `regs`; such a variable must be in scope at every expansion. */
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_usascii_str_new_cstr
67#undef rb_utf8_str_new_cstr
68#undef rb_enc_str_new_cstr
69#undef rb_external_str_new_cstr
70#undef rb_locale_str_new_cstr
71#undef rb_str_dup_frozen
72#undef rb_str_buf_new_cstr
/* RUBY_MAX_CHAR_LEN is the constant 16 (presumably the largest byte length of
 * one character -- confirm against its users).
 * The STR_* names give string-specific names to generic FL_USERn flag bits. */
126#define RUBY_MAX_CHAR_LEN 16
127#define STR_PRECOMPUTED_HASH FL_USER4
128#define STR_SHARED_ROOT FL_USER5
129#define STR_BORROWED FL_USER6
130#define STR_TMPLOCK FL_USER7
131#define STR_NOFREE FL_USER18
132#define STR_FAKESTR FL_USER19
134#define STR_SET_NOEMBED(str) do {\
135 FL_SET((str), STR_NOEMBED);\
136 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clears STR_NOEMBED, STR_SHARED and STR_NOFREE on `str` with one FL_UNSET. */
138#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
140#define STR_SET_LEN(str, n) do { \
141 RSTRING(str)->len = (n); \
145str_encindex_fastpath(
int encindex)
149 case ENCINDEX_ASCII_8BIT:
151 case ENCINDEX_US_ASCII:
159str_enc_fastpath(
VALUE str)
/* Terminator width for `str`: 1 when str_enc_fastpath(str) holds, otherwise
 * rb_enc_mbminlen() of the encoding looked up from ENCODING_GET(str).
 * Note: `str` is evaluated more than once. */
164#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
165#define TERM_FILL(ptr, termlen) do {\
166 char *const term_fill_ptr = (ptr);\
167 const int term_fill_len = (termlen);\
168 *term_fill_ptr = '\0';\
169 if (UNLIKELY(term_fill_len > 1))\
170 memset(term_fill_ptr, 0, term_fill_len);\
173#define RESIZE_CAPA(str,capacity) do {\
174 const int termlen = TERM_LEN(str);\
175 RESIZE_CAPA_TERM(str,capacity,termlen);\
177#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
178 if (STR_EMBED_P(str)) {\
179 if (str_embed_capa(str) < capacity + termlen) {\
180 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
181 const long tlen = RSTRING_LEN(str);\
182 memcpy(tmp, RSTRING_PTR(str), tlen);\
183 RSTRING(str)->as.heap.ptr = tmp;\
184 RSTRING(str)->len = tlen;\
185 STR_SET_NOEMBED(str);\
186 RSTRING(str)->as.heap.aux.capa = (capacity);\
190 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
191 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
192 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
193 RSTRING(str)->as.heap.aux.capa = (capacity);\
197#define STR_SET_SHARED(str, shared_str) do { \
198 if (!FL_TEST(str, STR_FAKESTR)) { \
199 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
200 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
201 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
202 FL_SET((str), STR_SHARED); \
203 FL_SET((shared_str), STR_SHARED_ROOT); \
204 if (RBASIC_CLASS((shared_str)) == 0) \
205 FL_SET_RAW((shared_str), STR_BORROWED); \
/* STR_HEAP_PTR:  the as.heap.ptr field of `str`.
 * STR_HEAP_SIZE: as.heap.aux.capa (cast to size_t) plus TERM_LEN(str);
 *                `str` is evaluated more than once.
 * STR_ENC_GET:   forwards to get_encoding(). */
209#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
210#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
213#define STR_ENC_GET(str) get_encoding(str)
/* SHARABLE_MIDDLE_SUBSTRING defaults to 0 when the build does not define it.
 * While it is 0, SHARABLE_SUBSTRING_P(beg, len, end) holds only when
 * beg + len == end; the alternative definition is the constant 1.
 * NOTE(review): the directives separating the two definitions are not
 * visible in this view -- confirm against the full file. */
215#if !defined SHARABLE_MIDDLE_SUBSTRING
216# define SHARABLE_MIDDLE_SUBSTRING 0
218#if !SHARABLE_MIDDLE_SUBSTRING
219#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
221#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/* Returns rb_gc_obj_slot_size(str) minus the offset of as.embed.ary inside
 * struct RString, i.e. the part of the object's slot that starts at
 * embed.ary. */
226str_embed_capa(
VALUE str)
228 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
/* True when none of STR_NOFREE, STR_SHARED_ROOT or STR_SHARED is set on
 * `str`. */
232rb_str_reembeddable_p(
VALUE str)
234 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
238rb_str_embed_size(
long capa)
244rb_str_size_as_embedded(
VALUE str)
247 if (STR_EMBED_P(str)) {
248 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
252 else if (rb_str_reembeddable_p(str)) {
253 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
256 real_size =
sizeof(
struct RString);
260 real_size +=
sizeof(st_index_t);
/* Asks rb_gc_size_allocatable_p() whether the size computed by
 * rb_str_embed_size(len + termlen) can be allocated, and returns that
 * answer. */
267STR_EMBEDDABLE_P(
long len,
long termlen)
269 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
274static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
275static VALUE str_new_static(
VALUE klass,
const char *ptr,
long len,
int encindex);
277static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
278static inline void str_modifiable(
VALUE str);
283str_make_independent(
VALUE str)
286 int termlen = TERM_LEN(str);
287 str_make_independent_expand((str),
len, 0L, termlen);
290static inline int str_dependent_p(
VALUE str);
/* Calls str_make_independent(str), but only when str_dependent_p(str)
 * reports true; otherwise `str` is left untouched by the visible code. */
293rb_str_make_independent(
VALUE str)
295 if (str_dependent_p(str)) {
296 str_make_independent(str);
301rb_str_make_embedded(
VALUE str)
306 char *buf =
RSTRING(str)->as.heap.ptr;
310 STR_SET_LEN(str,
len);
317 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
321rb_debug_rstring_null_ptr(
const char *func)
323 fprintf(stderr,
"%s is returning NULL!! "
324 "SIGSEGV is highly expected to follow immediately.\n"
325 "If you could reproduce, attach your debugger here, "
326 "and look at the passed string.\n",
331static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
334get_encoding(
VALUE str)
/* Guard: raises rb_eArgError ("invalid byte sequence in <encoding name>")
 * when is_broken_string(str) is true. */
340mustnot_broken(
VALUE str)
342 if (is_broken_string(str)) {
343 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
/* Guard: raises rb_eArgError ("wide char encoding: <encoding name>") when
 * the string's encoding has rb_enc_mbminlen() greater than 1. */
348mustnot_wchar(
VALUE str)
350 rb_encoding *enc = STR_ENC_GET(str);
351 if (rb_enc_mbminlen(enc) > 1) {
352 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
358static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
360#if SIZEOF_LONG == SIZEOF_VOIDP
361#define PRECOMPUTED_FAKESTR_HASH 1
365#ifdef PRECOMPUTED_FAKESTR_HASH
367fstring_hash(
VALUE str)
371 return (st_index_t)
RSTRING(str)->as.heap.aux.capa;
378#define fstring_hash rb_str_hash
381const struct st_hash_type rb_fstring_hash_type = {
/* True when `str` has no FL_EXIVAR flag set and its class is exactly
 * rb_cString.  `str` is evaluated twice. */
386#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
388static inline st_index_t
389str_do_hash(
VALUE str)
391 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
393 if (e && !is_ascii_string(str)) {
400str_store_precomputed_hash(
VALUE str, st_index_t hash)
406 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
407 size_t free_bytes = str_embed_capa(str) - used_bytes;
411 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
413 FL_SET(str, STR_PRECOMPUTED_HASH);
421 bool force_precompute_hash;
425fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
434 if (rb_objspace_garbage_object_p(str)) {
453 long len = RSTRING_LEN(str);
454 long capa =
len +
sizeof(st_index_t);
455 int term_len = TERM_LEN(str);
457 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
459 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
460 STR_SET_LEN(new_str, RSTRING_LEN(str));
462 rb_enc_copy(new_str, str);
463 str_store_precomputed_hash(new_str, fstring_hash(str));
467 rb_enc_copy(new_str, str);
468#ifdef PRECOMPUTED_FAKESTR_HASH
469 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
470 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
484 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
487 if (STR_SHARED_P(str)) {
489 str_make_independent(str);
492 if (!BARE_STRING_P(str)) {
498 RBASIC(str)->flags |= RSTRING_FSTR;
500 *key = *value = arg->fstr = str;
513 if (
FL_TEST(str, RSTRING_FSTR))
516 bare = BARE_STRING_P(str);
518 if (STR_EMBED_P(str)) {
523 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
530 rb_str_resize(str, RSTRING_LEN(str));
532 fstr = register_fstring(str,
false,
false);
535 str_replace_shared_without_enc(str, fstr);
543register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
547 .force_precompute_hash = force_precompute_hash
550#if SIZEOF_VOIDP == SIZEOF_LONG
554 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
560 st_table *frozen_strings = rb_vm_fstring_table();
563 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
564 }
while (UNDEF_P(args.fstr));
577setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
592 return (
VALUE)fake_str;
/* Variant of setup_fake_str() that takes an rb_encoding pointer: converts
 * `enc` with rb_enc_to_index() and forwards every other argument unchanged. */
599rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len, rb_encoding *enc)
601 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
610rb_fstring_new(
const char *ptr,
long len)
613 return register_fstring(setup_fake_str(&fake_str, ptr,
len, ENCINDEX_US_ASCII),
false,
false);
617rb_fstring_enc_new(
const char *ptr,
long len, rb_encoding *enc)
620 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
false,
false);
/* Convenience wrapper: rb_fstring_new(ptr, strlen(ptr)).  Because the length
 * comes from strlen(), `ptr` must be a non-NULL, NUL-terminated C string. */
624rb_fstring_cstr(
const char *ptr)
626 return rb_fstring_new(ptr, strlen(ptr));
630fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
640 const char *aptr, *bptr;
643 return (alen != blen ||
645 memcmp(aptr, bptr, alen) != 0);
649single_byte_optimizable(
VALUE str)
653 case ENCINDEX_ASCII_8BIT:
654 case ENCINDEX_US_ASCII:
676static inline const char *
677search_nonascii(
const char *p,
const char *e)
679 const uintptr_t *s, *t;
681#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
682# if SIZEOF_UINTPTR_T == 8
683# define NONASCII_MASK UINT64_C(0x8080808080808080)
684# elif SIZEOF_UINTPTR_T == 4
685# define NONASCII_MASK UINT32_C(0x80808080)
687# error "don't know what to do."
690# if SIZEOF_UINTPTR_T == 8
691# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
692# elif SIZEOF_UINTPTR_T == 4
693# define NONASCII_MASK 0x80808080UL
695# error "don't know what to do."
699 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
700#if !UNALIGNED_WORD_ACCESS
701 if ((uintptr_t)p % SIZEOF_VOIDP) {
702 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
707 case 7:
if (p[-7]&0x80)
return p-7;
708 case 6:
if (p[-6]&0x80)
return p-6;
709 case 5:
if (p[-5]&0x80)
return p-5;
710 case 4:
if (p[-4]&0x80)
return p-4;
712 case 3:
if (p[-3]&0x80)
return p-3;
713 case 2:
if (p[-2]&0x80)
return p-2;
714 case 1:
if (p[-1]&0x80)
return p-1;
719#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
720#define aligned_ptr(value) \
721 __builtin_assume_aligned((value), sizeof(uintptr_t))
723#define aligned_ptr(value) (uintptr_t *)(value)
726 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
729 if (*s & NONASCII_MASK) {
730#ifdef WORDS_BIGENDIAN
731 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
733 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
743 case 7:
if (e[-7]&0x80)
return e-7;
744 case 6:
if (e[-6]&0x80)
return e-6;
745 case 5:
if (e[-5]&0x80)
return e-5;
746 case 4:
if (e[-4]&0x80)
return e-4;
748 case 3:
if (e[-3]&0x80)
return e-3;
749 case 2:
if (e[-2]&0x80)
return e-2;
750 case 1:
if (e[-1]&0x80)
return e-1;
756coderange_scan(
const char *p,
long len, rb_encoding *enc)
758 const char *e = p +
len;
762 p = search_nonascii(p, e);
766 if (rb_enc_asciicompat(enc)) {
767 p = search_nonascii(p, e);
770 int ret = rb_enc_precise_mbclen(p, e, enc);
774 p = search_nonascii(p, e);
780 int ret = rb_enc_precise_mbclen(p, e, enc);
799 p = search_nonascii(p, e);
803 else if (rb_enc_asciicompat(enc)) {
804 p = search_nonascii(p, e);
810 int ret = rb_enc_precise_mbclen(p, e, enc);
817 p = search_nonascii(p, e);
823 int ret = rb_enc_precise_mbclen(p, e, enc);
848 rb_enc_set_index(str1, rb_enc_get_index(str2));
856rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
861 str_enc_copy(dest, src);
863 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
874 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
886rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
888 str_enc_copy(dest, src);
893enc_coderange_scan(
VALUE str, rb_encoding *enc)
/* Thin wrapper that forwards both arguments to enc_coderange_scan() and
 * returns its result. */
899rb_enc_str_coderange_scan(
VALUE str, rb_encoding *enc)
901 return enc_coderange_scan(str, enc);
910 cr = enc_coderange_scan(str, get_encoding(str));
917rb_enc_str_asciicompat(
VALUE str)
920 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
928 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
937str_mod_check(
VALUE s,
const char *p,
long len)
945str_capacity(
VALUE str,
const int termlen)
947 if (STR_EMBED_P(str)) {
948 return str_embed_capa(str) - termlen;
950 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
954 return RSTRING(str)->as.heap.aux.capa;
961 return str_capacity(str, TERM_LEN(str));
965must_not_null(
const char *ptr)
968 rb_raise(rb_eArgError,
"NULL pointer given");
975 size_t size = rb_str_embed_size(
capa);
979 NEWOBJ_OF(str,
struct RString, klass,
986str_alloc_heap(
VALUE klass)
988 NEWOBJ_OF(str,
struct RString, klass,
995empty_str_alloc(
VALUE klass)
997 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
998 VALUE str = str_alloc_embed(klass, 0);
999 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1005str_enc_new(
VALUE klass,
const char *ptr,
long len, rb_encoding *enc)
1010 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1017 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1019 int termlen = rb_enc_mbminlen(enc);
1021 if (STR_EMBEDDABLE_P(
len, termlen)) {
1022 str = str_alloc_embed(klass,
len + termlen);
1028 str = str_alloc_heap(klass);
1034 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1037 rb_enc_raw_set(str, enc);
1043 STR_SET_LEN(str,
len);
1049str_new(
VALUE klass,
const char *ptr,
long len)
1073rb_enc_str_new(
const char *ptr,
long len, rb_encoding *enc)
1086 __msan_unpoison_string(ptr);
1106 if (rb_enc_mbminlen(enc) != 1) {
1107 rb_raise(rb_eArgError,
"wchar encoding given");
1109 return rb_enc_str_new(ptr, strlen(ptr), enc);
1113str_new_static(
VALUE klass,
const char *ptr,
long len,
int encindex)
1118 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1122 str = str_enc_new(klass, ptr,
len, rb_enc_from_index(encindex));
1125 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1126 str = str_alloc_heap(klass);
1128 RSTRING(str)->as.heap.ptr = (
char *)ptr;
1130 RBASIC(str)->flags |= STR_NOFREE;
1131 rb_enc_associate_index(str, encindex);
1145 return str_new_static(
rb_cString, ptr,
len, ENCINDEX_US_ASCII);
1151 return str_new_static(
rb_cString, ptr,
len, ENCINDEX_UTF_8);
1157 return str_new_static(
rb_cString, ptr,
len, rb_enc_to_index(enc));
1160static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1161 rb_encoding *from, rb_encoding *to,
1162 int ecflags,
VALUE ecopts);
1165is_enc_ascii_string(
VALUE str, rb_encoding *enc)
1167 int encidx = rb_enc_to_index(enc);
1168 if (rb_enc_get_index(str) == encidx)
1169 return is_ascii_string(str);
1180 if (!to)
return str;
1181 if (!from) from = rb_enc_get(str);
1182 if (from == to)
return str;
1183 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1184 rb_is_ascii8bit_enc(to)) {
1185 if (STR_ENC_GET(str) != to) {
1187 rb_enc_associate(str, to);
1194 from, to, ecflags, ecopts);
1195 if (
NIL_P(newstr)) {
1203rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1204 rb_encoding *from,
int ecflags,
VALUE ecopts)
1209 if (ofs < -olen || olen < ofs)
1211 if (ofs < 0) ofs += olen;
1213 STR_SET_LEN(newstr, ofs);
1217 rb_str_modify(newstr);
1218 return str_cat_conv_enc_opts(newstr, ofs, ptr,
len, from,
1224rb_str_initialize(
VALUE str,
const char *ptr,
long len, rb_encoding *enc)
1226 STR_SET_LEN(str, 0);
1227 rb_enc_associate(str, enc);
1233str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1234 rb_encoding *from, rb_encoding *to,
1235 int ecflags,
VALUE ecopts)
1240 VALUE econv_wrapper;
1241 const unsigned char *start, *sp;
1242 unsigned char *dest, *dp;
1243 size_t converted_output = (size_t)ofs;
1248 RBASIC_CLEAR_CLASS(econv_wrapper);
1250 if (!ec)
return Qnil;
1253 sp = (
unsigned char*)ptr;
1255 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1256 (dp = dest + converted_output),
1260 size_t converted_input = sp - start;
1261 size_t rest =
len - converted_input;
1262 converted_output = dp - dest;
1264 if (converted_input && converted_output &&
1265 rest < (LONG_MAX / converted_output)) {
1266 rest = (rest * converted_output) / converted_input;
1271 olen += rest < 2 ? 2 : rest;
1272 rb_str_resize(newstr, olen);
1281 rb_enc_associate(newstr, to);
1300 const int eidx = rb_enc_to_index(eenc);
1303 return rb_enc_str_new(ptr,
len, eenc);
1313 if (!ienc || eenc == ienc) {
1314 return rb_enc_str_new(ptr,
len, eenc);
1320 (rb_enc_asciicompat(eenc) && !search_nonascii(ptr, ptr +
len))) {
1321 return rb_enc_str_new(ptr,
len, ienc);
1324 str = rb_enc_str_new(NULL, 0, ienc);
1327 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr,
len, eenc, 0,
Qnil))) {
1328 rb_str_initialize(str, ptr,
len, eenc);
1334rb_external_str_with_enc(
VALUE str, rb_encoding *eenc)
1336 int eidx = rb_enc_to_index(eenc);
1338 !is_ascii_string(str)) {
1342 rb_enc_associate_index(str, eidx);
1401str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1403 const int termlen = TERM_LEN(str);
1408 if (str_embed_capa(str2) >=
len + termlen) {
1409 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1410 STR_SET_EMBED(str2);
1412 TERM_FILL(ptr2+
len, termlen);
1416 if (STR_SHARED_P(str)) {
1417 root =
RSTRING(str)->as.heap.aux.shared;
1426 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1428 rb_fatal(
"about to free a possible shared root");
1430 char *ptr2 = STR_HEAP_PTR(str2);
1432 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1435 FL_SET(str2, STR_NOEMBED);
1436 RSTRING(str2)->as.heap.ptr = ptr;
1437 STR_SET_SHARED(str2, root);
1440 STR_SET_LEN(str2,
len);
1448 str_replace_shared_without_enc(str2, str);
1449 rb_enc_cr_str_exact_copy(str2, str);
1456 return str_replace_shared(str_alloc_heap(klass), str);
1473rb_str_new_frozen_String(
VALUE orig)
1480rb_str_tmp_frozen_acquire(
VALUE orig)
1483 return str_new_frozen_buffer(0, orig, FALSE);
1487rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1489 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1490 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1492 VALUE str = str_alloc_heap(0);
1495 FL_SET(str, STR_SHARED_ROOT);
1497 size_t capa = str_capacity(orig, TERM_LEN(orig));
1503 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1504 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1511 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1512 RBASIC(orig)->flags &= ~STR_NOFREE;
1513 STR_SET_SHARED(orig, str);
1523rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1528 if (STR_EMBED_P(tmp)) {
1541 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1542 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1547 STR_SET_LEN(tmp, 0);
1555 return str_new_frozen_buffer(klass, orig, TRUE);
1564 VALUE str = str_alloc_heap(klass);
1567 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1568 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1569 RBASIC(orig)->flags &= ~STR_NOFREE;
1570 STR_SET_SHARED(orig, str);
1577str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1582 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1583 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1585 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1599 if ((ofs > 0) || (rest > 0) ||
1602 str = str_new_shared(klass,
shared);
1604 RSTRING(str)->as.heap.ptr += ofs;
1605 STR_SET_LEN(str,
RSTRING_LEN(str) - (ofs + rest));
1613 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1614 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1622 str = heap_str_make_shared(klass, orig);
1626 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1638str_new_empty_String(
VALUE str)
1641 rb_enc_copy(v, str);
/* Lower bound (63) applied to requested buffer capacities: a `capa` smaller
 * than this is raised to it where the constant is used. */
1645#define STR_BUF_MIN_SIZE 63
1650 if (STR_EMBEDDABLE_P(
capa, 1)) {
1658 RSTRING(str)->as.heap.ptr[0] =
'\0';
1667 long len = strlen(ptr);
1678 return str_new(0, 0,
len);
1684 if (STR_EMBED_P(str)) {
1685 RB_DEBUG_COUNTER_INC(obj_str_embed);
1687 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1688 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1689 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1692 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1693 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1698rb_str_memsize(
VALUE str)
1700 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1701 return STR_HEAP_SIZE(str);
1711 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1714static inline void str_discard(
VALUE str);
1715static void str_shared_replace(
VALUE str,
VALUE str2);
1720 if (str != str2) str_shared_replace(str, str2);
1731 enc = STR_ENC_GET(str2);
1734 termlen = rb_enc_mbminlen(enc);
1738 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1741 rb_enc_associate(str, enc);
1745 if (STR_EMBED_P(str2)) {
1750 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1751 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1752 RSTRING(str2)->as.heap.ptr = new_ptr;
1753 STR_SET_LEN(str2,
len);
1755 STR_SET_NOEMBED(str2);
1758 STR_SET_NOEMBED(str);
1762 if (
FL_TEST(str2, STR_SHARED)) {
1764 STR_SET_SHARED(str,
shared);
1767 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1771 STR_SET_EMBED(str2);
1773 STR_SET_LEN(str2, 0);
1774 rb_enc_associate(str, enc);
1788 return rb_obj_as_string_result(str, obj);
1805 if (STR_SHARED_P(str2)) {
1808 STR_SET_NOEMBED(str);
1809 STR_SET_LEN(str,
len);
1811 STR_SET_SHARED(str,
shared);
1812 rb_enc_cr_str_exact_copy(str, str2);
1815 str_replace_shared(str, str2);
1824 size_t size = rb_str_embed_size(
capa);
1828 NEWOBJ_OF(str,
struct RString, klass,
1837 NEWOBJ_OF(str,
struct RString, klass,
1848 encidx = rb_enc_get_index(str);
1852 if (encidx) rb_enc_associate_index(dup, encidx);
1868 return str_duplicate_setup_encoding(str, dup, flags);
1877 root =
RSTRING(str)->as.heap.aux.shared;
1879 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1880 root = str = str_new_frozen(klass, str);
1887 FL_SET(root, STR_SHARED_ROOT);
1889 flags |= RSTRING_NOEMBED | STR_SHARED;
1892 return str_duplicate_setup_encoding(str, dup, flags);
1898 if (STR_EMBED_P(str)) {
1899 return str_duplicate_setup_embed(klass, str, dup);
1902 return str_duplicate_setup_heap(klass, str, dup);
1910 if (STR_EMBED_P(str)) {
1911 dup = str_alloc_embed(klass,
RSTRING_LEN(str) + TERM_LEN(str));
1914 dup = str_alloc_heap(klass);
1917 return str_duplicate_setup(klass, str, dup);
1928rb_str_dup_m(
VALUE str)
1930 if (LIKELY(BARE_STRING_P(str))) {
1941 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1948 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1952 new_str = ec_str_alloc_embed(ec, klass,
RSTRING_LEN(str) + TERM_LEN(str));
1953 str_duplicate_setup_embed(klass, str, new_str);
1956 new_str = ec_str_alloc_heap(ec, klass);
1957 str_duplicate_setup_heap(klass, str, new_str);
1966rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
1968 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
1970 rb_ivar_set(str, id_debug_created_info, rb_ary_freeze(debug_info));
1987 static ID keyword_ids[2];
1988 VALUE orig, opt, venc, vcapa;
1990 rb_encoding *enc = 0;
1993 if (!keyword_ids[0]) {
1994 keyword_ids[0] = rb_id_encoding();
1995 CONST_ID(keyword_ids[1],
"capacity");
2003 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2004 enc = rb_to_encoding(venc);
2006 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2009 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2011 if (
capa < STR_BUF_MIN_SIZE) {
2012 capa = STR_BUF_MIN_SIZE;
2020 if (orig == str) n = 0;
2022 str_modifiable(str);
2023 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2025 const size_t size = (size_t)
capa + termlen;
2027 const size_t osize =
RSTRING_LEN(str) + TERM_LEN(str);
2028 char *new_ptr =
ALLOC_N(
char, size);
2029 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2030 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2032 RSTRING(str)->as.heap.ptr = new_ptr;
2034 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2035 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2036 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2038 STR_SET_LEN(str,
len);
2041 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2042 rb_enc_cr_str_exact_copy(str, orig);
2044 FL_SET(str, STR_NOEMBED);
2051 rb_enc_associate(str, enc);
2063rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2069 static ID keyword_ids[2];
2072 rb_encoding *enc = NULL;
2079 keyword_ids[0] = rb_id_encoding();
2080 CONST_ID(keyword_ids[1],
"capacity");
2082 encoding = kwargs[0];
2083 capacity = kwargs[1];
2092 if (UNDEF_P(encoding)) {
2094 encoding = rb_obj_encoding(orig);
2098 if (!UNDEF_P(encoding)) {
2099 enc = rb_to_encoding(encoding);
2103 if (UNDEF_P(capacity)) {
2105 VALUE empty_str = str_new(klass,
"", 0);
2107 rb_enc_associate(empty_str, enc);
2111 VALUE copy = str_duplicate(klass, orig);
2112 rb_enc_associate(copy, enc);
2125 if (orig_capa >
capa) {
2130 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2131 STR_SET_LEN(str, 0);
/* True unless the top two bits of `c` are 10, i.e. for every byte that is
 * not in the UTF-8 continuation range 0x80..0xBF. */
2142#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2157static inline uintptr_t
2158count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2163 d = (d>>6) | (~d>>7);
2164 d &= NONASCII_MASK >> 7;
2167#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2169 return rb_popcount_intptr(d);
2173# if SIZEOF_VOIDP == 8
2182enc_strlen(
const char *p,
const char *e, rb_encoding *enc,
int cr)
2188 long diff = (long)(e - p);
2189 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2194 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2195 const uintptr_t *s, *t;
2196 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2197 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2198 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2199 while (p < (
const char *)s) {
2200 if (is_utf8_lead_byte(*p))
len++;
2204 len += count_utf8_lead_bytes_with_word(s);
2207 p = (
const char *)s;
2210 if (is_utf8_lead_byte(*p))
len++;
2216 else if (rb_enc_asciicompat(enc)) {
2221 q = search_nonascii(p, e);
2227 p += rb_enc_fast_mbclen(p, e, enc);
2234 q = search_nonascii(p, e);
2240 p += rb_enc_mbclen(p, e, enc);
2247 for (c=0; p<e; c++) {
2248 p += rb_enc_mbclen(p, e, enc);
2263rb_enc_strlen_cr(
const char *p,
const char *e, rb_encoding *enc,
int *cr)
2271 long diff = (long)(e - p);
2272 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2274 else if (rb_enc_asciicompat(enc)) {
2278 q = search_nonascii(p, e);
2286 ret = rb_enc_precise_mbclen(p, e, enc);
2301 for (c=0; p<e; c++) {
2302 ret = rb_enc_precise_mbclen(p, e, enc);
2309 if (p + rb_enc_mbminlen(enc) <= e)
2310 p += rb_enc_mbminlen(enc);
2321str_strlen(
VALUE str, rb_encoding *enc)
2326 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2327 if (!enc) enc = STR_ENC_GET(str);
2333 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2338 return enc_strlen(p, e, enc, cr);
2345 return str_strlen(str, NULL);
2359 return LONG2NUM(str_strlen(str, NULL));
2371rb_str_bytesize(
VALUE str)
2389rb_str_empty(
VALUE str)
2409 char *ptr1, *ptr2, *ptr3;
2414 enc = rb_enc_check_str(str1, str2);
2417 termlen = rb_enc_mbminlen(enc);
2418 if (len1 > LONG_MAX - len2) {
2419 rb_raise(rb_eArgError,
"string size too big");
2421 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2423 memcpy(ptr3, ptr1, len1);
2424 memcpy(ptr3+len1, ptr2, len2);
2425 TERM_FILL(&ptr3[len1+len2], termlen);
2441 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2444 int enc1 = rb_enc_get_index(str1);
2445 int enc2 = rb_enc_get_index(str2);
2450 else if (enc2 < 0) {
2453 else if (enc1 != enc2) {
2456 else if (len1 > LONG_MAX - len2) {
2489 rb_enc_copy(str2, str);
2494 rb_raise(rb_eArgError,
"negative argument");
2497 if (STR_EMBEDDABLE_P(
len, 1)) {
2506 STR_SET_LEN(str2,
len);
2507 rb_enc_copy(str2, str);
2511 rb_raise(rb_eArgError,
"argument too big");
2515 termlen = TERM_LEN(str);
2521 while (n <=
len/2) {
2522 memcpy(ptr2 + n, ptr2, n);
2525 memcpy(ptr2 + n, ptr2,
len-n);
2527 STR_SET_LEN(str2,
len);
2528 TERM_FILL(&ptr2[
len], termlen);
2529 rb_enc_cr_str_copy_for_substr(str2, str);
2555 VALUE tmp = rb_check_array_type(arg);
2564rb_check_lockedtmp(
VALUE str)
2566 if (
FL_TEST(str, STR_TMPLOCK)) {
/* Union of the FL_FREEZE, STR_TMPLOCK and STR_CHILLED flag bits. */
2573#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
/* Does nothing unless at least one STR_UNMODIFIABLE_MASK flag is set on
 * `str`.  When one is set: a chilled string is reported through
 * CHILLED_STRING_MUTATED(), and rb_check_lockedtmp() / rb_check_frozen() are
 * invoked.
 * NOTE(review): the closing braces are not visible in this view; the two
 * rb_check_* calls appear to sit inside the flag test -- confirm. */
2575str_modifiable(
VALUE str)
2577 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2578 if (CHILLED_STRING_P(str)) {
2579 CHILLED_STRING_MUTATED(str);
2581 rb_check_lockedtmp(str);
2582 rb_check_frozen(str);
2587str_dependent_p(
VALUE str)
2589 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/* STR_UNMODIFIABLE_MASK extended with the STR_SHARED and STR_NOFREE bits. */
2599#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2601str_independent(
VALUE str)
2603 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2604 str_modifiable(str);
2605 return !str_dependent_p(str);
2611str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2619 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2620 ptr =
RSTRING(str)->as.heap.ptr;
2624 STR_SET_LEN(str,
len);
2631 memcpy(ptr, oldptr,
len);
2633 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2636 STR_SET_NOEMBED(str);
2637 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2638 TERM_FILL(ptr +
len, termlen);
2639 RSTRING(str)->as.heap.ptr = ptr;
2640 STR_SET_LEN(str,
len);
2647 if (!str_independent(str))
2648 str_make_independent(str);
2655 int termlen = TERM_LEN(str);
2659 rb_raise(rb_eArgError,
"negative expanding string size");
2661 if (expand >= LONG_MAX -
len) {
2662 rb_raise(rb_eArgError,
"string size too big");
2665 if (!str_independent(str)) {
2666 str_make_independent_expand(str,
len, expand, termlen);
2668 else if (expand > 0) {
2669 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2676str_modify_keep_cr(
VALUE str)
2678 if (!str_independent(str))
2679 str_make_independent(str);
/* Runs the str_modifiable() check first.  Then, for a string that is not
 * embedded and has neither STR_SHARED nor STR_NOFREE set, releases the heap
 * buffer with ruby_sized_xfree(), sets as.heap.ptr to 0 and resets the
 * length to 0.
 * NOTE(review): closing braces are not visible in this view; the three
 * statements after the test appear to form its body -- confirm. */
2686str_discard(
VALUE str)
2688 str_modifiable(str);
2689 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2690 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2691 RSTRING(str)->as.heap.ptr = 0;
2692 STR_SET_LEN(str, 0);
2699 int encindex = rb_enc_get_index(str);
2701 if (RB_UNLIKELY(encindex == -1)) {
2705 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2709 rb_encoding *enc = rb_enc_from_index(encindex);
2710 if (!rb_enc_asciicompat(enc)) {
2734zero_filled(
const char *s,
int n)
2736 for (; n > 0; --n) {
2743str_null_char(
const char *s,
long len,
const int minlen, rb_encoding *enc)
2745 const char *e = s +
len;
2747 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2748 if (zero_filled(s, minlen))
return s;
2754str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2759 if (str_dependent_p(str)) {
2760 if (!zero_filled(s +
len, termlen))
2761 str_make_independent_expand(str,
len, 0L, termlen);
2764 TERM_FILL(s +
len, termlen);
2771rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2773 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2778 rb_check_lockedtmp(str);
2779 str_make_independent_expand(str,
len, 0L, termlen);
2781 else if (str_dependent_p(str)) {
2782 if (termlen > oldtermlen)
2783 str_make_independent_expand(str,
len, 0L, termlen);
2786 if (!STR_EMBED_P(str)) {
2791 if (termlen > oldtermlen) {
2800str_null_check(
VALUE str,
int *w)
2804 rb_encoding *enc = rb_enc_get(str);
2805 const int minlen = rb_enc_mbminlen(enc);
2809 if (str_null_char(s,
len, minlen, enc)) {
2812 return str_fill_term(str, s,
len, minlen);
2815 if (!s || memchr(s, 0,
len)) {
2819 s = str_fill_term(str, s,
len, minlen);
2825rb_str_to_cstr(
VALUE str)
2828 return str_null_check(str, &w);
2836 char *s = str_null_check(str, &w);
2839 rb_raise(rb_eArgError,
"string contains null char");
2841 rb_raise(rb_eArgError,
"string contains null byte");
2847rb_str_fill_terminator(
VALUE str,
const int newminlen)
2851 return str_fill_term(str, s,
len, newminlen);
2857 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2881str_nth_len(
const char *p,
const char *e,
long *nthp, rb_encoding *enc)
2890 else if (rb_enc_asciicompat(enc)) {
2891 const char *p2, *e2;
2894 while (p < e && 0 < nth) {
2901 p2 = search_nonascii(p, e2);
2910 n = rb_enc_mbclen(p, e, enc);
2921 while (p < e && nth--) {
2922 p += rb_enc_mbclen(p, e, enc);
2931rb_enc_nth(
const char *p,
const char *e,
long nth, rb_encoding *enc)
2933 return str_nth_len(p, e, &nth, enc);
2937str_nth(
const char *p,
const char *e,
long nth, rb_encoding *enc,
int singlebyte)
2942 p = str_nth_len(p, e, &nth, enc);
2951str_offset(
const char *p,
const char *e,
long nth, rb_encoding *enc,
int singlebyte)
2953 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2954 if (!pp)
return e - p;
2962 STR_ENC_GET(str), single_byte_optimizable(str));
2967str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2970 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2971 const uintptr_t *s, *t;
2972 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2973 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2974 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2975 while (p < (
const char *)s) {
2976 if (is_utf8_lead_byte(*p)) nth--;
2980 nth -= count_utf8_lead_bytes_with_word(s);
2982 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2986 if (is_utf8_lead_byte(*p)) {
2987 if (nth == 0)
break;
2997str_utf8_offset(
const char *p,
const char *e,
long nth)
2999 const char *pp = str_utf8_nth(p, e, &nth);
3008 if (single_byte_optimizable(str) || pos < 0)
3012 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3017str_subseq(
VALUE str,
long beg,
long len)
3025 const int termlen = TERM_LEN(str);
3033 if (str_embed_capa(str2) >=
len + termlen) {
3034 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3035 STR_SET_EMBED(str2);
3037 TERM_FILL(ptr2+
len, termlen);
3039 STR_SET_LEN(str2,
len);
3043 str_replace_shared(str2, str);
3046 RSTRING(str2)->as.heap.ptr += beg;
3048 STR_SET_LEN(str2,
len);
3058 VALUE str2 = str_subseq(str, beg,
len);
3059 rb_enc_cr_str_copy_for_substr(str2, str);
3069 rb_encoding *enc = STR_ENC_GET(str);
3072 if (
len < 0)
return 0;
3073 if (beg < 0 && -beg < 0)
return 0;
3077 if (single_byte_optimizable(str)) {
3078 if (beg > blen)
return 0;
3081 if (beg < 0)
return 0;
3083 if (
len > blen - beg)
3085 if (
len < 0)
return 0;
3090 if (
len > -beg)
len = -beg;
3094 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3097 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3103 slen = str_strlen(str, enc);
3105 if (beg < 0)
return 0;
3107 if (
len == 0)
goto end;
3110 else if (beg > 0 && beg > blen) {
3114 if (beg > str_strlen(str, enc))
return 0;
3120 p = str_utf8_nth(s, e, &beg);
3121 if (beg > 0)
return 0;
3122 len = str_utf8_offset(p, e,
len);
3128 p = s + beg * char_sz;
3132 else if (
len * char_sz > e - p)
3137 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3138 if (beg > 0)
return 0;
3142 len = str_offset(p, e,
len, enc, 0);
3150static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3155 return str_substr(str, beg,
len, TRUE);
3165str_substr(
VALUE str,
long beg,
long len,
int empty)
3169 if (!p)
return Qnil;
3170 if (!
len && !empty)
return Qnil;
3174 VALUE str2 = str_subseq(str, beg,
len);
3175 rb_enc_cr_str_copy_for_substr(str2, str);
3183 if (CHILLED_STRING_P(str)) {
3204 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3234str_uminus(
VALUE str)
3239 return rb_fstring(str);
3243#define rb_str_dup_frozen rb_str_new_frozen
3248 if (
FL_TEST(str, STR_TMPLOCK)) {
3251 FL_SET(str, STR_TMPLOCK);
3258 if (!
FL_TEST(str, STR_TMPLOCK)) {
3276 const int termlen = TERM_LEN(str);
3278 str_modifiable(str);
3279 if (STR_SHARED_P(str)) {
3282 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3283 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3299 rb_encoding *enc = rb_enc_get(str);
3316 STR_SET_LEN(str,
len);
3324 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3327 int independent = str_independent(str);
3329 const int termlen = TERM_LEN(str);
3331 if (slen >
len || (termlen != 1 && slen <
len)) {
3337 if (STR_EMBED_P(str)) {
3338 if (
len == slen)
return str;
3339 if (str_embed_capa(str) >=
len + termlen) {
3340 STR_SET_LEN(str,
len);
3344 str_make_independent_expand(str, slen,
len - slen, termlen);
3346 else if (str_embed_capa(str) >=
len + termlen) {
3347 char *ptr = STR_HEAP_PTR(str);
3349 if (slen >
len) slen =
len;
3352 STR_SET_LEN(str,
len);
3353 if (independent) ruby_xfree(ptr);
3356 else if (!independent) {
3357 if (
len == slen)
return str;
3358 str_make_independent_expand(str, slen,
len - slen, termlen);
3362 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3363 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3366 else if (
len == slen)
return str;
3367 STR_SET_LEN(str,
len);
3374str_ensure_available_capa(
VALUE str,
long len)
3376 str_modify_keep_cr(str);
3378 const int termlen = TERM_LEN(str);
3381 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3382 rb_raise(rb_eArgError,
"string sizes too big");
3385 long total = olen +
len;
3386 long capa = str_capacity(str, termlen);
3389 if (total >= LONG_MAX / 2) {
3392 while (total >
capa) {
3395 RESIZE_CAPA_TERM(str,
capa, termlen);
3400str_buf_cat4(
VALUE str,
const char *ptr,
long len,
bool keep_cr)
3403 str_modify_keep_cr(str);
3408 if (
len == 0)
return 0;
3410 long total, olen,
off = -1;
3412 const int termlen = TERM_LEN(str);
3415 if (ptr >= sptr && ptr <= sptr + olen) {
3419 long capa = str_capacity(str, termlen);
3421 if (olen > LONG_MAX -
len) {
3422 rb_raise(rb_eArgError,
"string sizes too big");
3426 if (total >= LONG_MAX / 2) {
3429 while (total >
capa) {
3432 RESIZE_CAPA_TERM(str,
capa, termlen);
3438 memcpy(sptr + olen, ptr,
len);
3439 STR_SET_LEN(str, total);
3440 TERM_FILL(sptr + total, termlen);
3445#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3446#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3451 if (
len == 0)
return str;
3453 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3455 return str_buf_cat(str, ptr,
len);
3466rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3471 if (UNLIKELY(!str_independent(str))) {
3472 str_make_independent(str);
3475 long string_length = -1;
3476 const int null_terminator_length = 1;
3481 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3482 rb_raise(rb_eArgError,
"string sizes too big");
3485 long string_capacity = str_capacity(str, null_terminator_length);
3491 if (LIKELY(string_capacity >= string_length + 1)) {
3493 sptr[string_length] = byte;
3494 STR_SET_LEN(str, string_length + 1);
3495 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3499 str_buf_cat(str, (
char *)&
byte, 1);
3515 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3526rb_enc_cr_str_buf_cat(
VALUE str,
const char *ptr,
long len,
3527 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3532 rb_encoding *str_enc, *ptr_enc;
3536 if (str_encindex == ptr_encindex) {
3538 ptr_cr = coderange_scan(ptr,
len, rb_enc_from_index(ptr_encindex));
3542 str_enc = rb_enc_from_index(str_encindex);
3543 ptr_enc = rb_enc_from_index(ptr_encindex);
3544 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3550 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3556 ptr_cr = coderange_scan(ptr,
len, ptr_enc);
3565 *ptr_cr_ret = ptr_cr;
3567 if (str_encindex != ptr_encindex &&
3570 str_enc = rb_enc_from_index(str_encindex);
3571 ptr_enc = rb_enc_from_index(ptr_encindex);
3576 res_encindex = str_encindex;
3581 res_encindex = str_encindex;
3585 res_encindex = ptr_encindex;
3590 res_encindex = str_encindex;
3597 res_encindex = str_encindex;
3603 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3605 str_buf_cat(str, ptr,
len);
3611 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3618 return rb_enc_cr_str_buf_cat(str, ptr,
len,
3627 rb_encoding *enc = rb_enc_from_index(encindex);
3628 if (rb_enc_asciicompat(enc)) {
3629 return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
3635 unsigned int c = (
unsigned char)*ptr;
3636 int len = rb_enc_codelen(c, enc);
3637 rb_enc_mbcput(c, buf, enc);
3638 rb_enc_cr_str_buf_cat(str, buf,
len,
3651 if (str_enc_fastpath(str)) {
3688rb_str_concat_literals(
size_t num,
const VALUE *strary)
3692 unsigned long len = 1;
3699 str_enc_copy_direct(str, strary[0]);
3701 for (i = s; i < num; ++i) {
3702 const VALUE v = strary[i];
3706 if (encidx != ENCINDEX_US_ASCII) {
3708 rb_enc_set_index(str, encidx);
3733rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3735 str_modifiable(str);
3740 else if (argc > 1) {
3743 rb_enc_copy(arg_str, str);
3744 for (i = 0; i < argc; i++) {
3777rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3779 long needed_capacity = 0;
3783 for (
int index = 0; index < argc; index++) {
3784 VALUE obj = argv[index];
3797 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3804 str_ensure_available_capa(str, needed_capacity);
3807 for (
int index = 0; index < argc; index++) {
3808 VALUE obj = argv[index];
3813 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3814 char byte = (char)(
NUM2INT(obj) & 0xFF);
3823 memcpy(sptr, ptr,
len);
3828 rb_bug(
"append_as_bytes arguments should have been validated");
3832 STR_SET_LEN(str,
RSTRING_LEN(str) + needed_capacity);
3833 TERM_FILL(sptr, TERM_LEN(str));
3838 for (
int index = 0; index < argc; index++) {
3839 VALUE obj = argv[index];
3856 rb_bug(
"append_as_bytes arguments should have been validated");
3926 rb_encoding *enc = STR_ENC_GET(str1);
3930 if (rb_num_to_uint(str2, &code) == 0) {
3943 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3946 rb_str_buf_cat_byte(str1, (
unsigned char)code);
3954 switch (
len = rb_enc_codelen(code, enc)) {
3955 case ONIGERR_INVALID_CODE_POINT_VALUE:
3956 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3958 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3964 rb_enc_mbcput(code, buf, enc);
3965 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3966 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3968 rb_str_resize(str1, pos+
len);
3982rb_ascii8bit_appendable_encoding_index(rb_encoding *enc,
unsigned int code)
3984 int encidx = rb_enc_to_index(enc);
3986 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3991 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3992 return ENCINDEX_ASCII_8BIT;
4015rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4017 str_modifiable(str);
4022 else if (argc > 1) {
4025 rb_enc_copy(arg_str, str);
4026 for (i = 0; i < argc; i++) {
4039 st_index_t precomputed_hash;
4040 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4042 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4043 return precomputed_hash;
4046 return str_do_hash(str);
4053 const char *ptr1, *ptr2;
4056 return (len1 != len2 ||
4058 memcmp(ptr1, ptr2, len1) != 0);
4072rb_str_hash_m(
VALUE str)
4078#define lesser(a,b) (((a)>(b))?(b):(a))
4090 if (idx1 == idx2)
return TRUE;
4095 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4099 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4109 const char *ptr1, *ptr2;
4112 if (str1 == str2)
return 0;
4115 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4124 if (len1 > len2)
return 1;
4127 if (retval > 0)
return 1;
4154 if (str1 == str2)
return Qtrue;
4161 return rb_str_eql_internal(str1, str2);
4185 if (str1 == str2)
return Qtrue;
4187 return rb_str_eql_internal(str1, str2);
4218 return rb_invcmp(str1, str2);
4260 return str_casecmp(str1, s);
4268 const char *p1, *p1end, *p2, *p2end;
4270 enc = rb_enc_compatible(str1, str2);
4277 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4278 while (p1 < p1end && p2 < p2end) {
4280 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4281 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4283 return INT2FIX(c1 < c2 ? -1 : 1);
4290 while (p1 < p1end && p2 < p2end) {
4291 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4292 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4294 if (0 <= c1 && 0 <= c2) {
4298 return INT2FIX(c1 < c2 ? -1 : 1);
4302 l1 = rb_enc_mbclen(p1, p1end, enc);
4303 l2 = rb_enc_mbclen(p2, p2end, enc);
4304 len = l1 < l2 ? l1 : l2;
4305 r = memcmp(p1, p2,
len);
4307 return INT2FIX(r < 0 ? -1 : 1);
4309 return INT2FIX(l1 < l2 ? -1 : 1);
4350 return str_casecmp_p(str1, s);
4357 VALUE folded_str1, folded_str2;
4358 VALUE fold_opt = sym_fold;
4360 enc = rb_enc_compatible(str1, str2);
4365 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4366 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4368 return rb_str_eql(folded_str1, folded_str2);
4372strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4373 const char *sub_ptr,
long sub_len,
long offset, rb_encoding *enc)
4375 const char *search_start = str_ptr;
4376 long pos, search_len = str_len - offset;
4380 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4381 if (pos < 0)
return pos;
4383 if (t == search_start + pos)
break;
4384 search_len -= t - search_start;
4385 if (search_len <= 0)
return -1;
4386 offset += t - search_start;
4389 return pos + offset;
4393#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4394#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4397rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4399 const char *str_ptr, *str_ptr_end, *sub_ptr;
4400 long str_len, sub_len;
4403 enc = rb_enc_check(str, sub);
4404 if (is_broken_string(sub))
return -1;
4412 if (str_len < sub_len)
return -1;
4415 long str_len_char, sub_len_char;
4416 int single_byte = single_byte_optimizable(str);
4417 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4418 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4420 offset += str_len_char;
4421 if (offset < 0)
return -1;
4423 if (str_len_char - offset < sub_len_char)
return -1;
4424 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4427 if (sub_len == 0)
return offset;
4430 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4444rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4448 rb_encoding *enc = STR_ENC_GET(str);
4451 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4452 long slen = str_strlen(str, enc);
4454 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4467 enc, single_byte_optimizable(str));
4478 pos = rb_str_index(str, sub, pos);
4492str_ensure_byte_pos(
VALUE str,
long pos)
4494 if (!single_byte_optimizable(str)) {
4495 const char *s = RSTRING_PTR(str);
4497 const char *p = s + pos;
4498 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4500 "offset %ld does not land on character boundary", pos);
4547rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4553 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4554 long slen = RSTRING_LEN(str);
4556 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4567 str_ensure_byte_pos(str, pos);
4579 pos = rb_str_byteindex(str, sub, pos);
4580 if (pos >= 0)
return LONG2NUM(pos);
4587memrchr(
const char *search_str,
int chr,
long search_len)
4589 const char *ptr = search_str + search_len;
4590 while (ptr > search_str) {
4591 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4599str_rindex(
VALUE str,
VALUE sub,
const char *s, rb_encoding *enc)
4601 char *hit, *adjusted;
4603 long slen, searchlen;
4606 sbeg = RSTRING_PTR(str);
4607 slen = RSTRING_LEN(sub);
4608 if (slen == 0)
return s - sbeg;
4610 t = RSTRING_PTR(sub);
4612 searchlen = s - sbeg + 1;
4614 if (memcmp(s, t, slen) == 0) {
4619 hit = memrchr(sbeg, c, searchlen);
4622 if (hit != adjusted) {
4623 searchlen = adjusted - sbeg;
4626 if (memcmp(hit, t, slen) == 0)
4628 searchlen = adjusted - sbeg;
4629 }
while (searchlen > 0);
4643 enc = rb_enc_check(str, sub);
4644 if (is_broken_string(sub))
return -1;
4645 singlebyte = single_byte_optimizable(str);
4646 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4647 slen = str_strlen(sub, enc);
4650 if (
len < slen)
return -1;
4651 if (
len - pos < slen) pos =
len - slen;
4652 if (
len == 0)
return pos;
4654 sbeg = RSTRING_PTR(str);
4657 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4663 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4664 return str_rindex(str, sub, s, enc);
4725rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4729 rb_encoding *enc = STR_ENC_GET(str);
4730 long pos,
len = str_strlen(str, enc);
4732 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4734 if (pos < 0 && (pos +=
len) < 0) {
4740 if (pos >
len) pos =
len;
4748 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4749 enc, single_byte_optimizable(str));
4760 pos = rb_str_rindex(str, sub, pos);
4770rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4776 enc = rb_enc_check(str, sub);
4777 if (is_broken_string(sub))
return -1;
4778 len = RSTRING_LEN(str);
4779 slen = RSTRING_LEN(sub);
4782 if (
len < slen)
return -1;
4783 if (
len - pos < slen) pos =
len - slen;
4784 if (
len == 0)
return pos;
4786 sbeg = RSTRING_PTR(str);
4789 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4796 return str_rindex(str, sub, s, enc);
4861rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4865 long pos,
len = RSTRING_LEN(str);
4867 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4869 if (pos < 0 && (pos +=
len) < 0) {
4875 if (pos >
len) pos =
len;
4881 str_ensure_byte_pos(str, pos);
4893 pos = rb_str_byterindex(str, sub, pos);
4894 if (pos >= 0)
return LONG2NUM(pos);
4930 switch (OBJ_BUILTIN_TYPE(y)) {
4982rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4989 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5021rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5025 re = get_pat(argv[0]);
5026 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5035static enum neighbor_char
5036enc_succ_char(
char *p,
long len, rb_encoding *enc)
5041 if (rb_enc_mbminlen(enc) > 1) {
5043 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5045 return NEIGHBOR_NOT_CHAR;
5047 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5049 if (!l)
return NEIGHBOR_NOT_CHAR;
5050 if (l !=
len)
return NEIGHBOR_WRAPPED;
5051 rb_enc_mbcput(c, p, enc);
5052 r = rb_enc_precise_mbclen(p, p +
len, enc);
5054 return NEIGHBOR_NOT_CHAR;
5056 return NEIGHBOR_FOUND;
5059 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5062 return NEIGHBOR_WRAPPED;
5063 ++((
unsigned char*)p)[i];
5064 l = rb_enc_precise_mbclen(p, p+
len, enc);
5068 return NEIGHBOR_FOUND;
5071 memset(p+l, 0xff,
len-l);
5077 for (len2 =
len-1; 0 < len2; len2--) {
5078 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5082 memset(p+len2+1, 0xff,
len-(len2+1));
5087static enum neighbor_char
5088enc_pred_char(
char *p,
long len, rb_encoding *enc)
5092 if (rb_enc_mbminlen(enc) > 1) {
5094 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5096 return NEIGHBOR_NOT_CHAR;
5098 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5099 if (!c)
return NEIGHBOR_NOT_CHAR;
5102 if (!l)
return NEIGHBOR_NOT_CHAR;
5103 if (l !=
len)
return NEIGHBOR_WRAPPED;
5104 rb_enc_mbcput(c, p, enc);
5105 r = rb_enc_precise_mbclen(p, p +
len, enc);
5107 return NEIGHBOR_NOT_CHAR;
5109 return NEIGHBOR_FOUND;
5112 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5115 return NEIGHBOR_WRAPPED;
5116 --((
unsigned char*)p)[i];
5117 l = rb_enc_precise_mbclen(p, p+
len, enc);
5121 return NEIGHBOR_FOUND;
5124 memset(p+l, 0,
len-l);
5130 for (len2 =
len-1; 0 < len2; len2--) {
5131 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5135 memset(p+len2+1, 0,
len-(len2+1));
5149static enum neighbor_char
5150enc_succ_alnum_char(
char *p,
long len, rb_encoding *enc,
char *carry)
5152 enum neighbor_char ret;
5156 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5160 const int max_gaps = 1;
5162 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5164 ctype = ONIGENC_CTYPE_DIGIT;
5166 ctype = ONIGENC_CTYPE_ALPHA;
5168 return NEIGHBOR_NOT_CHAR;
5171 for (
try = 0;
try <= max_gaps; ++
try) {
5172 ret = enc_succ_char(p,
len, enc);
5173 if (ret == NEIGHBOR_FOUND) {
5174 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5176 return NEIGHBOR_FOUND;
5183 ret = enc_pred_char(p,
len, enc);
5184 if (ret == NEIGHBOR_FOUND) {
5185 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5198 return NEIGHBOR_NOT_CHAR;
5201 if (ctype != ONIGENC_CTYPE_DIGIT) {
5203 return NEIGHBOR_WRAPPED;
5207 enc_succ_char(carry,
len, enc);
5208 return NEIGHBOR_WRAPPED;
5276 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5277 rb_enc_cr_str_copy_for_substr(str, orig);
5278 return str_succ(str);
5285 char *sbeg, *s, *e, *last_alnum = 0;
5286 int found_alnum = 0;
5288 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5289 long carry_pos = 0, carry_len = 1;
5290 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5292 slen = RSTRING_LEN(str);
5293 if (slen == 0)
return str;
5295 enc = STR_ENC_GET(str);
5296 sbeg = RSTRING_PTR(str);
5297 s = e = sbeg + slen;
5299 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5300 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5306 l = rb_enc_precise_mbclen(s, e, enc);
5307 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5308 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5309 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5311 case NEIGHBOR_NOT_CHAR:
5313 case NEIGHBOR_FOUND:
5315 case NEIGHBOR_WRAPPED:
5320 carry_pos = s - sbeg;
5325 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5326 enum neighbor_char neighbor;
5327 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5328 l = rb_enc_precise_mbclen(s, e, enc);
5329 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5330 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5332 neighbor = enc_succ_char(tmp, l, enc);
5334 case NEIGHBOR_FOUND:
5338 case NEIGHBOR_WRAPPED:
5341 case NEIGHBOR_NOT_CHAR:
5344 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5346 enc_succ_char(s, l, enc);
5348 if (!rb_enc_asciicompat(enc)) {
5349 MEMCPY(carry, s,
char, l);
5352 carry_pos = s - sbeg;
5356 RESIZE_CAPA(str, slen + carry_len);
5357 sbeg = RSTRING_PTR(str);
5358 s = sbeg + carry_pos;
5359 memmove(s + carry_len, s, slen - carry_pos);
5360 memmove(s, carry, carry_len);
5362 STR_SET_LEN(str, slen);
5363 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5377rb_str_succ_bang(
VALUE str)
5385all_digits_p(
const char *s,
long len)
5439 VALUE end, exclusive;
5443 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5449 VALUE current, after_end;
5456 enc = rb_enc_check(beg, end);
5457 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5459 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5460 char c = RSTRING_PTR(beg)[0];
5461 char e = RSTRING_PTR(end)[0];
5463 if (c > e || (excl && c == e))
return beg;
5465 VALUE str = rb_enc_str_new(&c, 1, enc);
5467 if ((*each)(str, arg))
break;
5468 if (!excl && c == e)
break;
5470 if (excl && c == e)
break;
5475 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5476 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5477 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5482 b = rb_str_to_inum(beg, 10, FALSE);
5483 e = rb_str_to_inum(end, 10, FALSE);
5490 if (excl && bi == ei)
break;
5491 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5496 ID op = excl ?
'<' : idLE;
5497 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5502 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5503 b = rb_funcallv(b, succ, 0, 0);
5510 if (n > 0 || (excl && n == 0))
return beg;
5512 after_end = rb_funcallv(end, succ, 0, 0);
5517 next = rb_funcallv(current, succ, 0, 0);
5518 if ((*each)(current, arg))
break;
5519 if (
NIL_P(next))
break;
5523 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5538 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5539 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5540 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5542 b = rb_str_to_inum(beg, 10, FALSE);
5548 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5556 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5557 b = rb_funcallv(b, succ, 0, 0);
5563 VALUE next = rb_funcallv(current, succ, 0, 0);
5564 if ((*each)(current, arg))
break;
5567 if (RSTRING_LEN(current) == 0)
5578 if (!
rb_equal(str, *argp))
return 0;
5592 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5593 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5594 rb_enc_asciicompat(STR_ENC_GET(val))) {
5595 const char *bp = RSTRING_PTR(beg);
5596 const char *ep = RSTRING_PTR(end);
5597 const char *vp = RSTRING_PTR(val);
5598 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5599 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5607 if (b <= v && v < e)
return Qtrue;
5608 return RBOOL(!
RTEST(exclusive) && v == e);
5615 all_digits_p(bp, RSTRING_LEN(beg)) &&
5616 all_digits_p(ep, RSTRING_LEN(end))) {
5621 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5623 return RBOOL(
NIL_P(val));
5646 return rb_str_subpat(str, indx,
INT2FIX(0));
5649 if (rb_str_index(str, indx, 0) != -1)
5655 long beg,
len = str_strlen(str, NULL);
5667 return str_substr(str, idx, 1, FALSE);
5686rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5690 return rb_str_subpat(str, argv[0], argv[1]);
5693 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5697 return rb_str_aref(str, argv[0]);
5703 char *ptr = RSTRING_PTR(str);
5704 long olen = RSTRING_LEN(str), nlen;
5706 str_modifiable(str);
5707 if (
len > olen)
len = olen;
5709 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5711 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5713 ptr =
RSTRING(str)->as.embed.ary;
5714 memmove(ptr, oldptr +
len, nlen);
5715 if (fl == STR_NOEMBED)
xfree(oldptr);
5718 if (!STR_SHARED_P(str)) {
5720 rb_enc_cr_str_exact_copy(shared, str);
5725 STR_SET_LEN(str, nlen);
5727 if (!SHARABLE_MIDDLE_SUBSTRING) {
5728 TERM_FILL(ptr + nlen, TERM_LEN(str));
5735rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5741 if (beg == 0 && vlen == 0) {
5746 str_modify_keep_cr(str);
5750 RESIZE_CAPA(str, slen + vlen -
len);
5751 sptr = RSTRING_PTR(str);
5760 memmove(sptr + beg + vlen,
5762 slen - (beg +
len));
5764 if (vlen < beg &&
len < 0) {
5768 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5771 STR_SET_LEN(str, slen);
5772 TERM_FILL(&sptr[slen], TERM_LEN(str));
5779 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5788 int singlebyte = single_byte_optimizable(str);
5794 enc = rb_enc_check(str, val);
5795 slen = str_strlen(str, enc);
5797 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5806 if (
len > slen - beg) {
5809 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5814 beg = p - RSTRING_PTR(str);
5816 rb_str_update_0(str, beg,
len, val);
5817 rb_enc_associate(str, enc);
5828 long start, end,
len;
5838 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5842 nth += regs->num_regs;
5852 enc = rb_enc_check_str(str, val);
5853 rb_str_update_0(str, start,
len, val);
5854 rb_enc_associate(str, enc);
5862 switch (
TYPE(indx)) {
5864 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5868 beg = rb_str_index(str, indx, 0);
5922rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5926 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5934 return rb_str_aset(str, argv[0], argv[1]);
5994rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6002 str_modify_keep_cr(str);
6010 if ((nth += regs->num_regs) <= 0)
return Qnil;
6012 else if (nth >= regs->num_regs)
return Qnil;
6014 len = END(nth) - beg;
6017 else if (argc == 2) {
6026 beg = p - RSTRING_PTR(str);
6030 beg = rb_str_index(str, indx, 0);
6031 if (beg == -1)
return Qnil;
6032 len = RSTRING_LEN(indx);
6044 beg = p - RSTRING_PTR(str);
6053 beg = p - RSTRING_PTR(str);
6057 rb_enc_cr_str_copy_for_substr(result, str);
6065 char *sptr = RSTRING_PTR(str);
6066 long slen = RSTRING_LEN(str);
6067 if (beg +
len > slen)
6071 slen - (beg +
len));
6073 STR_SET_LEN(str, slen);
6074 TERM_FILL(&sptr[slen], TERM_LEN(str));
6085 switch (OBJ_BUILTIN_TYPE(pat)) {
6104get_pat_quoted(
VALUE pat,
int check)
6108 switch (OBJ_BUILTIN_TYPE(pat)) {
6122 if (check && is_broken_string(pat)) {
6123 rb_exc_raise(rb_reg_check_preprocess(pat));
6129rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6132 pos = rb_str_byteindex(str, pat, pos);
6133 if (set_backref_str) {
6135 str = rb_str_new_frozen_String(str);
6136 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6145 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
6165rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6179 hash = rb_check_hash_type(argv[1]);
6185 pat = get_pat_quoted(argv[0], 1);
6187 str_modifiable(str);
6188 beg = rb_pat_search(pat, str, 0, 1);
6202 end0 = beg0 + RSTRING_LEN(pat);
6211 if (iter || !
NIL_P(hash)) {
6212 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6215 repl = rb_obj_as_string(
rb_yield(match0));
6218 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6219 repl = rb_obj_as_string(repl);
6221 str_mod_check(str, p,
len);
6222 rb_check_frozen(str);
6228 enc = rb_enc_compatible(str, repl);
6230 rb_encoding *str_enc = STR_ENC_GET(str);
6231 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6235 rb_enc_inspect_name(str_enc),
6236 rb_enc_inspect_name(STR_ENC_GET(repl)));
6238 enc = STR_ENC_GET(repl);
6241 rb_enc_associate(str, enc);
6251 rlen = RSTRING_LEN(repl);
6252 len = RSTRING_LEN(str);
6254 RESIZE_CAPA(str,
len + rlen - plen);
6256 p = RSTRING_PTR(str);
6258 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6260 rp = RSTRING_PTR(repl);
6261 memmove(p + beg0, rp, rlen);
6263 STR_SET_LEN(str,
len);
6264 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6293 rb_str_sub_bang(argc, argv, str);
6298str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6301 long beg, beg0, end0;
6302 long offset, blen, slen,
len, last;
6303 enum {STR, ITER, MAP} mode = STR;
6305 int need_backref = -1;
6306 rb_encoding *str_enc;
6315 hash = rb_check_hash_type(argv[1]);
6324 rb_error_arity(argc, 1, 2);
6327 pat = get_pat_quoted(argv[0], 1);
6328 beg = rb_pat_search(pat, str, 0, need_backref);
6330 if (bang)
return Qnil;
6335 blen = RSTRING_LEN(str) + 30;
6337 sp = RSTRING_PTR(str);
6338 slen = RSTRING_LEN(str);
6340 str_enc = STR_ENC_GET(str);
6341 rb_enc_associate(dest, str_enc);
6349 end0 = beg0 + RSTRING_LEN(pat);
6360 val = rb_obj_as_string(
rb_yield(match0));
6363 val = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6364 val = rb_obj_as_string(val);
6366 str_mod_check(str, sp, slen);
6371 else if (need_backref) {
6373 if (need_backref < 0) {
6374 need_backref = val != repl;
6381 len = beg0 - offset;
6395 if (RSTRING_LEN(str) <= end0)
break;
6396 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6398 offset = end0 +
len;
6400 cp = RSTRING_PTR(str) + offset;
6401 if (offset > RSTRING_LEN(str))
break;
6402 beg = rb_pat_search(pat, str, offset, need_backref);
6406 if (RSTRING_LEN(str) > offset) {
6409 rb_pat_search(pat, str, last, 1);
6411 str_shared_replace(str, dest);
/*
 * In-place gsub entry point: calls str_modify_keep_cr() on the receiver and
 * then delegates to str_gsub() with bang = 1, returning its result.
 */
6439rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6441 str_modify_keep_cr(str);
6442 return str_gsub(argc, argv, str, 1);
6465 return str_gsub(argc, argv, str, 0);
6483 str_modifiable(str);
6484 if (str == str2)
return str;
6488 return str_replace(str, str2);
/*
 * Empties the string: sets its length to 0 and stores a zero byte at the
 * first position.
 * NOTE(review): the branch taken for ASCII-compatible encodings is not
 * visible in this view.
 */
6503rb_str_clear(
VALUE str)
6507 STR_SET_LEN(str, 0);
6508 RSTRING_PTR(str)[0] = 0;
6509 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6528rb_str_chr(
VALUE str)
6552 pos += RSTRING_LEN(str);
6553 if (pos < 0 || RSTRING_LEN(str) <= pos)
6556 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6575 long len = RSTRING_LEN(str);
6576 char *ptr, *head, *left = 0;
6580 if (pos < -
len ||
len <= pos)
6587 char byte = (char)(
NUM2INT(w) & 0xFF);
6589 if (!str_independent(str))
6590 str_make_independent(str);
6591 enc = STR_ENC_GET(str);
6592 head = RSTRING_PTR(str);
6594 if (!STR_EMBED_P(str)) {
6601 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6609 width = rb_enc_precise_mbclen(left, head+
len, enc);
6611 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
/*
 * Byte-indexed substring helper.  Returns Qnil for the rejected cases shown
 * below; otherwise builds the result with str_subseq() and copies the
 * encoding of str onto it.
 * NOTE(review): index normalisation between the visible checks is elided.
 */
6627str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6629 long n = RSTRING_LEN(str);
/* Start past the end, or a negative length: no result. */
6631 if (beg > n ||
len < 0)
return Qnil;
/* NOTE(review): presumably reached after a negative beg was adjusted by n and is still negative -- confirm. */
6634 if (beg < 0)
return Qnil;
/* `empty` decides whether this case is allowed (guarding condition not visible). */
6639 if (!empty)
return Qnil;
6643 VALUE str2 = str_subseq(str, beg,
len);
6645 str_enc_copy_direct(str2, str);
/* Special handling for an empty result in a non-ASCII-compatible encoding (action not visible). */
6647 if (RSTRING_LEN(str2) == 0) {
6648 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6682 long beg,
len = RSTRING_LEN(str);
6690 return str_byte_substr(str, beg,
len, TRUE);
6695 return str_byte_substr(str, idx, 1, FALSE);
/*
 * Entry point with two visible outcomes: a (beg, len) form served by
 * str_byte_substr() with empty = TRUE, and a single-argument form served by
 * str_byte_aref(str, argv[0]).
 * NOTE(review): the condition selecting between them is not visible --
 * presumably argc; confirm.
 */
6742rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6747 return str_byte_substr(str, beg,
len, TRUE);
6750 return str_byte_aref(str, argv[0]);
/*
 * Validates a byte range (*beg, *len) against str.  Both values are passed by
 * pointer.
 * NOTE(review): the bodies of the two conditionals and the computation of
 * `end` are elided in this view.
 */
6754str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6756 long end, slen = RSTRING_LEN(str);
/* Out of range: start beyond the end, or a negative start reaching before the beginning. */
6759 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
/* Length extends past the end of the string. */
6768 if (*
len > slen - *beg) {
/* Both boundaries are passed through str_ensure_byte_pos(). */
6772 str_ensure_byte_pos(str, *beg);
6773 str_ensure_byte_pos(str, end);
/*
 * Replaces a byte range of str with (a byte range of) another string.
 * Accepts 2, 3 or 5 arguments; any other count raises ArgumentError.
 * NOTE(review): argument decoding between the visible statements is elided.
 */
6798rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6800 long beg,
len, vbeg, vlen;
6805 if (!(argc == 2 || argc == 3 || argc == 5)) {
6806 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
/* TypeError naming the class of argv[0] when a Range was expected. */
6810 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6811 rb_builtin_class_name(argv[0]));
6818 vlen = RSTRING_LEN(val);
/* Same check for argv[2]. */
6823 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6824 rb_builtin_class_name(argv[2]));
6836 vlen = RSTRING_LEN(val);
/* Validate both ranges before touching str. */
6844 str_check_beg_len(str, &beg, &
len);
6845 str_check_beg_len(val, &vbeg, &vlen);
6846 str_modify_keep_cr(str);
/* The encoding returned by rb_enc_check(str, val) is associated with str. */
6849 rb_enc_associate(str, rb_enc_check(str, val));
6852 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
/*
 * Returns a reversed copy of str.  Strings of length 0 or 1 are simply
 * duplicated.
 * NOTE(review): the copy loops are elided; only their mbclen calls are visible.
 */
6870rb_str_reverse(
VALUE str)
6877 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6878 enc = STR_ENC_GET(str);
6884 if (RSTRING_LEN(str) > 1) {
/* Separate path when the string can be treated byte-wise. */
6885 if (single_byte_optimizable(str)) {
6892 int clen = rb_enc_fast_mbclen(s, e, enc);
6900 cr = rb_enc_asciicompat(enc) ?
6903 int clen = rb_enc_mbclen(s, e, enc);
/* The result has the same byte length and encoding as the source. */
6912 STR_SET_LEN(rev, RSTRING_LEN(str));
6913 str_enc_copy_direct(rev, str);
/*
 * In-place reverse.  For length > 1 there are two visible strategies: a
 * byte-wise path on the string's own buffer (single_byte_optimizable), and a
 * path that replaces the contents with rb_str_reverse(str).
 * NOTE(review): the statement at 6953 presumably covers length <= 1 -- confirm.
 */
6933rb_str_reverse_bang(
VALUE str)
6935 if (RSTRING_LEN(str) > 1) {
6936 if (single_byte_optimizable(str)) {
6939 str_modify_keep_cr(str);
6940 s = RSTRING_PTR(str);
6949 str_shared_replace(str, rb_str_reverse(str));
6953 str_modify_keep_cr(str);
6978 i = rb_str_index(str, arg, 0);
6980 return RBOOL(i != -1);
7022 rb_raise(rb_eArgError,
"invalid radix %d", base);
7024 return rb_str_to_inum(str, base, FALSE);
7048rb_str_to_f(
VALUE str)
7063rb_str_to_s(
VALUE str)
/*
 * Encodes code point c in encoding enc into a local buffer of
 * RUBY_MAX_CHAR_LEN bytes; n is the encoded length from rb_enc_codelen().
 * NOTE(review): the append of the encoded bytes to str is not visible here.
 */
7073str_cat_char(
VALUE str,
unsigned int c, rb_encoding *enc)
7075 char s[RUBY_MAX_CHAR_LEN];
7076 int n = rb_enc_codelen(c, enc);
7078 rb_enc_mbcput(c, s, enc);
7083#define CHAR_ESC_LEN 13
/*
 * Formats code point c as an escape sequence into a local buffer; the length
 * of the formatted text is then taken with strlen().
 * Visible formats: "%c", "\u%04X" (c < 0x10000), "\u{%X}", "\x%02X", "\x{%X}".
 * NOTE(review): the conditions choosing between the \u and \x families are
 * elided -- presumably driven by unicode_p; confirm.
 */
7086rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
/* One byte larger than the limit passed to snprintf below. */
7088 char buf[CHAR_ESC_LEN + 1];
7096 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7098 else if (c < 0x10000) {
7099 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7102 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7107 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7110 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7113 l = (int)strlen(buf);
/*
 * Maps a control character to its backslash-escape spelling as a static
 * string: NUL, LF, CR, TAB, FF, VT (013), BS (010), BEL (007), ESC (033) and
 * DEL (0x7f, spelled "\c?").
 * NOTE(review): the result for any other value is not visible in this view.
 */
7119ruby_escaped_char(
int c)
7122 case '\0':
return "\\0";
7123 case '\n':
return "\\n";
7124 case '\r':
return "\\r";
7125 case '\t':
return "\\t";
7126 case '\f':
return "\\f";
7127 case '\013':
return "\\v";
7128 case '\010':
return "\\b";
7129 case '\007':
return "\\a";
7130 case '\033':
return "\\e";
7131 case '\x7f':
return "\\c?";
/*
 * Builds an escaped copy of str in `result`.  `prev` marks the start of the
 * pending run of bytes that can be copied verbatim; each escape first flushes
 * that run with str_buf_cat().
 * NOTE(review): loop structure and several branch conditions are elided.
 */
7137rb_str_escape(
VALUE str)
7140 rb_encoding *enc = rb_enc_from_index(encidx);
7141 const char *p = RSTRING_PTR(str);
7143 const char *prev = p;
7144 char buf[CHAR_ESC_LEN + 1];
7146 int unicode_p = rb_enc_unicode_p(enc);
7147 int asciicompat = rb_enc_asciicompat(enc);
7152 int n = rb_enc_precise_mbclen(p, pend, enc);
/* NOTE(review): presumably the invalid-byte-sequence path: flush, then emit a \xNN escape for the byte at p. */
7154 if (p > prev) str_buf_cat(result, prev, p - prev);
7155 n = rb_enc_mbminlen(enc);
7157 n = (int)(pend - p);
7159 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7160 str_buf_cat(result, buf, strlen(buf));
7166 c = rb_enc_mbc_to_codepoint(p, pend, enc);
/* Named escape string from ruby_escaped_char(). */
7168 cc = ruby_escaped_char(c);
/* p - n is the start of the current character: flush everything before it. */
7170 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7171 str_buf_cat(result, cc, strlen(cc));
/* Printable ASCII in an ASCII-compatible encoding. */
7174 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7177 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7178 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Flush of the remaining verbatim run. */
7182 if (p > prev) str_buf_cat(result, prev, p - prev);
7205 rb_encoding *enc = rb_enc_from_index(encidx);
7206 const char *p, *pend, *prev;
7207 char buf[CHAR_ESC_LEN + 1];
7210 int unicode_p = rb_enc_unicode_p(enc);
7211 int asciicompat = rb_enc_asciicompat(enc);
7215 rb_enc_associate(result, resenc);
7216 str_buf_cat2(result,
"\"");
7224 n = rb_enc_precise_mbclen(p, pend, enc);
7226 if (p > prev) str_buf_cat(result, prev, p - prev);
7227 n = rb_enc_mbminlen(enc);
7229 n = (int)(pend - p);
7231 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7232 str_buf_cat(result, buf, strlen(buf));
7238 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7240 if ((asciicompat || unicode_p) &&
7241 (c ==
'"'|| c ==
'\\' ||
7246 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7247 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7248 str_buf_cat2(result,
"\\");
7249 if (asciicompat || enc == resenc) {
7255 case '\n': cc =
'n';
break;
7256 case '\r': cc =
'r';
break;
7257 case '\t': cc =
't';
break;
7258 case '\f': cc =
'f';
break;
7259 case '\013': cc =
'v';
break;
7260 case '\010': cc =
'b';
break;
7261 case '\007': cc =
'a';
break;
7262 case 033: cc =
'e';
break;
7263 default: cc = 0;
break;
7266 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7269 str_buf_cat(result, buf, 2);
7282 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7286 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7287 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7292 if (p > prev) str_buf_cat(result, prev, p - prev);
7293 str_buf_cat2(result,
"\"");
7298#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7318 int encidx = rb_enc_get_index(str);
7319 rb_encoding *enc = rb_enc_from_index(encidx);
7321 const char *p, *pend;
7325 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7328 if (!rb_enc_asciicompat(enc)) {
7330 len += strlen(enc->name);
7333 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7336 unsigned char c = *p++;
7339 case '"':
case '\\':
7340 case '\n':
case '\r':
7341 case '\t':
case '\f':
7342 case '\013':
case '\010':
case '\007':
case '\033':
7347 clen = IS_EVSTR(p, pend) ? 2 : 1;
7355 if (u8 && c > 0x7F) {
7356 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7358 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7361 else if (cc <= 0xFFFFF)
7374 if (clen > LONG_MAX -
len) {
7381 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7382 q = RSTRING_PTR(result); qend = q +
len + 1;
7386 unsigned char c = *p++;
7388 if (c ==
'"' || c ==
'\\') {
7392 else if (c ==
'#') {
7393 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7396 else if (c ==
'\n') {
7400 else if (c ==
'\r') {
7404 else if (c ==
'\t') {
7408 else if (c ==
'\f') {
7412 else if (c ==
'\013') {
7416 else if (c ==
'\010') {
7420 else if (c ==
'\007') {
7424 else if (c ==
'\033') {
7434 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7436 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7439 snprintf(q, qend-q,
"u%04X", cc);
7441 snprintf(q, qend-q,
"u{%X}", cc);
7446 snprintf(q, qend-q,
"x%02X", c);
7452 if (!rb_enc_asciicompat(enc)) {
7453 snprintf(q, qend-q, nonascii_suffix, enc->name);
7457 rb_enc_associate_index(result, encidx);
7463unescape_ascii(
unsigned int c)
/*
 * Decodes one backslash escape of a dumped string and appends it to
 * `undumped`.  *ss is the read cursor and s_end its limit.  *penc, *utf8 and
 * *binary are in/out state shared with the caller.
 * NOTE(review): the switch over the escape character is elided.
 */
7487undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end, rb_encoding **penc,
bool *utf8,
bool *binary)
7489 const char *s = *ss;
7493 unsigned char buf[6];
7494 static rb_encoding *enc_utf8 = NULL;
/* Simple one-character escapes go through unescape_ascii(). */
7511 *buf = unescape_ascii(*s);
/* If the current encoding is not UTF-8, the result is re-associated with UTF-8. */
7524 if (*penc != enc_utf8) {
7526 rb_enc_associate(undumped, enc_utf8);
/* A hex run must be 1..6 digits long. */
7543 if (hexlen == 0 || hexlen > 6) {
/* Surrogate range 0xD800..0xDFFF is tested specially (action not visible). */
7549 if (0xd800 <= c && c <= 0xdfff) {
7552 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7562 if (0xd800 <= c && c <= 0xdfff) {
7565 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7593static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * Reverses a dumped string literal.  The input must start with a double
 * quote and may carry an optional ".dup" followed by
 * .force_encoding("NAME") after the quoted part.  Malformed input jumps to
 * invalid_format, which raises RuntimeError.
 * NOTE(review): the main decoding loop is mostly elided in this view.
 */
7611str_undump(
VALUE str)
7613 const char *s = RSTRING_PTR(str);
7615 rb_encoding *enc = rb_enc_get(str);
/* Result starts out empty, in the encoding of the input. */
7616 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7618 bool binary =
false;
7622 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7625 if (!str_null_check(str, &w)) {
/* Minimum well-formed input is two bytes and begins with '"'. */
7628 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7629 if (*s !=
'"')
goto invalid_format;
7647 static const char force_encoding_suffix[] =
".force_encoding(\"";
7648 static const char dup_suffix[] =
".dup";
7649 const char *encname;
/* Optional ".dup" is skipped when present. */
7654 size =
sizeof(dup_suffix) - 1;
7655 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
/* What follows must be the force_encoding prefix with something after it. */
7657 size =
sizeof(force_encoding_suffix) - 1;
7658 if (s_end - s <= size)
goto invalid_format;
7659 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7663 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
/* Locate the closing quote of the encoding name; exactly the two bytes '"' and ')' must remain. */
7667 s = memchr(s,
'"', s_end-s);
7669 if (!s)
goto invalid_format;
7670 if (s_end - s != 2)
goto invalid_format;
7671 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7673 encidx = rb_enc_find_index2(encname, (
long)size);
7677 rb_enc_associate_index(undumped, encidx);
/* Backslash escapes are delegated; enc, utf8 and binary are updated through pointers. */
7687 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7698 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
/*
 * Tests whether enc is a dummy encoding (rb_enc_dummy_p).
 * NOTE(review): the action taken for a dummy encoding is not visible here.
 */
7702rb_str_check_dummy_enc(rb_encoding *enc)
7704 if (rb_enc_dummy_p(enc)) {
/*
 * Fetches the encoding of str and runs it through rb_str_check_dummy_enc().
 * NOTE(review): the return statement is elided; callers assign the result to
 * an encoding variable.
 */
7711str_true_enc(
VALUE str)
7713 rb_encoding *enc = STR_ENC_GET(str);
7714 rb_str_check_dummy_enc(enc);
/*
 * Folds the option symbols in argv into `flags` for a case-mapping call.
 * Visible rules: :turkic and :lithuanian may be combined in either order;
 * :ascii sets ONIGENC_CASE_ASCII_ONLY; :fold is accepted only when the
 * up/down bits of flags are exactly ONIGENC_CASE_DOWNCASE.  Violations raise
 * ArgumentError with the messages below.
 * NOTE(review): the argc guards around the branches are elided.
 */
7718static OnigCaseFoldType
7719check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7724 rb_raise(rb_eArgError,
"too many options");
7725 if (argv[0]==sym_turkic) {
7726 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7728 if (argv[1]==sym_lithuanian)
7729 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7731 rb_raise(rb_eArgError,
"invalid second option");
7734 else if (argv[0]==sym_lithuanian) {
7735 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7737 if (argv[1]==sym_turkic)
7738 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7740 rb_raise(rb_eArgError,
"invalid second option");
7744 rb_raise(rb_eArgError,
"too many options");
7745 else if (argv[0]==sym_ascii)
7746 flags |= ONIGENC_CASE_ASCII_ONLY;
7747 else if (argv[0]==sym_fold) {
7748 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
/* XOR toggles both bits: DOWNCASE (known to be set here) is cleared and FOLD is flipped. */
7749 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7751 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7754 rb_raise(rb_eArgError,
"invalid option");
7759case_option_single_p(OnigCaseFoldType flags, rb_encoding *enc,
VALUE str)
7767#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7768#ifndef CASEMAP_DEBUG
7769# define CASEMAP_DEBUG 0
7777 OnigUChar space[FLEX_ARY_LEN];
/*
 * Frees a linked list of mapping buffers: walks the list through ->next and
 * releases every node with ruby_sized_xfree(), passing the node's capa field
 * as the size.  The next pointer is read before the node is freed.
 * NOTE(review): how current_buffer is derived from p is not visible here.
 */
7781mapping_buffer_free(
void *p)
7785 while (current_buffer) {
7786 previous_buffer = current_buffer;
7787 current_buffer = current_buffer->next;
7788 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7794 {0, mapping_buffer_free,},
7795 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
/*
 * Case-maps `source` with the encoding's case_map callback and returns the
 * result as a new string carrying the encoding of `source`.  Output is
 * collected in a linked list of buffers reachable from buffer_anchor; an
 * empty source is simply duplicated.  A negative result from case_map frees
 * the buffers and raises ArgumentError ("input string invalid").
 * NOTE(review): only part of this function's body is visible here.
 */
7799rb_str_casemap(
VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
7803 const OnigUChar *source_current, *source_end;
7804 int target_length = 0;
7805 VALUE buffer_anchor;
7808 size_t buffer_count = 0;
7809 int buffer_length_or_invalid;
7811 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7813 source_current = (OnigUChar*)RSTRING_PTR(source);
/* One iteration per buffer, until the whole source has been consumed. */
7818 while (source_current < source_end) {
/* Capacity: remaining source bytes times the (incremented) buffer count, plus fixed slack. */
7820 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7821 if (CASEMAP_DEBUG) {
7822 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
/* Append the new buffer to the list; pre_buffer always points at the link to fill next. */
7825 *pre_buffer = current_buffer;
7826 pre_buffer = &current_buffer->next;
7827 current_buffer->next = NULL;
7828 current_buffer->capa =
capa;
7829 buffer_length_or_invalid = enc->case_map(flags,
7830 &source_current, source_end,
7831 current_buffer->space,
7832 current_buffer->space+current_buffer->capa,
7834 if (buffer_length_or_invalid < 0) {
/* Release every buffer before raising. */
7835 current_buffer =
DATA_PTR(buffer_anchor);
7837 mapping_buffer_free(current_buffer);
7838 rb_raise(rb_eArgError,
"input string invalid");
7840 target_length += current_buffer->used = buffer_length_or_invalid;
7842 if (CASEMAP_DEBUG) {
7843 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* A single buffer can be turned into the result directly. */
7846 if (buffer_count==1) {
7847 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7850 char *target_current;
/* Several buffers: concatenate their used bytes into the target. */
7853 target_current = RSTRING_PTR(target);
7854 current_buffer =
DATA_PTR(buffer_anchor);
7855 while (current_buffer) {
7856 memcpy(target_current, current_buffer->space, current_buffer->used);
7857 target_current += current_buffer->used;
7858 current_buffer = current_buffer->next;
7861 current_buffer =
DATA_PTR(buffer_anchor);
7863 mapping_buffer_free(current_buffer);
7868 str_enc_copy_direct(target, source);
/*
 * ASCII-only case mapping from `source` into `target` via
 * onigenc_ascii_only_case_map().  Returns Qnil for an empty source.  When
 * source and target are the same object the mapping is done in place.  A
 * negative result raises ArgumentError.
 * NOTE(review): the final return value is not visible in this view.
 */
7875rb_str_ascii_casemap(
VALUE source,
VALUE target, OnigCaseFoldType *flags, rb_encoding *enc)
7877 const OnigUChar *source_current, *source_end;
7878 OnigUChar *target_current, *target_end;
7879 long old_length = RSTRING_LEN(source);
7880 int length_or_invalid;
7882 if (old_length == 0)
return Qnil;
7884 source_current = (OnigUChar*)RSTRING_PTR(source);
/* In-place case: output pointers alias the input range. */
7886 if (source == target) {
7887 target_current = (OnigUChar*)source_current;
7888 target_end = (OnigUChar*)source_end;
7891 target_current = (OnigUChar*)RSTRING_PTR(target);
7895 length_or_invalid = onigenc_ascii_only_case_map(flags,
7896 &source_current, source_end,
7897 target_current, target_end, enc);
7898 if (length_or_invalid < 0)
7899 rb_raise(rb_eArgError,
"input string invalid");
/* Debug-only sanity check: the mapped length is expected to equal the input length. */
7900 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7901 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7902 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7903 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7904 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7907 str_enc_copy(target, source);
/*
 * Byte-wise upcase over the buffer of str: each byte in 'a'..'z' is rewritten
 * to the corresponding 'A'..'Z'.
 * NOTE(review): the loop header and the updates/return of `modified` are
 * elided; rb_str_upcase_bang treats the return value as "something changed".
 */
7913upcase_single(
VALUE str)
7915 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7916 bool modified =
false;
/* Read as unsigned so the range comparison is well defined for bytes >= 0x80. */
7919 unsigned int c = *(
unsigned char*)s;
7921 if (
'a' <= c && c <=
'z') {
7922 *s =
'A' + (c -
'a');
/*
 * In-place upcase.  After option parsing one of three strategies is used:
 * upcase_single() when case_option_single_p() holds, rb_str_ascii_casemap()
 * in place for the ASCII-only option, otherwise the contents are replaced
 * with the result of rb_str_casemap().  Returns str when
 * ONIGENC_CASE_MODIFIED ends up set in flags.
 * NOTE(review): the return value for the unmodified case is not visible.
 */
7950rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7953 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7955 flags = check_case_options(argc, argv, flags);
7956 str_modify_keep_cr(str);
7957 enc = str_true_enc(str);
7958 if (case_option_single_p(flags, enc, str)) {
/* The byte-wise helper reports a change through its return value; record it in flags. */
7959 if (upcase_single(str))
7960 flags |= ONIGENC_CASE_MODIFIED;
7962 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7963 rb_str_ascii_casemap(str, str, &flags, enc);
7965 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7967 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive upcase: builds the result in `ret` using the same three
 * strategies as the in-place variant (byte-wise copy, ASCII-only mapping
 * into ret, or rb_str_casemap()).
 * NOTE(review): the creation of ret for the ASCII-only path, the byte-wise
 * upcase call and the return are elided in this view.
 */
7989rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7992 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7995 flags = check_case_options(argc, argv, flags);
7996 enc = str_true_enc(str);
7997 if (case_option_single_p(flags, enc, str)) {
/* Fresh copy of the bytes, then the source encoding is copied onto it. */
7998 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7999 str_enc_copy_direct(ret, str);
8002 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8004 rb_str_ascii_casemap(str, ret, &flags, enc);
8007 ret = rb_str_casemap(str, &flags, enc);
/*
 * Byte-wise downcase over the buffer of str: each byte in 'A'..'Z' is
 * rewritten to the corresponding 'a'..'z'.
 * NOTE(review): the loop header and the updates/return of `modified` are
 * elided; rb_str_downcase_bang treats the return value as "something changed".
 */
8014downcase_single(
VALUE str)
8016 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8017 bool modified =
false;
/* Read as unsigned so the range comparison is well defined for bytes >= 0x80. */
8020 unsigned int c = *(
unsigned char*)s;
8022 if (
'A' <= c && c <=
'Z') {
8023 *s =
'a' + (c -
'A');
/*
 * In-place downcase.  Strategy selection mirrors the upcase variant:
 * downcase_single() when case_option_single_p() holds,
 * rb_str_ascii_casemap() in place for the ASCII-only option, otherwise the
 * contents are replaced with the result of rb_str_casemap().  Returns str
 * when ONIGENC_CASE_MODIFIED ends up set in flags.
 * NOTE(review): the return value for the unmodified case is not visible.
 */
8052rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8055 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8057 flags = check_case_options(argc, argv, flags);
8058 str_modify_keep_cr(str);
8059 enc = str_true_enc(str);
8060 if (case_option_single_p(flags, enc, str)) {
/* The byte-wise helper reports a change through its return value; record it in flags. */
8061 if (downcase_single(str))
8062 flags |= ONIGENC_CASE_MODIFIED;
8064 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8065 rb_str_ascii_casemap(str, str, &flags, enc);
8067 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8069 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive downcase: builds the result in `ret`.  On the single-byte
 * path the bytes are copied, the encoding is copied, and downcase_single()
 * is applied to the copy; otherwise the ASCII-only mapping or
 * rb_str_casemap() produces ret.
 * NOTE(review): the creation of ret for the ASCII-only path and the return
 * are elided in this view.
 */
8091rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8094 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8097 flags = check_case_options(argc, argv, flags);
8098 enc = str_true_enc(str);
8099 if (case_option_single_p(flags, enc, str)) {
8100 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8101 str_enc_copy_direct(ret, str);
8102 downcase_single(ret);
8104 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8106 rb_str_ascii_casemap(str, ret, &flags, enc);
8109 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place capitalize, using the UPCASE | TITLECASE flag combination.
 * Returns Qnil for an empty string or a null buffer pointer; returns str
 * when ONIGENC_CASE_MODIFIED ends up set in flags.
 * NOTE(review): the return value for the unmodified case is not visible.
 */
8137rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8140 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8142 flags = check_case_options(argc, argv, flags);
8143 str_modify_keep_cr(str);
8144 enc = str_true_enc(str);
8145 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
/* ASCII-only option maps in place; otherwise the contents are replaced with the full mapping. */
8146 if (flags&ONIGENC_CASE_ASCII_ONLY)
8147 rb_str_ascii_casemap(str, str, &flags, enc);
8149 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8151 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive capitalize, using the UPCASE | TITLECASE flag combination.
 * For an empty string or a null buffer pointer the receiver itself is
 * returned (not a copy).
 * NOTE(review): the creation of ret for the ASCII-only path and the final
 * return are elided in this view.
 */
8175rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8178 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8181 flags = check_case_options(argc, argv, flags);
8182 enc = str_true_enc(str);
8183 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8184 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8186 rb_str_ascii_casemap(str, ret, &flags, enc);
8189 ret = rb_str_casemap(str, &flags, enc);
/*
 * In-place swapcase: both ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE are
 * set in the initial flags.  Returns str when ONIGENC_CASE_MODIFIED ends up
 * set in flags.
 * NOTE(review): the return value for the unmodified case is not visible.
 */
8216rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8219 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8221 flags = check_case_options(argc, argv, flags);
8222 str_modify_keep_cr(str);
8223 enc = str_true_enc(str);
/* ASCII-only option maps in place; otherwise the contents are replaced with the full mapping. */
8224 if (flags&ONIGENC_CASE_ASCII_ONLY)
8225 rb_str_ascii_casemap(str, str, &flags, enc);
8227 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8229 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Non-destructive swapcase: both ONIGENC_CASE_UPCASE and
 * ONIGENC_CASE_DOWNCASE are set in the initial flags.  An empty string or a
 * null buffer pointer yields a duplicate of the receiver.
 * NOTE(review): the creation of ret for the ASCII-only path and the final
 * return are elided in this view.
 */
8253rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8256 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8259 flags = check_case_options(argc, argv, flags);
8260 enc = str_true_enc(str);
8261 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8262 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8264 rb_str_ascii_casemap(str, ret, &flags, enc);
8267 ret = rb_str_casemap(str, &flags, enc);
8272typedef unsigned char *USTR;
8276 unsigned int now, max;
/*
 * Produces the next code point of a tr-style character specification held
 * in *t (cursor p, limit pend, range state now/max).  Returns -1 once the
 * cursor has reached the end.
 * NOTE(review): the enclosing loop and several branch bodies are elided.
 */
8281trnext(
struct tr *t, rb_encoding *enc)
8288 if (t->p == t->pend)
return -1;
/* A backslash counts as an escape only when another character follows it. */
8289 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8292 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
/* Likewise '-' introduces a range only when something follows it. */
8294 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8296 if (t->p < t->pend) {
8297 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
/* Invalid range: the error message shows both endpoints only when both are below 0x80. */
8300 if (t->now < 0x80 && c < 0x80) {
8301 rb_raise(rb_eArgError,
8302 "invalid range \"%c-%c\" in string transliteration",
8306 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8310 else if (t->now < c) {
/* Advance through the range, skipping values for which ONIGENC_CODE_TO_MBCLEN is not positive. */
8319 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8320 if (t->now == t->max) {
8325 if (t->now < t->max) {
8341 const unsigned int errc = -1;
8342 unsigned int trans[256];
8343 rb_encoding *enc, *e1, *e2;
8344 struct tr trsrc, trrepl;
8346 unsigned int c, c0, last = 0;
8347 int modify = 0, i, l;
8348 unsigned char *s, *send;
8350 int singlebyte = single_byte_optimizable(str);
8354#define CHECK_IF_ASCII(c) \
8355 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8356 (cr = ENC_CODERANGE_VALID) : 0)
8360 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8361 if (RSTRING_LEN(repl) == 0) {
8362 return rb_str_delete_bang(1, &src, str);
8366 e1 = rb_enc_check(str, src);
8367 e2 = rb_enc_check(str, repl);
8372 enc = rb_enc_check(src, repl);
8374 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8375 if (RSTRING_LEN(src) > 1 &&
8376 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8377 trsrc.p + l < trsrc.pend) {
8381 trrepl.p = RSTRING_PTR(repl);
8382 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8383 trsrc.gen = trrepl.gen = 0;
8384 trsrc.now = trrepl.now = 0;
8385 trsrc.max = trrepl.max = 0;
8388 for (i=0; i<256; i++) {
8391 while ((c = trnext(&trsrc, enc)) != errc) {
8400 while ((c = trnext(&trrepl, enc)) != errc)
8403 for (i=0; i<256; i++) {
8404 if (trans[i] != errc) {
8412 for (i=0; i<256; i++) {
8415 while ((c = trnext(&trsrc, enc)) != errc) {
8416 r = trnext(&trrepl, enc);
8417 if (r == errc) r = trrepl.now;
8420 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8431 str_modify_keep_cr(str);
8432 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8433 termlen = rb_enc_mbminlen(enc);
8436 long offset, max = RSTRING_LEN(str);
8437 unsigned int save = -1;
8438 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8443 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8446 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8449 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8451 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8460 if (cflag) c = last;
8463 else if (cflag) c = errc;
8469 if (c != (
unsigned int)-1) {
8475 tlen = rb_enc_codelen(c, enc);
8481 if (enc != e1) may_modify = 1;
8483 if ((offset = t - buf) + tlen > max) {
8484 size_t MAYBE_UNUSED(old) = max + termlen;
8485 max = offset + tlen + (send - s);
8486 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8489 rb_enc_mbcput(c, t, enc);
8490 if (may_modify && memcmp(s, t, tlen) != 0) {
8496 if (!STR_EMBED_P(str)) {
8497 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8499 TERM_FILL((
char *)t, termlen);
8500 RSTRING(str)->as.heap.ptr = (
char *)buf;
8501 STR_SET_LEN(str, t - buf);
8502 STR_SET_NOEMBED(str);
8503 RSTRING(str)->as.heap.aux.capa = max;
8507 c = (
unsigned char)*s;
8508 if (trans[c] != errc) {
8525 long offset, max = (long)((send - s) * 1.2);
8526 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8531 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8534 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8537 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8539 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8547 if (cflag) c = last;
8550 else if (cflag) c = errc;
8554 c = cflag ? last : errc;
8557 tlen = rb_enc_codelen(c, enc);
8562 if (enc != e1) may_modify = 1;
8564 if ((offset = t - buf) + tlen > max) {
8565 size_t MAYBE_UNUSED(old) = max + termlen;
8566 max = offset + tlen + (long)((send - s) * 1.2);
8567 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8571 rb_enc_mbcput(c, t, enc);
8572 if (may_modify && memcmp(s, t, tlen) != 0) {
8580 if (!STR_EMBED_P(str)) {
8581 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8583 TERM_FILL((
char *)t, termlen);
8584 RSTRING(str)->as.heap.ptr = (
char *)buf;
8585 STR_SET_LEN(str, t - buf);
8586 STR_SET_NOEMBED(str);
8587 RSTRING(str)->as.heap.aux.capa = max;
8593 rb_enc_associate(str, enc);
8612 return tr_trans(str, src, repl, 0);
8659 tr_trans(str, src, repl, 0);
8663#define TR_TABLE_MAX (UCHAR_MAX+1)
8664#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Folds one character specification `str` into the membership table
 * `stable`: TR_TABLE_MAX per-byte entries plus one extra slot at index
 * TR_TABLE_MAX.  Code points >= TR_TABLE_MAX are tracked in hash tables
 * reached through tablep/ctablep.  `first` is non-zero for the first
 * specification of a call.  A leading '^' (string longer than one byte)
 * selects complement mode (cflag).
 * NOTE(review): several statements of the body are elided in this view.
 */
8666tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8667 VALUE *tablep,
VALUE *ctablep, rb_encoding *enc)
8669 const unsigned int errc = -1;
8670 char buf[TR_TABLE_MAX];
8673 VALUE table = 0, ptable = 0;
8674 int i, l, cflag = 0;
8676 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8677 tr.gen =
tr.now =
tr.max = 0;
8679 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8684 for (i=0; i<TR_TABLE_MAX; i++) {
/* The extra slot records the complement flag. */
8687 stable[TR_TABLE_MAX] = cflag;
8689 else if (stable[TR_TABLE_MAX] && !cflag) {
8690 stable[TR_TABLE_MAX] = 0;
8692 for (i=0; i<TR_TABLE_MAX; i++) {
/* Iterate over the code points of the specification; errc marks the end. */
8696 while ((c = trnext(&
tr, enc)) != errc) {
8697 if (c < TR_TABLE_MAX) {
8698 buf[(
unsigned char)c] = !cflag;
8703 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8715 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8716 rb_hash_aset(table, key,
Qtrue);
/* Intersect the accumulated table with this specification's table. */
8720 for (i=0; i<TR_TABLE_MAX; i++) {
8721 stable[i] = stable[i] && buf[i];
8723 if (!table && !cflag) {
/*
 * Membership test for code point c.  Values below TR_TABLE_MAX are answered
 * from the byte table.  Larger values consult the `del` and `nodel` hashes
 * and, failing those, fall back to the extra slot table[TR_TABLE_MAX].
 * NOTE(review): the return statements inside the hash branches are elided.
 */
8730tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8732 if (c < TR_TABLE_MAX) {
8733 return table[c] != 0;
/* Found in del and, when nodel exists, absent from nodel. */
8739 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8740 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
/* Present in nodel. */
8744 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8747 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * In-place delete.  Builds a membership table from every argument with
 * tr_setup_table(), then compacts the buffer with a read cursor `s` and a
 * write cursor `t`.  Returns Qnil for an empty string or a null buffer
 * pointer, and str when `modify` is set.
 * NOTE(review): the scan loop is largely elided, as is the return value for
 * the unmodified case.
 */
8761rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8763 char squeez[TR_TABLE_SIZE];
8764 rb_encoding *enc = 0;
8766 VALUE del = 0, nodel = 0;
8768 int i, ascompat, cr;
8770 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
/* i==0 marks the first specification for tr_setup_table(). */
8772 for (i=0; i<argc; i++) {
8776 enc = rb_enc_check(str, s);
8777 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8780 str_modify_keep_cr(str);
8781 ascompat = rb_enc_asciicompat(enc);
8782 s = t = RSTRING_PTR(str);
/* Bytes below 0x80 in an ASCII-compatible encoding take a separate branch. */
8789 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8800 c = rb_enc_codepoint_len(s, send, &clen, enc);
8802 if (tr_find(c, squeez, del, nodel)) {
/* The character is re-encoded at t only when the cursors have diverged. */
8806 if (t != s) rb_enc_mbcput(c, t, enc);
/* Terminate at the write cursor and set the length to the bytes written. */
8813 TERM_FILL(t, TERM_LEN(str));
8814 STR_SET_LEN(str, t - RSTRING_PTR(str));
8817 if (modify)
return str;
/*
 * Non-destructive delete, implemented by calling rb_str_delete_bang().
 * NOTE(review): the duplication of the receiver and the return are not
 * visible in this view, although the call below is shown with `str` --
 * presumably str has been rebound to a copy by then; confirm.
 */
8837rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8840 rb_str_delete_bang(argc, argv, str);
/*
 * In-place squeeze: collapses runs of a repeated character, restricted to
 * the characters selected by the arguments when any are given.  Uses a read
 * cursor `s` and a write cursor `t` over the string's buffer.  Returns Qnil
 * for an empty string or a null buffer pointer, and str when `modify` is set.
 * NOTE(review): loop bodies are largely elided, as is the return value for
 * the unmodified case.
 */
8854rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8856 char squeez[TR_TABLE_SIZE];
8857 rb_encoding *enc = 0;
8858 VALUE del = 0, nodel = 0;
8859 unsigned char *s, *send, *t;
8861 int ascompat, singlebyte = single_byte_optimizable(str);
8865 enc = STR_ENC_GET(str);
8868 for (i=0; i<argc; i++) {
8872 enc = rb_enc_check(str, s);
/* Tested when an argument is not itself single-byte optimizable (effect elided). */
8873 if (singlebyte && !single_byte_optimizable(s))
8875 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8879 str_modify_keep_cr(str);
8880 s = t = (
unsigned char *)RSTRING_PTR(str);
8881 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8884 ascompat = rb_enc_asciicompat(enc);
/* A character is tested for keeping when it differs from the previous one, or when arguments were given and it is not in the set. */
8888 unsigned int c = *s++;
8889 if (c != save || (argc > 0 && !squeez[c])) {
8899 if (ascompat && (c = *s) < 0x80) {
8900 if (c != save || (argc > 0 && !squeez[c])) {
8906 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8908 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8909 if (t != s) rb_enc_mbcput(c, t, enc);
8918 TERM_FILL((
char *)t, TERM_LEN(str));
/* The length is updated only when it actually changed. */
8919 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8920 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8924 if (modify)
return str;
/*
 * Non-destructive squeeze, implemented by calling rb_str_squeeze_bang().
 * NOTE(review): the duplication of the receiver and the return are not
 * visible in this view, although the call below is shown with `str` --
 * presumably str has been rebound to a copy by then; confirm.
 */
8947rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8950 rb_str_squeeze_bang(argc, argv, str);
8968 return tr_trans(str, src, repl, 1);
8991 tr_trans(str, src, repl, 1);
/*
 * Counts the characters of str selected by the argument specifications.
 * Has a fast path for a single one-byte specification in an
 * ASCII-compatible encoding, and a general path that builds a table with
 * tr_setup_table() and tests each character.  An empty string or a null
 * buffer pointer yields 0 on either path.
 * NOTE(review): the counting loops and the final return are mostly elided.
 */
9020rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9022 char table[TR_TABLE_SIZE];
9023 rb_encoding *enc = 0;
9024 VALUE del = 0, nodel = 0, tstr;
9034 enc = rb_enc_check(str, tstr);
/* Fast path conditions: one-byte spec, ASCII-compatible encoding, reverse match allowed, str not broken. */
9037 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9038 (ptstr = RSTRING_PTR(tstr),
9039 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9040 !is_broken_string(str)) {
9042 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9044 s = RSTRING_PTR(str);
9045 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
/* Plain byte comparison against the single target byte. */
9048 if (*(
unsigned char*)s++ == c) n++;
/* General path: the first spec uses first = TRUE, the remaining specs FALSE. */
9054 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9055 for (i=1; i<argc; i++) {
9058 enc = rb_enc_check(str, tstr);
9059 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9062 s = RSTRING_PTR(str);
9063 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9065 ascompat = rb_enc_asciicompat(enc);
9069 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9077 c = rb_enc_codepoint_len(s, send, &clen, enc);
9078 if (tr_find(c, table, del, nodel)) {
/*
 * Checks a field-separator value.  A nil value yields 0.
 * NOTE(review): the handling of non-nil values is not visible in this view;
 * rb_str_split_m treats a zero result as an invalid `$;`.
 */
9089rb_fs_check(
VALUE val)
9093 if (
NIL_P(val))
return 0;
/*
 * 256-entry lookup table, indexed by byte value, 16 entries per row.  The
 * only non-zero entries are indices 9..13 (TAB, LF, VT, FF, CR) in the
 * first row and index 32 (space) at the start of the third row.
 */
9098static const char isspacetable[256] = {
9099 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9100 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9101 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9102 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9111 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9112 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9114 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Table lookup; the cast to unsigned char keeps the index within 0..255 even for negative char values. */
9117#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
/*
 * Emits one piece of a split and maintains the count of pending empty
 * pieces.  When counting is active (empty_count >= 0) and the piece is
 * empty, nothing is emitted and the incremented count is returned.
 * Otherwise any pending empties are emitted first, either pushed onto
 * `result` or yielded, one empty string per pending count.
 * NOTE(review): the selection between the push and yield forms, the
 * creation of the substring and the final return are elided in this view.
 */
9120split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9122 if (empty_count >= 0 &&
len == 0) {
9123 return empty_count + 1;
9125 if (empty_count > 0) {
9129 rb_ary_push(result, str_new_empty_String(str));
9130 }
while (--empty_count > 0);
9134 rb_yield(str_new_empty_String(str));
9135 }
while (--empty_count > 0);
/* NOTE(review): str presumably holds the extracted piece at this point -- confirm. */
9140 rb_ary_push(result, str);
9149 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
/*
 * Classifies a literal split pattern.  Visible outcomes: SPLIT_TYPE_CHARS;
 * SPLIT_TYPE_AWK for a pattern consisting of exactly one space character
 * (tested byte-wise for ASCII-compatible encodings, via rb_enc_ascget
 * otherwise); and default_type for everything else.
 * NOTE(review): the condition leading to SPLIT_TYPE_CHARS is elided --
 * presumably an empty pattern; confirm.
 */
9153literal_split_pattern(
VALUE spat, split_type_t default_type)
9155 rb_encoding *enc = STR_ENC_GET(spat);
9161 return SPLIT_TYPE_CHARS;
9163 else if (rb_enc_asciicompat(enc)) {
9164 if (
len == 1 && ptr[0] ==
' ') {
9165 return SPLIT_TYPE_AWK;
/* The pattern is a single space whose encoded length l equals the whole pattern length. */
9170 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9171 return SPLIT_TYPE_AWK;
9174 return default_type;
9187rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9192 split_type_t split_type;
9193 long beg, end, i = 0, empty_count = -1;
9198 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9200 if (lim <= 0) limit =
Qnil;
9201 else if (lim == 1) {
9202 if (RSTRING_LEN(str) == 0)
9213 if (
NIL_P(limit) && !lim) empty_count = 0;
9215 enc = STR_ENC_GET(str);
9216 split_type = SPLIT_TYPE_REGEXP;
9218 spat = get_pat_quoted(spat, 0);
9221 split_type = SPLIT_TYPE_AWK;
9223 else if (!(spat = rb_fs_check(spat))) {
9224 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9229 if (split_type != SPLIT_TYPE_AWK) {
9234 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9235 if (split_type == SPLIT_TYPE_AWK) {
9237 split_type = SPLIT_TYPE_STRING;
9242 mustnot_broken(spat);
9243 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9251#define SPLIT_STR(beg, len) ( \
9252 empty_count = split_string(result, str, beg, len, empty_count), \
9253 str_mod_check(str, str_start, str_len))
9256 char *ptr = RSTRING_PTR(str);
9257 char *
const str_start = ptr;
9258 const long str_len = RSTRING_LEN(str);
9259 char *
const eptr = str_start + str_len;
9260 if (split_type == SPLIT_TYPE_AWK) {
9267 if (is_ascii_string(str)) {
9268 while (ptr < eptr) {
9269 c = (
unsigned char)*ptr++;
9271 if (ascii_isspace(c)) {
9277 if (!
NIL_P(limit) && lim <= i)
break;
9280 else if (ascii_isspace(c)) {
9281 SPLIT_STR(beg, end-beg);
9284 if (!
NIL_P(limit)) ++i;
9292 while (ptr < eptr) {
9295 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9304 if (!
NIL_P(limit) && lim <= i)
break;
9308 SPLIT_STR(beg, end-beg);
9311 if (!
NIL_P(limit)) ++i;
9319 else if (split_type == SPLIT_TYPE_STRING) {
9320 char *substr_start = ptr;
9321 char *sptr = RSTRING_PTR(spat);
9322 long slen = RSTRING_LEN(spat);
9325 mustnot_broken(str);
9326 enc = rb_enc_check(str, spat);
9327 while (ptr < eptr &&
9328 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9331 if (t != ptr + end) {
9335 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9336 str_mod_check(spat, sptr, slen);
9339 if (!
NIL_P(limit) && lim <= ++i)
break;
9341 beg = ptr - str_start;
9343 else if (split_type == SPLIT_TYPE_CHARS) {
9346 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
9347 mustnot_broken(str);
9348 enc = rb_enc_get(str);
9349 while (ptr < eptr &&
9350 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9351 SPLIT_STR(ptr - str_start, n);
9353 if (!
NIL_P(limit) && lim <= ++i)
break;
9355 beg = ptr - str_start;
9359 long len = RSTRING_LEN(str);
9367 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9372 if (start == end && BEG(0) == END(0)) {
9377 else if (last_null == 1) {
9378 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9385 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9391 SPLIT_STR(beg, end-beg);
9392 beg = start = END(0);
9396 for (idx=1; idx < regs->num_regs; idx++) {
9397 if (BEG(idx) == -1)
continue;
9398 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9400 if (!
NIL_P(limit) && lim <= ++i)
break;
9402 if (match) rb_match_unbusy(match);
9404 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9405 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9408 return result ? result : str;
9418 return rb_str_split_m(1, &sep, str);
9421#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9427 rb_ary_push(ary, e);
9436#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9439chomp_newline(
const char *p,
const char *e, rb_encoding *enc)
9441 const char *prev = rb_enc_prev_char(p, e, e, enc);
9444 prev = rb_enc_prev_char(p, e, e, enc);
9445 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9457 RSTRING_LEN(rs) != 1 ||
9458 RSTRING_PTR(rs)[0] !=
'\n')) {
9464#define rb_rs get_rs()
9471 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9472 long pos,
len, rslen;
9478 static ID keywords[1];
9483 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9487 if (!ENUM_ELEM(ary, str)) {
9495 if (!RSTRING_LEN(str))
goto end;
9497 ptr = subptr = RSTRING_PTR(str);
9499 len = RSTRING_LEN(str);
9501 rslen = RSTRING_LEN(rs);
9504 enc = rb_enc_get(str);
9506 enc = rb_enc_check(str, rs);
9511 const char *eol = NULL;
9513 while (subend < pend) {
9514 long chomp_rslen = 0;
9516 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9518 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9520 if (eol == subend)
break;
9524 chomp_rslen = -rslen;
9528 if (!subptr) subptr = subend;
9532 }
while (subend < pend);
9534 if (rslen == 0) chomp_rslen = 0;
9536 subend - subptr + (chomp ? chomp_rslen : rslen));
9537 if (ENUM_ELEM(ary, line)) {
9538 str_mod_check(str, ptr,
len);
9540 subptr = eol = NULL;
9545 rsptr = RSTRING_PTR(rs);
9546 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9555 rsptr = RSTRING_PTR(rs);
9556 rslen = RSTRING_LEN(rs);
9559 while (subptr < pend) {
9560 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9564 if (hit != adjusted) {
9568 subend = hit += rslen;
9571 subend = chomp_newline(subptr, subend, enc);
9578 if (ENUM_ELEM(ary, line)) {
9579 str_mod_check(str, ptr,
len);
9584 if (subptr != pend) {
9587 pend = chomp_newline(subptr, pend, enc);
9589 else if (pend - subptr >= rslen &&
9590 memcmp(pend - rslen, rsptr, rslen) == 0) {
9595 ENUM_ELEM(ary, line);
9616rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9619 return rb_str_enumerate_lines(argc, argv, str, 0);
9632rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9634 VALUE ary = WANTARRAY(
"lines", 0);
9635 return rb_str_enumerate_lines(argc, argv, str, ary);
9649 for (i=0; i<RSTRING_LEN(str); i++) {
9650 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9668rb_str_each_byte(
VALUE str)
9671 return rb_str_enumerate_bytes(str, 0);
9683rb_str_bytes(
VALUE str)
9685 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9686 return rb_str_enumerate_bytes(str, ary);
9704 ptr = RSTRING_PTR(str);
9705 len = RSTRING_LEN(str);
9706 enc = rb_enc_get(str);
9709 for (i = 0; i <
len; i += n) {
9710 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9715 for (i = 0; i <
len; i += n) {
9716 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9737rb_str_each_char(
VALUE str)
9740 return rb_str_enumerate_chars(str, 0);
9752rb_str_chars(
VALUE str)
9755 return rb_str_enumerate_chars(str, ary);
9759rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9764 const char *ptr, *end;
9767 if (single_byte_optimizable(str))
9768 return rb_str_enumerate_bytes(str, ary);
9771 ptr = RSTRING_PTR(str);
9773 enc = STR_ENC_GET(str);
9776 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9797rb_str_each_codepoint(
VALUE str)
9800 return rb_str_enumerate_codepoints(str, 0);
9812rb_str_codepoints(
VALUE str)
9815 return rb_str_enumerate_codepoints(str, ary);
9819get_reg_grapheme_cluster(rb_encoding *enc)
9821 int encidx = rb_enc_to_index(enc);
9823 const OnigUChar source_ascii[] =
"\\X";
9824 const OnigUChar *source = source_ascii;
9825 size_t source_len =
sizeof(source_ascii) - 1;
9828#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9829#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9830#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9831#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9832#define CASE_UTF(e) \
9833 case ENCINDEX_UTF_##e: { \
9834 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9835 source = source_UTF_##e; \
9836 source_len = sizeof(source_UTF_##e); \
9839 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9847 regex_t *reg_grapheme_cluster;
9849 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9850 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9852 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9853 onig_error_code_to_str(message, r, &einfo);
9854 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9857 return reg_grapheme_cluster;
9861get_cached_reg_grapheme_cluster(rb_encoding *enc)
9863 int encidx = rb_enc_to_index(enc);
9864 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9867 if (!reg_grapheme_cluster_utf8) {
9868 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9871 return reg_grapheme_cluster_utf8;
9880 size_t grapheme_cluster_count = 0;
9881 rb_encoding *enc = get_encoding(str);
9882 const char *ptr, *end;
9884 if (!rb_enc_unicode_p(enc)) {
9888 bool cached_reg_grapheme_cluster =
true;
9889 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9890 if (!reg_grapheme_cluster) {
9891 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9892 cached_reg_grapheme_cluster =
false;
9895 ptr = RSTRING_PTR(str);
9899 OnigPosition
len = onig_match(reg_grapheme_cluster,
9900 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9901 (
const OnigUChar *)ptr, NULL, 0);
9902 if (
len <= 0)
break;
9903 grapheme_cluster_count++;
9907 if (!cached_reg_grapheme_cluster) {
9908 onig_free(reg_grapheme_cluster);
9911 return SIZET2NUM(grapheme_cluster_count);
9915rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9918 rb_encoding *enc = get_encoding(str);
9919 const char *ptr0, *ptr, *end;
9921 if (!rb_enc_unicode_p(enc)) {
9922 return rb_str_enumerate_chars(str, ary);
9927 bool cached_reg_grapheme_cluster =
true;
9928 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9929 if (!reg_grapheme_cluster) {
9930 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9931 cached_reg_grapheme_cluster =
false;
9934 ptr0 = ptr = RSTRING_PTR(str);
9938 OnigPosition
len = onig_match(reg_grapheme_cluster,
9939 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9940 (
const OnigUChar *)ptr, NULL, 0);
9941 if (
len <= 0)
break;
9946 if (!cached_reg_grapheme_cluster) {
9947 onig_free(reg_grapheme_cluster);
9967rb_str_each_grapheme_cluster(
VALUE str)
9970 return rb_str_enumerate_grapheme_clusters(str, 0);
9982rb_str_grapheme_clusters(
VALUE str)
9985 return rb_str_enumerate_grapheme_clusters(str, ary);
9989chopped_length(
VALUE str)
9991 rb_encoding *enc = STR_ENC_GET(str);
9992 const char *p, *p2, *beg, *end;
9994 beg = RSTRING_PTR(str);
9995 end = beg + RSTRING_LEN(str);
9996 if (beg >= end)
return 0;
9997 p = rb_enc_prev_char(beg, end, end, enc);
9999 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10000 p2 = rb_enc_prev_char(beg, p, end, enc);
10001 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10017rb_str_chop_bang(
VALUE str)
10019 str_modify_keep_cr(str);
10020 if (RSTRING_LEN(str) > 0) {
10022 len = chopped_length(str);
10023 STR_SET_LEN(str,
len);
10024 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10043rb_str_chop(
VALUE str)
10049smart_chomp(
VALUE str,
const char *e,
const char *p)
10051 rb_encoding *enc = rb_enc_get(str);
10052 if (rb_enc_mbminlen(enc) > 1) {
10057 pp = e - rb_enc_mbminlen(enc);
10060 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10068 if (--e > p && *(e-1) ==
'\r') {
10085 char *pp, *e, *rsptr;
10087 char *
const p = RSTRING_PTR(str);
10088 long len = RSTRING_LEN(str);
10090 if (
len == 0)
return 0;
10093 return smart_chomp(str, e, p);
10096 enc = rb_enc_get(str);
10099 if (rb_enc_mbminlen(enc) > 1) {
10104 pp -= rb_enc_mbminlen(enc);
10107 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10114 while (e > p && *(e-1) ==
'\n') {
10116 if (e > p && *(e-1) ==
'\r')
10122 if (rslen >
len)
return len;
10124 enc = rb_enc_get(rs);
10125 newline = rsptr[rslen-1];
10126 if (rslen == rb_enc_mbminlen(enc)) {
10128 if (newline ==
'\n')
10129 return smart_chomp(str, e, p);
10133 return smart_chomp(str, e, p);
10137 enc = rb_enc_check(str, rs);
10138 if (is_broken_string(rs)) {
10142 if (p[
len-1] == newline &&
10144 memcmp(rsptr, pp, rslen) == 0)) {
10145 if (at_char_boundary(p, pp, e, enc))
10146 return len - rslen;
10158chomp_rs(
int argc,
const VALUE *argv)
10162 VALUE rs = argv[0];
10174 long olen = RSTRING_LEN(str);
10175 long len = chompped_length(str, rs);
10176 if (
len >= olen)
return Qnil;
10177 str_modify_keep_cr(str);
10178 STR_SET_LEN(str,
len);
10179 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10196rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10199 str_modifiable(str);
10200 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10201 rs = chomp_rs(argc, argv);
10203 return rb_str_chomp_string(str, rs);
10216rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10218 VALUE rs = chomp_rs(argc, argv);
10224lstrip_offset(
VALUE str,
const char *s,
const char *e, rb_encoding *enc)
10226 const char *
const start = s;
10228 if (!s || s >= e)
return 0;
10231 if (single_byte_optimizable(str)) {
10232 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10237 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10257rb_str_lstrip_bang(
VALUE str)
10261 long olen, loffset;
10263 str_modify_keep_cr(str);
10264 enc = STR_ENC_GET(str);
10266 loffset = lstrip_offset(str, start, start+olen, enc);
10268 long len = olen-loffset;
10269 s = start + loffset;
10270 memmove(start, s,
len);
10271 STR_SET_LEN(str,
len);
10272 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10295rb_str_lstrip(
VALUE str)
10300 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10301 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10306rstrip_offset(
VALUE str,
const char *s,
const char *e, rb_encoding *enc)
10310 rb_str_check_dummy_enc(enc);
10314 if (!s || s >= e)
return 0;
10318 if (single_byte_optimizable(str)) {
10320 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10325 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10345rb_str_rstrip_bang(
VALUE str)
10349 long olen, roffset;
10351 str_modify_keep_cr(str);
10352 enc = STR_ENC_GET(str);
10354 roffset = rstrip_offset(str, start, start+olen, enc);
10356 long len = olen - roffset;
10358 STR_SET_LEN(str,
len);
10359 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10382rb_str_rstrip(
VALUE str)
10386 long olen, roffset;
10388 enc = STR_ENC_GET(str);
10390 roffset = rstrip_offset(str, start, start+olen, enc);
10392 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10408rb_str_strip_bang(
VALUE str)
10411 long olen, loffset, roffset;
10414 str_modify_keep_cr(str);
10415 enc = STR_ENC_GET(str);
10417 loffset = lstrip_offset(str, start, start+olen, enc);
10418 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10420 if (loffset > 0 || roffset > 0) {
10421 long len = olen-roffset;
10424 memmove(start, start + loffset,
len);
10426 STR_SET_LEN(str,
len);
10427 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10450rb_str_strip(
VALUE str)
10453 long olen, loffset, roffset;
10454 rb_encoding *enc = STR_ENC_GET(str);
10457 loffset = lstrip_offset(str, start, start+olen, enc);
10458 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10460 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10465scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10468 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10474 end = pos + RSTRING_LEN(pat);
10484 rb_encoding *enc = STR_ENC_GET(str);
10488 if (RSTRING_LEN(str) > end)
10489 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10498 if (!regs || regs->num_regs == 1) {
10504 for (
int i = 1; i < regs->num_regs; i++) {
10510 rb_ary_push(result, s);
10565 long last = -1, prev = 0;
10566 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10568 pat = get_pat_quoted(pat, 1);
10569 mustnot_broken(str);
10573 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10576 rb_ary_push(ary, result);
10578 if (last >= 0) rb_pat_search(pat, str, last, 1);
10583 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10587 str_mod_check(str, p,
len);
10589 if (last >= 0) rb_pat_search(pat, str, last, 1);
10613rb_str_hex(
VALUE str)
10615 return rb_str_to_inum(str, 16, FALSE);
10640rb_str_oct(
VALUE str)
10642 return rb_str_to_inum(str, -8, FALSE);
10645#ifndef HAVE_CRYPT_R
10650 rb_nativethread_lock_t lock;
10651} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10654crypt_mutex_initialize(
void)
10725# define CRYPT_END() ALLOCV_END(databuf)
10727 extern char *crypt(
const char *,
const char *);
10728# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10731 const char *s, *saltp;
10734 char salt_8bit_clean[3];
10738 mustnot_wchar(str);
10739 mustnot_wchar(salt);
10741 saltp = RSTRING_PTR(salt);
10742 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10743 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10747 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10748 salt_8bit_clean[0] = saltp[0] & 0x7f;
10749 salt_8bit_clean[1] = saltp[1] & 0x7f;
10750 salt_8bit_clean[2] =
'\0';
10751 saltp = salt_8bit_clean;
10756# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10757 data->initialized = 0;
10759 res = crypt_r(s, saltp, data);
10761 crypt_mutex_initialize();
10763 res = crypt(s, saltp);
10804 char *ptr, *p, *pend;
10807 unsigned long sum0 = 0;
10812 ptr = p = RSTRING_PTR(str);
10813 len = RSTRING_LEN(str);
10819 str_mod_check(str, ptr,
len);
10822 sum0 += (
unsigned char)*p;
10833 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10834 sum0 &= (((
unsigned long)1)<<bits)-1;
10854rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10858 long width,
len, flen = 1, fclen = 1;
10861 const char *f =
" ";
10862 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10864 int singlebyte = 1, cr;
10868 enc = STR_ENC_GET(str);
10869 termlen = rb_enc_mbminlen(enc);
10873 enc = rb_enc_check(str, pad);
10874 f = RSTRING_PTR(pad);
10875 flen = RSTRING_LEN(pad);
10876 fclen = str_strlen(pad, enc);
10877 singlebyte = single_byte_optimizable(pad);
10878 if (flen == 0 || fclen == 0) {
10879 rb_raise(rb_eArgError,
"zero width padding");
10882 len = str_strlen(str, enc);
10883 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10885 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10889 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10890 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10892 size = RSTRING_LEN(str);
10893 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10894 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10895 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10896 rb_raise(rb_eArgError,
"argument too big");
10900 p = RSTRING_PTR(res);
10902 memset(p, *f, llen);
10906 while (llen >= fclen) {
10912 memcpy(p, f, llen2);
10916 memcpy(p, RSTRING_PTR(str), size);
10919 memset(p, *f, rlen);
10923 while (rlen >= fclen) {
10929 memcpy(p, f, rlen2);
10933 TERM_FILL(p, termlen);
10934 STR_SET_LEN(res, p-RSTRING_PTR(res));
10957rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10959 return rb_str_justify(argc, argv, str,
'l');
10973rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10975 return rb_str_justify(argc, argv, str,
'r');
10990rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10992 return rb_str_justify(argc, argv, str,
'c');
11008 sep = get_pat_quoted(sep, 0);
11020 pos = rb_str_index(str, sep, 0);
11021 if (pos < 0)
goto failed;
11026 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11029 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11043 long pos = RSTRING_LEN(str);
11045 sep = get_pat_quoted(sep, 0);
11058 pos = rb_str_rindex(str, sep, pos);
11067 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11069 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11081rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11085 for (i=0; i<argc; i++) {
11086 VALUE tmp = argv[i];
11088 if (rb_reg_start_with_p(tmp, str))
11092 const char *p, *s, *e;
11097 enc = rb_enc_check(str, tmp);
11098 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11099 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11100 p = RSTRING_PTR(str);
11103 if (!at_char_right_boundary(p, s, e, enc))
11105 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11121rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11125 for (i=0; i<argc; i++) {
11126 VALUE tmp = argv[i];
11127 const char *p, *s, *e;
11132 enc = rb_enc_check(str, tmp);
11133 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11134 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11135 p = RSTRING_PTR(str);
11138 if (!at_char_boundary(p, s, e, enc))
11140 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11156deleted_prefix_length(
VALUE str,
VALUE prefix)
11158 const char *strptr, *prefixptr;
11159 long olen, prefixlen;
11160 rb_encoding *enc = rb_enc_get(str);
11164 if (!is_broken_string(prefix) ||
11165 !rb_enc_asciicompat(enc) ||
11166 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11167 enc = rb_enc_check(str, prefix);
11171 prefixlen = RSTRING_LEN(prefix);
11172 if (prefixlen <= 0)
return 0;
11173 olen = RSTRING_LEN(str);
11174 if (olen < prefixlen)
return 0;
11175 strptr = RSTRING_PTR(str);
11176 prefixptr = RSTRING_PTR(prefix);
11177 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11178 if (is_broken_string(prefix)) {
11179 if (!is_broken_string(str)) {
11183 const char *strend = strptr + olen;
11184 const char *after_prefix = strptr + prefixlen;
11185 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11205rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11208 str_modify_keep_cr(str);
11210 prefixlen = deleted_prefix_length(str, prefix);
11211 if (prefixlen <= 0)
return Qnil;
11225rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11229 prefixlen = deleted_prefix_length(str, prefix);
11230 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11232 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11245deleted_suffix_length(
VALUE str,
VALUE suffix)
11247 const char *strptr, *suffixptr;
11248 long olen, suffixlen;
11252 if (is_broken_string(suffix))
return 0;
11253 enc = rb_enc_check(str, suffix);
11256 suffixlen = RSTRING_LEN(suffix);
11257 if (suffixlen <= 0)
return 0;
11258 olen = RSTRING_LEN(str);
11259 if (olen < suffixlen)
return 0;
11260 strptr = RSTRING_PTR(str);
11261 suffixptr = RSTRING_PTR(suffix);
11262 const char *strend = strptr + olen;
11263 const char *before_suffix = strend - suffixlen;
11264 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11265 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11280rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11282 long olen, suffixlen,
len;
11283 str_modifiable(str);
11285 suffixlen = deleted_suffix_length(str, suffix);
11286 if (suffixlen <= 0)
return Qnil;
11288 olen = RSTRING_LEN(str);
11289 str_modify_keep_cr(str);
11290 len = olen - suffixlen;
11291 STR_SET_LEN(str,
len);
11292 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11308rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11312 suffixlen = deleted_suffix_length(str, suffix);
11313 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11315 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11322 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11330 val = rb_fs_check(val);
11333 "value of %"PRIsVALUE
" must be String or Regexp",
11337 rb_warn_deprecated(
"'$;'", NULL);
11354 str_modifiable(str);
11356 rb_encoding *encoding = rb_to_encoding(enc);
11357 int idx = rb_enc_to_index(encoding);
11364 rb_enc_associate_index(str, idx);
11388 if (STR_EMBED_P(str)) {
11389 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11394 str_replace_shared_without_enc(str2, str);
11396 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11429rb_str_valid_encoding_p(
VALUE str)
11449rb_str_is_ascii_only_p(
VALUE str)
11459 static const char ellipsis[] =
"...";
11460 const long ellipsislen =
sizeof(ellipsis) - 1;
11461 rb_encoding *
const enc = rb_enc_get(str);
11462 const long blen = RSTRING_LEN(str);
11463 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11464 VALUE estr, ret = 0;
11467 if (
len * rb_enc_mbminlen(enc) >= blen ||
11471 else if (
len <= ellipsislen ||
11473 if (rb_enc_asciicompat(enc)) {
11475 rb_enc_associate(ret, enc);
11482 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11487 rb_enc_from_encoding(enc), 0,
Qnil);
11494str_compat_and_valid(
VALUE str, rb_encoding *enc)
11500 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11503 rb_encoding *e = STR_ENC_GET(str);
11506 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11512static VALUE enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl,
int cr);
11517 rb_encoding *enc = STR_ENC_GET(str);
11522rb_enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl)
11525 if (enc == STR_ENC_GET(str)) {
11530 return enc_str_scrub(enc, str, repl, cr);
11534enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl,
int cr)
11538 const char *rep, *p, *e, *p1, *sp;
11544 rb_raise(rb_eArgError,
"both of block and replacement given");
11551 if (!
NIL_P(repl)) {
11552 repl = str_compat_and_valid(repl, enc);
11555 if (rb_enc_dummy_p(enc)) {
11558 encidx = rb_enc_to_index(enc);
11560#define DEFAULT_REPLACE_CHAR(str) do { \
11561 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11562 rep = replace; replen = (int)sizeof(replace); \
11565 slen = RSTRING_LEN(str);
11566 p = RSTRING_PTR(str);
11571 if (rb_enc_asciicompat(enc)) {
11577 else if (!
NIL_P(repl)) {
11578 rep = RSTRING_PTR(repl);
11579 replen = RSTRING_LEN(repl);
11583 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11587 DEFAULT_REPLACE_CHAR(
"?");
11592 p = search_nonascii(p, e);
11597 int ret = rb_enc_precise_mbclen(p, e, enc);
11616 if (e - p < clen) clen = e - p;
11623 for (; clen > 1; clen--) {
11624 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11635 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11636 str_mod_check(str, sp, slen);
11637 repl = str_compat_and_valid(repl, enc);
11644 p = search_nonascii(p, e);
11670 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11671 str_mod_check(str, sp, slen);
11672 repl = str_compat_and_valid(repl, enc);
11681 long mbminlen = rb_enc_mbminlen(enc);
11685 else if (!
NIL_P(repl)) {
11686 rep = RSTRING_PTR(repl);
11687 replen = RSTRING_LEN(repl);
11689 else if (encidx == ENCINDEX_UTF_16BE) {
11690 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11692 else if (encidx == ENCINDEX_UTF_16LE) {
11693 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11695 else if (encidx == ENCINDEX_UTF_32BE) {
11696 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11698 else if (encidx == ENCINDEX_UTF_32LE) {
11699 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11702 DEFAULT_REPLACE_CHAR(
"?");
11706 int ret = rb_enc_precise_mbclen(p, e, enc);
11719 if (e - p < clen) clen = e - p;
11720 if (clen <= mbminlen * 2) {
11725 for (; clen > mbminlen; clen-=mbminlen) {
11726 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11736 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11737 str_mod_check(str, sp, slen);
11738 repl = str_compat_and_valid(repl, enc);
11763 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11764 str_mod_check(str, sp, slen);
11765 repl = str_compat_and_valid(repl, enc);
11801str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11809static ID id_normalize;
11810static ID id_normalized_p;
11811static VALUE mUnicodeNormalize;
11814unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11816 static int UnicodeNormalizeRequired = 0;
11819 if (!UnicodeNormalizeRequired) {
11820 rb_require(
"unicode_normalize/normalize.rb");
11821 UnicodeNormalizeRequired = 1;
11825 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11862rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11864 return unicode_normalize_common(argc, argv, str, id_normalize);
11878rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11880 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11907rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11909 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12041#define sym_equal rb_obj_equal
12044sym_printable(
const char *s,
const char *send, rb_encoding *enc)
12048 int c = rb_enc_precise_mbclen(s, send, enc);
12052 c = rb_enc_mbc_to_codepoint(s, send, enc);
12060rb_str_symname_p(
VALUE sym)
12068 enc = STR_ENC_GET(sym);
12069 ptr = RSTRING_PTR(sym);
12070 len = RSTRING_LEN(sym);
12071 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12079rb_str_quote_unprintable(
VALUE str)
12084 rb_encoding *resenc;
12089 enc = STR_ENC_GET(str);
12090 ptr = RSTRING_PTR(str);
12091 len = RSTRING_LEN(str);
12092 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12093 !sym_printable(ptr, ptr +
len, enc)) {
12094 return rb_str_escape(str);
12100rb_id_quote_unprintable(
ID id)
12102 VALUE str = rb_id2str(
id);
12103 if (!rb_str_symname_p(str)) {
12104 return rb_str_escape(str);
12122sym_inspect(
VALUE sym)
12129 if (!rb_str_symname_p(str)) {
12131 len = RSTRING_LEN(str);
12132 rb_str_resize(str,
len + 1);
12133 dest = RSTRING_PTR(str);
12134 memmove(dest + 1, dest,
len);
12137 rb_encoding *enc = STR_ENC_GET(str);
12138 VALUE orig_str = str;
12140 len = RSTRING_LEN(orig_str);
12141 str = rb_enc_str_new(0,
len + 1, enc);
12144 ptr = RSTRING_PTR(orig_str);
12145 dest = RSTRING_PTR(str);
12146 memcpy(dest + 1, ptr,
len);
12166rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12171 rb_raise(rb_eArgError,
"no receiver given");
12268 return rb_str_match(
rb_sym2str(sym), other);
12283sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12285 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12298sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12300 return rb_str_match_m_p(argc, argv, sym);
12318 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12329sym_length(
VALUE sym)
12343sym_empty(
VALUE sym)
12377sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12393sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12409sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12423sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12425 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12438sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12440 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12452sym_encoding(
VALUE sym)
12458string_for_symbol(
VALUE name)
12463 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12477 name = string_for_symbol(name);
12478 return rb_intern_str(name);
12487 name = string_for_symbol(name);
12511 return rb_fstring(str);
12518 return register_fstring(setup_fake_str(&fake_str, ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12530 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12531 rb_enc_autoload(enc);
12535 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
true,
false);
12539rb_enc_literal_str(
const char *ptr,
long len, rb_encoding *enc)
12541 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12542 rb_enc_autoload(enc);
12546 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
true,
true);
12557rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12562 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12563 rb_str_buf_cat_byte(str, (
char) code);
12743 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy; the check is performed only when RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag marks the object as frozen, i.e. immutable.
void rb_include_module(VALUE klass, VALUE module)
Includes a module into a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines the named method in a class.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby's string instead of C's.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_ary_new(void)
Allocates a new, empty array.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby's string instead of C's.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C's string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string.
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "default external".
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Define a function-backended global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
struct rb_data_type_struct rb_data_type_t
This is the struct that holds necessary info for a struct.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
union RString::@157025041137035241047331270155043025061071337053::@157067065136062356112324002106172053054013023024::@365170260060164113275356137374160141226332013204 aux
Auxiliary info.
struct RString::@157025041137035241047331270155043025061071337053::@153056146250355212360325351117351053336274231135 embed
Embedded contents.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
union RString::@157025041137035241047331270155043025061071337053 as
String's specific fields.
struct RString::@157025041137035241047331270155043025061071337053::@157067065136062356112324002106172053054013023024 heap
Strings that use separated memory region for contents use this pattern.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.