Ruby 3.4.2p28 (2025-02-15 revision d2930f8e7a5db8a7337fa43370940381b420cc3e)
symbol.c
1/**********************************************************************
2
3 symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "internal.h"
13#include "internal/error.h"
14#include "internal/gc.h"
15#include "internal/hash.h"
16#include "internal/object.h"
17#include "internal/symbol.h"
18#include "internal/vm.h"
19#include "probes.h"
20#include "ruby/encoding.h"
21#include "ruby/st.h"
22#include "symbol.h"
23#include "vm_sync.h"
24#include "builtin.h"
25
26#if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
27# undef USE_SYMBOL_GC
28# define USE_SYMBOL_GC 0
29#else
30# undef USE_SYMBOL_GC
31# define USE_SYMBOL_GC 1
32#endif
33#if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
34# undef SYMBOL_DEBUG
35# define SYMBOL_DEBUG 1
36#else
37# undef SYMBOL_DEBUG
38# define SYMBOL_DEBUG 0
39#endif
40#ifndef CHECK_ID_SERIAL
41# define CHECK_ID_SERIAL SYMBOL_DEBUG
42#endif
43
44#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
45
46#define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
47
48static ID register_static_symid(ID, const char *, long, rb_encoding *);
49static ID register_static_symid_str(ID, VALUE);
50#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
51#include "id.c"
52
53#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
54
55#define op_tbl_count numberof(op_tbl)
56STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
57#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
58
59static void
60Init_op_tbl(void)
61{
62 int i;
63 rb_encoding *const enc = rb_usascii_encoding();
64
65 for (i = '!'; i <= '~'; ++i) {
66 if (!ISALNUM(i) && i != '_') {
67 char c = (char)i;
68 register_static_symid(i, &c, 1, enc);
69 }
70 }
71 for (i = 0; i < op_tbl_count; ++i) {
72 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
73 }
74}
75
76static const int ID_ENTRY_UNIT = 512;
77
/*
 * Slots kept for each ID inside a chunk of symbols->ids (see set_id_entry):
 * the name string first, then the symbol.  ID_ENTRY_SIZE is the number of
 * slots occupied per ID.
 */
78enum id_entry_type {
79 ID_ENTRY_STR,
80 ID_ENTRY_SYM,
81 ID_ENTRY_SIZE
82};
83
84rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
85
86static const struct st_hash_type symhash = {
89};
90
91void
92Init_sym(void)
93{
94 rb_symbols_t *symbols = &ruby_global_symbols;
95
96 VALUE dsym_fstrs = rb_ident_hash_new();
97 symbols->dsymbol_fstr_hash = dsym_fstrs;
98 rb_vm_register_global_object(dsym_fstrs);
99 rb_obj_hide(dsym_fstrs);
100
101 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
102 symbols->ids = rb_ary_hidden_new(0);
103 rb_vm_register_global_object(symbols->ids);
104
105 Init_op_tbl();
106 Init_id();
107}
108
109WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
110WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
111WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
112WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
113WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
114WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
115WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
116
117#define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
118#define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
119
120ID
121rb_id_attrset(ID id)
122{
123 VALUE str, sym;
124 int scope;
125
126 if (!is_notop_id(id)) {
127 switch (id) {
128 case tAREF: case tASET:
129 return tASET; /* only exception */
130 }
131 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
132 rb_id2str(id));
133 }
134 else {
135 scope = id_type(id);
136 switch (scope) {
137 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
138 case ID_CONST: case ID_CLASS: case ID_JUNK:
139 break;
140 case ID_ATTRSET:
141 return id;
142 default:
143 {
144 if ((str = lookup_id_str(id)) != 0) {
145 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
146 scope, str);
147 }
148 else {
149 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
150 scope, (VALUE)id);
151 }
152 }
153 }
154 }
155
156 /* make new symbol and ID */
157 if (!(str = lookup_id_str(id))) {
158 static const char id_types[][8] = {
159 "local",
160 "instance",
161 "invalid",
162 "global",
163 "attrset",
164 "const",
165 "class",
166 "junk",
167 };
168 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
169 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
170 }
171 str = rb_str_dup(str);
172 rb_str_cat(str, "=", 1);
173 sym = lookup_str_sym(str);
174 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
175 return id;
176}
177
178static int
179is_special_global_name(const char *m, const char *e, rb_encoding *enc)
180{
181 int mb = 0;
182
183 if (m >= e) return 0;
184 if (is_global_name_punct(*m)) {
185 ++m;
186 }
187 else if (*m == '-') {
188 if (++m >= e) return 0;
189 if (is_identchar(m, e, enc)) {
190 if (!ISASCII(*m)) mb = 1;
191 m += rb_enc_mbclen(m, e, enc);
192 }
193 }
194 else {
195 if (!ISDIGIT(*m)) return 0;
196 do {
197 if (!ISASCII(*m)) mb = 1;
198 ++m;
199 } while (m < e && ISDIGIT(*m));
200 }
201 return m == e ? mb + 1 : 0;
202}
203
204int
205rb_symname_p(const char *name)
206{
207 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
208}
209
210int
211rb_enc_symname_p(const char *name, rb_encoding *enc)
212{
213 return rb_enc_symname2_p(name, strlen(name), enc);
214}
215
216static int
217rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
218{
219 int c, len;
220 const char *end = name + nlen;
221
222 if (nlen < 1) return FALSE;
223 if (ISASCII(*name)) return ISUPPER(*name);
224 c = rb_enc_precise_mbclen(name, end, enc);
225 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
227 c = rb_enc_mbc_to_codepoint(name, end, enc);
228 if (rb_enc_isupper(c, enc)) return TRUE;
229 if (rb_enc_islower(c, enc)) return FALSE;
230 if (ONIGENC_IS_UNICODE(enc)) {
231 static int ctype_titlecase = 0;
232 if (!ctype_titlecase) {
233 static const UChar cname[] = "titlecaseletter";
234 static const UChar *const end = cname + sizeof(cname) - 1;
235 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
236 }
237 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
238 }
239 else {
240 /* fallback to case-folding */
241 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
242 const OnigUChar *beg = (const OnigUChar *)name;
243 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
244 &beg, (const OnigUChar *)end,
245 fold, enc);
246 if (r > 0 && (r != len || memcmp(fold, name, r)))
247 return TRUE;
248 }
249 return FALSE;
250}
251
252#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
253#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
254
256 const enum { invalid, stophere, needmore, } kind;
257 const enum ruby_id_types type;
258 const long nread;
259};
260
261#define t struct enc_synmane_type_leading_chars_tag
262
264enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
265{
266 const char *m = name;
267 const char *e = m + len;
268
269 if (! rb_enc_asciicompat(enc)) {
270 return (t) { invalid, 0, 0, };
271 }
272 else if (! m) {
273 return (t) { invalid, 0, 0, };
274 }
275 else if ( len <= 0 ) {
276 return (t) { invalid, 0, 0, };
277 }
278 switch (*m) {
279 case '\0':
280 return (t) { invalid, 0, 0, };
281
282 case '$':
283 if (is_special_global_name(++m, e, enc)) {
284 return (t) { stophere, ID_GLOBAL, len, };
285 }
286 else {
287 return (t) { needmore, ID_GLOBAL, 1, };
288 }
289
290 case '@':
291 switch (*++m) {
292 default: return (t) { needmore, ID_INSTANCE, 1, };
293 case '@': return (t) { needmore, ID_CLASS, 2, };
294 }
295
296 case '<':
297 switch (*++m) {
298 default: return (t) { stophere, ID_JUNK, 1, };
299 case '<': return (t) { stophere, ID_JUNK, 2, };
300 case '=':
301 switch (*++m) {
302 default: return (t) { stophere, ID_JUNK, 2, };
303 case '>': return (t) { stophere, ID_JUNK, 3, };
304 }
305 }
306
307 case '>':
308 switch (*++m) {
309 default: return (t) { stophere, ID_JUNK, 1, };
310 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
311 }
312
313 case '=':
314 switch (*++m) {
315 default: return (t) { invalid, 0, 1, };
316 case '~': return (t) { stophere, ID_JUNK, 2, };
317 case '=':
318 switch (*++m) {
319 default: return (t) { stophere, ID_JUNK, 2, };
320 case '=': return (t) { stophere, ID_JUNK, 3, };
321 }
322 }
323
324 case '*':
325 switch (*++m) {
326 default: return (t) { stophere, ID_JUNK, 1, };
327 case '*': return (t) { stophere, ID_JUNK, 2, };
328 }
329
330 case '+': case '-':
331 switch (*++m) {
332 default: return (t) { stophere, ID_JUNK, 1, };
333 case '@': return (t) { stophere, ID_JUNK, 2, };
334 }
335
336 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
337 return (t) { stophere, ID_JUNK, 1, };
338
339 case '[':
340 switch (*++m) {
341 default: return (t) { needmore, ID_JUNK, 0, };
342 case ']':
343 switch (*++m) {
344 default: return (t) { stophere, ID_JUNK, 2, };
345 case '=': return (t) { stophere, ID_JUNK, 3, };
346 }
347 }
348
349 case '!':
350 switch (*++m) {
351 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
352 default:
353 if (allowed_attrset & (1U << ID_JUNK)) {
354 return (t) { needmore, ID_JUNK, 1, };
355 }
356 else {
357 return (t) { stophere, ID_JUNK, 1, };
358 }
359 }
360
361 default:
362 if (rb_sym_constant_char_p(name, len, enc)) {
363 return (t) { needmore, ID_CONST, 0, };
364 }
365 else {
366 return (t) { needmore, ID_LOCAL, 0, };
367 }
368 }
369}
370#undef t
371
372int
373rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
374{
376 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
377 const char *m = name + f.nread;
378 const char *e = name + len;
379 int type = (int)f.type;
380
381 switch (f.kind) {
382 case invalid: return -1;
383 case stophere: break;
384 case needmore:
385
386 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
387 if (len > 1 && *(e-1) == '=') {
388 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
389 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
390 }
391 return -1;
392 }
393 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
394 if (m >= e) break;
395 switch (*m) {
396 case '!': case '?':
397 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
398 type = ID_JUNK;
399 ++m;
400 if (m + 1 < e || *m != '=') break;
401 /* fall through */
402 case '=':
403 if (!(allowed_attrset & (1U << type))) return -1;
404 type = ID_ATTRSET;
405 ++m;
406 break;
407 }
408 }
409
410 return m == e ? type : -1;
411}
412
413int
414rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
415{
416 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
417}
418
419static int
420rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
421{
422 const char *ptr = StringValuePtr(name);
423 long len = RSTRING_LEN(name);
424 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
425 RB_GC_GUARD(name);
426 return type;
427}
428
429static void
430set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
431{
432 ASSERT_vm_locking();
435
436 size_t idx = num / ID_ENTRY_UNIT;
437
438 VALUE ary, ids = symbols->ids;
439 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
440 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
441 rb_ary_store(ids, (long)idx, ary);
442 }
443 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
444 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
445 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
446}
447
448static VALUE
449get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
450{
451 VALUE result = 0;
452
453 GLOBAL_SYMBOLS_ENTER(symbols);
454 {
455 if (num && num <= symbols->last_id) {
456 size_t idx = num / ID_ENTRY_UNIT;
457 VALUE ids = symbols->ids;
458 VALUE ary;
459 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
460 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
461 result = rb_ary_entry(ary, pos + t);
462
463 if (NIL_P(result)) {
464 result = 0;
465 }
466 else if (CHECK_ID_SERIAL) {
467 if (id) {
468 VALUE sym = result;
469 if (t != ID_ENTRY_SYM)
470 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
471 if (STATIC_SYM_P(sym)) {
472 if (STATIC_SYM2ID(sym) != id) result = 0;
473 }
474 else {
475 if (RSYMBOL(sym)->id != id) result = 0;
476 }
477 }
478 }
479 }
480 }
481 }
482 GLOBAL_SYMBOLS_LEAVE();
483
484 if (result) {
485 switch (t) {
486 case ID_ENTRY_STR:
488 break;
489 case ID_ENTRY_SYM:
491 break;
492 default:
493 break;
494 }
495 }
496
497 return result;
498}
499
500static VALUE
501get_id_entry(ID id, const enum id_entry_type t)
502{
503 return get_id_serial_entry(rb_id_to_serial(id), id, t);
504}
505
506int
507rb_static_id_valid_p(ID id)
508{
509 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
510}
511
512static inline ID
513rb_id_serial_to_id(rb_id_serial_t num)
514{
515 if (is_notop_id((ID)num)) {
516 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
517 if (sym) return SYM2ID(sym);
518 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
519 }
520 else {
521 return (ID)num;
522 }
523}
524
525static int
526register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
527{
528 if (existing) {
529 rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
530 (VALUE)*key, (VALUE)*value);
531 }
532 *value = arg;
533 return ST_CONTINUE;
534}
535
536static void
537register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
538{
539 ASSERT_vm_locking();
540
541 if (SYMBOL_DEBUG) {
542 st_update(symbols->str_sym, (st_data_t)str,
543 register_sym_update_callback, (st_data_t)sym);
544 }
545 else {
546 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
547 }
548}
549
550void
551rb_free_static_symid_str(void)
552{
553 GLOBAL_SYMBOLS_ENTER(symbols)
554 {
555 st_free_table(symbols->str_sym);
556 }
557 GLOBAL_SYMBOLS_LEAVE();
558}
559
560static void
561unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
562{
563 ASSERT_vm_locking();
564
565 st_data_t str_data = (st_data_t)str;
566 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
567 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
568 }
569}
570
571static ID
572register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
573{
574 VALUE str = rb_enc_str_new(name, len, enc);
575 return register_static_symid_str(id, str);
576}
577
578static ID
579register_static_symid_str(ID id, VALUE str)
580{
581 rb_id_serial_t num = rb_id_to_serial(id);
582 VALUE sym = STATIC_ID2SYM(id);
583
584 OBJ_FREEZE(str);
585 str = rb_fstring(str);
586
587 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
588
589 GLOBAL_SYMBOLS_ENTER(symbols)
590 {
591 register_sym(symbols, str, sym);
592 set_id_entry(symbols, num, str, sym);
593 }
594 GLOBAL_SYMBOLS_LEAVE();
595
596 return id;
597}
598
599static int
600sym_check_asciionly(VALUE str, bool fake_str)
601{
602 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
603 switch (rb_enc_str_coderange(str)) {
605 if (fake_str) {
606 str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
607 }
608 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
609 rb_enc_name(rb_enc_get(str)), str);
611 return TRUE;
612 }
613 return FALSE;
614}
615
616#if 0
617/*
618 * _str_ itself will be registered at the global symbol table. _str_
619 * can be modified before the registration, since the encoding will be
620 * set to ASCII-8BIT if it is a special global name.
621 */
622
/*
 * Disabled (inside #if 0) debugging helper: reports through rb_bug() when x
 * is not a dynamic symbol, with a message that depends on whether x is a
 * static symbol with a known name, a static symbol without one, or not a
 * symbol at all.
 */
623static inline void
624must_be_dynamic_symbol(VALUE x)
625{
626 if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
627 if (STATIC_SYM_P(x)) {
628 VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));
629
630 if (str) {
631 rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
632 }
633 else {
634 rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
635 }
636 }
637 else {
638 rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
639 }
640 }
641}
642#endif
643
644static VALUE
645dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
646{
647 ASSERT_vm_locking();
648
649 NEWOBJ_OF(obj, struct RSymbol, klass, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0);
650
651 long hashval;
652
653 rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc));
654 OBJ_FREEZE((VALUE)obj);
655 RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str);
656 obj->id = type;
657
658 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
659 hashval = (long)rb_str_hash(str);
660 obj->hashval = RSHIFT((long)hashval, 1);
661 register_sym(symbols, str, (VALUE)obj);
662 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
663 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr));
664
665 return (VALUE)obj;
666}
667
668static inline VALUE
669dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
670{
671 ASSERT_vm_locking();
672
673 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
674 const VALUE fstr = RSYMBOL(sym)->fstr;
675 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
676 RSYMBOL(sym)->fstr = 0;
677 unregister_sym(symbols, fstr, sym);
678 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
679 }
680 else {
681 return sym;
682 }
683}
684
685static ID
686lookup_str_id(VALUE str)
687{
688 st_data_t sym_data;
689 int found;
690
691 GLOBAL_SYMBOLS_ENTER(symbols);
692 {
693 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
694 }
695 GLOBAL_SYMBOLS_LEAVE();
696
697 if (found) {
698 const VALUE sym = (VALUE)sym_data;
699
700 if (STATIC_SYM_P(sym)) {
701 return STATIC_SYM2ID(sym);
702 }
703 else if (DYNAMIC_SYM_P(sym)) {
704 ID id = RSYMBOL(sym)->id;
705 if (id & ~ID_SCOPE_MASK) return id;
706 }
707 else {
708 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
709 rb_builtin_class_name(sym), sym, str);
710 }
711 }
712 return (ID)0;
713}
714
715static VALUE
716lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
717{
718 st_data_t sym_data;
719 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
720 VALUE sym = (VALUE)sym_data;
721 if (DYNAMIC_SYM_P(sym)) {
722 sym = dsymbol_check(symbols, sym);
723 }
724 return sym;
725 }
726 else {
727 return Qfalse;
728 }
729}
730
731static VALUE
732lookup_str_sym(const VALUE str)
733{
734 VALUE sym;
735
736 GLOBAL_SYMBOLS_ENTER(symbols);
737 {
738 sym = lookup_str_sym_with_lock(symbols, str);
739 }
740 GLOBAL_SYMBOLS_LEAVE();
741
742 return sym;
743}
744
745static VALUE
746lookup_id_str(ID id)
747{
748 return get_id_entry(id, ID_ENTRY_STR);
749}
750
751ID
752rb_intern3(const char *name, long len, rb_encoding *enc)
753{
754 VALUE sym;
755 struct RString fake_str;
756 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
757 OBJ_FREEZE(str);
758 sym = lookup_str_sym(str);
759 if (sym) return rb_sym2id(sym);
760 str = rb_enc_str_new(name, len, enc); /* make true string */
761 return intern_str(str, 1);
762}
763
764static ID
765next_id_base_with_lock(rb_symbols_t *symbols)
766{
767 ID id;
768 rb_id_serial_t next_serial = symbols->last_id + 1;
769
770 if (next_serial == 0) {
771 id = (ID)-1;
772 }
773 else {
774 const size_t num = ++symbols->last_id;
775 id = num << ID_SCOPE_SHIFT;
776 }
777
778 return id;
779}
780
781static ID
782next_id_base(void)
783{
784 ID id;
785 GLOBAL_SYMBOLS_ENTER(symbols);
786 {
787 id = next_id_base_with_lock(symbols);
788 }
789 GLOBAL_SYMBOLS_LEAVE();
790 return id;
791}
792
793static ID
794intern_str(VALUE str, int mutable)
795{
796 ID id;
797 ID nid;
798
799 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
800 if (id == (ID)-1) id = ID_JUNK;
801 if (sym_check_asciionly(str, false)) {
802 if (!mutable) str = rb_str_dup(str);
803 rb_enc_associate(str, rb_usascii_encoding());
804 }
805 if ((nid = next_id_base()) == (ID)-1) {
806 str = rb_str_ellipsize(str, 20);
807 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
808 str);
809 }
810 id |= nid;
811 id |= ID_STATIC_SYM;
812 return register_static_symid_str(id, str);
813}
814
815ID
816rb_intern2(const char *name, long len)
817{
818 return rb_intern3(name, len, rb_usascii_encoding());
819}
820
821#undef rb_intern
822ID
823rb_intern(const char *name)
824{
825 return rb_intern2(name, strlen(name));
826}
827
828ID
829rb_intern_str(VALUE str)
830{
831 VALUE sym = lookup_str_sym(str);
832
833 if (sym) {
834 return SYM2ID(sym);
835 }
836
837 return intern_str(str, 0);
838}
839
840void
841rb_gc_free_dsymbol(VALUE sym)
842{
843 VALUE str = RSYMBOL(sym)->fstr;
844
845 if (str) {
846 RSYMBOL(sym)->fstr = 0;
847
848 GLOBAL_SYMBOLS_ENTER(symbols);
849 {
850 unregister_sym(symbols, str, sym);
851 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
852 }
853 GLOBAL_SYMBOLS_LEAVE();
854 }
855}
856
857/*
858 * call-seq:
859 * str.intern -> symbol
860 * str.to_sym -> symbol
861 *
862 * Returns the +Symbol+ corresponding to <i>str</i>, creating the
863 * symbol if it did not previously exist. See Symbol#id2name.
864 *
865 * "Koala".intern #=> :Koala
866 * s = 'cat'.to_sym #=> :cat
867 * s == :cat #=> true
868 * s = '@cat'.to_sym #=> :@cat
869 * s == :@cat #=> true
870 *
871 * This can also be used to create symbols that cannot be represented using the
872 * <code>:xxx</code> notation.
873 *
874 * 'cat and dog'.to_sym #=> :"cat and dog"
875 */
876
877VALUE
879{
880 VALUE sym;
881
882 GLOBAL_SYMBOLS_ENTER(symbols);
883 {
884 sym = lookup_str_sym_with_lock(symbols, str);
885
886 if (sym) {
887 // ok
888 }
889 else if (USE_SYMBOL_GC) {
890 rb_encoding *enc = rb_enc_get(str);
891 rb_encoding *ascii = rb_usascii_encoding();
892 if (enc != ascii && sym_check_asciionly(str, false)) {
893 str = rb_str_dup(str);
894 rb_enc_associate(str, ascii);
895 OBJ_FREEZE(str);
896 enc = ascii;
897 }
898 else {
899 str = rb_str_dup(str);
900 OBJ_FREEZE(str);
901 }
902 str = rb_fstring(str);
903 int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
904 if (type < 0) type = ID_JUNK;
905 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
906 }
907 else {
908 ID id = intern_str(str, 0);
909 sym = ID2SYM(id);
910 }
911 }
912 GLOBAL_SYMBOLS_LEAVE();
913 return sym;
914}
915
916ID
918{
919 ID id;
920 if (STATIC_SYM_P(sym)) {
921 id = STATIC_SYM2ID(sym);
922 }
923 else if (DYNAMIC_SYM_P(sym)) {
924 GLOBAL_SYMBOLS_ENTER(symbols);
925 {
926 sym = dsymbol_check(symbols, sym);
927 id = RSYMBOL(sym)->id;
928
929 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
930 VALUE fstr = RSYMBOL(sym)->fstr;
931 ID num = next_id_base_with_lock(symbols);
932
933 RSYMBOL(sym)->id = id |= num;
934 /* make it permanent object */
935
936 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
937 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
938 }
939 }
940 GLOBAL_SYMBOLS_LEAVE();
941 }
942 else {
943 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
944 rb_builtin_class_name(sym));
945 }
946 return id;
947}
948
949#undef rb_id2sym
950VALUE
952{
953 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
954 return get_id_entry(x, ID_ENTRY_SYM);
955}
956
957/*
958 * call-seq:
959 * name -> string
960 *
961 * Returns a frozen string representation of +self+ (not including the leading colon):
962 *
963 * :foo.name # => "foo"
964 * :foo.name.frozen? # => true
965 *
966 * Related: Symbol#to_s, Symbol#inspect.
967 */
968
969VALUE
971{
972 VALUE str;
973 if (DYNAMIC_SYM_P(sym)) {
974 str = RSYMBOL(sym)->fstr;
976 }
977 else {
978 str = rb_id2str(STATIC_SYM2ID(sym));
979 if (str) RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING);
980 }
981
982 return str;
983}
984
985VALUE
986rb_id2str(ID id)
987{
988 return lookup_id_str(id);
989}
990
991const char *
992rb_id2name(ID id)
993{
994 VALUE str = rb_id2str(id);
995
996 if (!str) return 0;
997 return RSTRING_PTR(str);
998}
999
1000ID
1001rb_make_internal_id(void)
1002{
1003 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
1004}
1005
1006ID
1007rb_make_temporary_id(size_t n)
1008{
1009 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
1010 const ID id = max_id - (ID)n;
1011 if (id <= ruby_global_symbols.last_id) {
1012 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
1013 }
1014 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
1015}
1016
1017static int
1018symbols_i(st_data_t key, st_data_t value, st_data_t arg)
1019{
1020 VALUE ary = (VALUE)arg;
1021 VALUE sym = (VALUE)value;
1022
1023 if (STATIC_SYM_P(sym)) {
1024 rb_ary_push(ary, sym);
1025 return ST_CONTINUE;
1026 }
1027 else if (!DYNAMIC_SYM_P(sym)) {
1028 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
1029 }
1030 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
1031 RSYMBOL(sym)->fstr = 0;
1032 return ST_DELETE;
1033 }
1034 else {
1035 rb_ary_push(ary, sym);
1036 return ST_CONTINUE;
1037 }
1038
1039}
1040
1041VALUE
1043{
1044 VALUE ary;
1045
1046 GLOBAL_SYMBOLS_ENTER(symbols);
1047 {
1048 ary = rb_ary_new2(symbols->str_sym->num_entries);
1049 st_foreach(symbols->str_sym, symbols_i, ary);
1050 }
1051 GLOBAL_SYMBOLS_LEAVE();
1052
1053 return ary;
1054}
1055
1056size_t
1057rb_sym_immortal_count(void)
1058{
1059 return (size_t)ruby_global_symbols.last_id;
1060}
1061
1062int
1064{
1065 return is_const_id(id);
1066}
1067
1068int
1070{
1071 return is_class_id(id);
1072}
1073
1074int
1076{
1077 return is_global_id(id);
1078}
1079
1080int
1082{
1083 return is_instance_id(id);
1084}
1085
1086int
1088{
1089 return is_attrset_id(id);
1090}
1091
1092int
1094{
1095 return is_local_id(id);
1096}
1097
1098int
1100{
1101 return is_junk_id(id);
1102}
1103
1104int
1105rb_is_const_sym(VALUE sym)
1106{
1107 return is_const_sym(sym);
1108}
1109
1110int
1111rb_is_attrset_sym(VALUE sym)
1112{
1113 return is_attrset_sym(sym);
1114}
1115
1116ID
1117rb_check_id(volatile VALUE *namep)
1118{
1119 VALUE tmp;
1120 VALUE name = *namep;
1121
1122 if (STATIC_SYM_P(name)) {
1123 return STATIC_SYM2ID(name);
1124 }
1125 else if (DYNAMIC_SYM_P(name)) {
1126 if (SYMBOL_PINNED_P(name)) {
1127 return RSYMBOL(name)->id;
1128 }
1129 else {
1130 *namep = RSYMBOL(name)->fstr;
1131 return 0;
1132 }
1133 }
1134 else if (!RB_TYPE_P(name, T_STRING)) {
1135 tmp = rb_check_string_type(name);
1136 if (NIL_P(tmp)) {
1137 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1138 name);
1139 }
1140 name = tmp;
1141 *namep = name;
1142 }
1143
1144 sym_check_asciionly(name, false);
1145
1146 return lookup_str_id(name);
1147}
1148
1149// Used by yjit for handling .send without throwing exceptions
1150ID
1151rb_get_symbol_id(VALUE name)
1152{
1153 if (STATIC_SYM_P(name)) {
1154 return STATIC_SYM2ID(name);
1155 }
1156 else if (DYNAMIC_SYM_P(name)) {
1157 if (SYMBOL_PINNED_P(name)) {
1158 return RSYMBOL(name)->id;
1159 }
1160 else {
1161 return 0;
1162 }
1163 }
1164 else if (RB_TYPE_P(name, T_STRING)) {
1165 return lookup_str_id(name);
1166 }
1167 else {
1168 return 0;
1169 }
1170}
1171
1172
1173VALUE
1174rb_check_symbol(volatile VALUE *namep)
1175{
1176 VALUE sym;
1177 VALUE tmp;
1178 VALUE name = *namep;
1179
1180 if (STATIC_SYM_P(name)) {
1181 return name;
1182 }
1183 else if (DYNAMIC_SYM_P(name)) {
1184 if (!SYMBOL_PINNED_P(name)) {
1185 GLOBAL_SYMBOLS_ENTER(symbols);
1186 {
1187 name = dsymbol_check(symbols, name);
1188 }
1189 GLOBAL_SYMBOLS_LEAVE();
1190
1191 *namep = name;
1192 }
1193 return name;
1194 }
1195 else if (!RB_TYPE_P(name, T_STRING)) {
1196 tmp = rb_check_string_type(name);
1197 if (NIL_P(tmp)) {
1198 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1199 name);
1200 }
1201 name = tmp;
1202 *namep = name;
1203 }
1204
1205 sym_check_asciionly(name, false);
1206
1207 if ((sym = lookup_str_sym(name)) != 0) {
1208 return sym;
1209 }
1210
1211 return Qnil;
1212}
1213
1214ID
1215rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1216{
1217 struct RString fake_str;
1218 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1219
1220 sym_check_asciionly(name, true);
1221
1222 return lookup_str_id(name);
1223}
1224
1225VALUE
1226rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1227{
1228 VALUE sym;
1229 struct RString fake_str;
1230 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1231
1232 sym_check_asciionly(name, true);
1233
1234 if ((sym = lookup_str_sym(name)) != 0) {
1235 return sym;
1236 }
1237
1238 return Qnil;
1239}
1240
1241#undef rb_sym_intern_ascii_cstr
1242#ifdef __clang__
1243NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1244#else
1245FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1246FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1247FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1248#endif
1249
1250VALUE
1251rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1252{
1253 struct RString fake_str;
1254 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1255 return rb_str_intern(name);
1256}
1257
1258VALUE
1259rb_sym_intern_ascii(const char *ptr, long len)
1260{
1261 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1262}
1263
1264VALUE
1265rb_sym_intern_ascii_cstr(const char *ptr)
1266{
1267 return rb_sym_intern_ascii(ptr, strlen(ptr));
1268}
1269
1270VALUE
1271rb_to_symbol_type(VALUE obj)
1272{
1273 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1274}
1275
1276int
1277rb_is_const_name(VALUE name)
1278{
1279 return rb_str_symname_type(name, 0) == ID_CONST;
1280}
1281
1282int
1283rb_is_class_name(VALUE name)
1284{
1285 return rb_str_symname_type(name, 0) == ID_CLASS;
1286}
1287
1288int
1289rb_is_instance_name(VALUE name)
1290{
1291 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1292}
1293
1294int
1295rb_is_local_name(VALUE name)
1296{
1297 return rb_str_symname_type(name, 0) == ID_LOCAL;
1298}
1299
1300#include "id_table.c"
1301#include "symbol.rbinc"
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
Definition assert.h:291
static bool rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isupper(), except it additionally takes an encoding.
Definition ctype.h:124
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
Definition ctype.h:63
static bool rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
Identical to rb_islower(), except it additionally takes an encoding.
Definition ctype.h:110
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ISUPPER
Old name of rb_isupper.
Definition ctype.h:89
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
Definition fl_type.h:135
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define STATIC_SYM_P
Old name of RB_STATIC_SYM_P.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:517
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qtrue
Old name of RUBY_Qtrue.
#define DYNAMIC_SYM_P
Old name of RB_DYNAMIC_SYM_P.
Definition value_type.h:86
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:516
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition fl_type.h:59
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:657
#define ISALNUM
Old name of rb_isalnum.
Definition ctype.h:91
void rb_name_error(ID id, const char *fmt,...)
Raises an instance of rb_eNameError.
Definition error.c:2344
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1430
void rb_name_error_str(VALUE str, const char *fmt,...)
Identical to rb_name_error(), except it takes a VALUE instead of ID.
Definition error.c:2359
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_eEncodingError
EncodingError exception.
Definition error.c:1436
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:104
VALUE rb_cSymbol
Symbol class.
Definition string.c:79
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition gc.h:603
Encoding related APIs.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition string.c:900
int rb_enc_symname_p(const char *str, rb_encoding *enc)
Identical to rb_symname_p(), except it additionally takes an encoding.
Definition symbol.c:211
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id_cstr(), except for the return type.
Definition symbol.c:1226
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
Definition symbol.c:414
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id(), except it takes a pointer to a memory region instead of Ruby's string.
Definition symbol.c:1215
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
Definition symbol.c:1042
int rb_is_global_id(ID id)
Classifies the given ID, then sees if it is a global variable.
Definition symbol.c:1075
int rb_is_instance_id(ID id)
Classifies the given ID, then sees if it is an instance variable.
Definition symbol.c:1081
int rb_is_const_id(ID id)
Classifies the given ID, then sees if it is a constant.
Definition symbol.c:1063
int rb_is_junk_id(ID)
Classifies the given ID, then sees if it is a junk ID.
Definition symbol.c:1099
int rb_symname_p(const char *str)
Sees if the passed C string constructs a valid syntactic symbol.
Definition symbol.c:205
int rb_is_class_id(ID id)
Classifies the given ID, then sees if it is a class variable.
Definition symbol.c:1069
int rb_is_attrset_id(ID id)
Classifies the given ID, then sees if it is an attribute writer.
Definition symbol.c:1087
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1093
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition string.c:4045
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition string.c:11449
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1916
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition string.c:4031
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3444
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
Definition string.c:2850
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
Definition symbol.c:878
VALUE rb_check_symbol(volatile VALUE *namep)
Identical to rb_check_id(), except it returns an instance of rb_cSymbol instead.
Definition symbol.c:1174
VALUE rb_id2sym(ID id)
Allocates an instance of rb_cSymbol that has the given id.
Definition symbol.c:951
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1117
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
Definition symbol.c:970
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:917
int len
Length of the buffer.
Definition io.h:8
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:51
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:76
@ RUBY_SPECIAL_SHIFT
Least significant 8 bits are reserved.
Ruby's String.
Definition rstring.h:196
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376