Ruby 3.4.3p32 (2025-04-14 revision d0b7e5b6a04bde21ca483d20a1546b28b401c2d4)
ruby_parser.c
1/* This is a wrapper for parse.y */
2
3#include "internal/parse.h"
4#include "internal/re.h"
5#include "internal/ruby_parser.h"
6
7#include "node.h"
8#include "rubyparser.h"
9#include "internal/error.h"
10
11#ifdef UNIVERSAL_PARSER
12
13#include "internal.h"
14#include "internal/array.h"
15#include "internal/bignum.h"
16#include "internal/compile.h"
17#include "internal/complex.h"
18#include "internal/encoding.h"
19#include "internal/gc.h"
20#include "internal/hash.h"
21#include "internal/io.h"
22#include "internal/rational.h"
23#include "internal/re.h"
24#include "internal/string.h"
25#include "internal/symbol.h"
26#include "internal/thread.h"
27
28#include "ruby/ractor.h"
29#include "ruby/ruby.h"
30#include "ruby/util.h"
31#include "internal.h"
32#include "vm_core.h"
33#include "symbol.h"
34
35#define parser_encoding const void
36
37RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
38static VALUE
39syntax_error_append(VALUE exc, VALUE file, int line, int column,
40 parser_encoding *enc, const char *fmt, va_list args)
41{
42 return rb_syntax_error_append(exc, file, line, column, enc, fmt, args);
43}
44
45static int
46local_defined(ID id, const void *p)
47{
48 return rb_local_defined(id, (const rb_iseq_t *)p);
49}
50
51static int
52dvar_defined(ID id, const void *p)
53{
54 return rb_dvar_defined(id, (const rb_iseq_t *)p);
55}
56
57static int
58is_usascii_enc(parser_encoding *enc)
59{
60 return rb_is_usascii_enc(enc);
61}
62
63static int
64is_local_id2(ID id)
65{
66 return is_local_id(id);
67}
68
69static int
70is_attrset_id2(ID id)
71{
72 return is_attrset_id(id);
73}
74
75static int
76is_notop_id2(ID id)
77{
78 return is_notop_id(id);
79}
80
81static VALUE
82enc_str_new(const char *ptr, long len, parser_encoding *enc)
83{
84 return rb_enc_str_new(ptr, len, enc);
85}
86
87static int
88enc_isalnum(OnigCodePoint c, parser_encoding *enc)
89{
90 return rb_enc_isalnum(c, enc);
91}
92
93static int
94enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
95{
96 return rb_enc_precise_mbclen(p, e, enc);
97}
98
/*
 * Function form of the MBCLEN_CHARFOUND_P() macro, so that it can be
 * stored in the parser config table (.mbclen_charfound_p).
 */
static int
mbclen_charfound_p(int len)
{
    int found = MBCLEN_CHARFOUND_P(len);

    return found;
}
104
/*
 * Function form of the MBCLEN_CHARFOUND_LEN() macro, so that it can be
 * stored in the parser config table (.mbclen_charfound_len).
 *
 * `len` is a result code as produced by enc_precise_mbclen(); the return
 * value is whatever MBCLEN_CHARFOUND_LEN() extracts from it.
 */
static int
mbclen_charfound_len(int len)
{
    /* The body had lost its only statement, leaving a non-void function
     * without a return value. */
    return MBCLEN_CHARFOUND_LEN(len);
}
110
111static const char *
112enc_name(parser_encoding *enc)
113{
114 return rb_enc_name(enc);
115}
116
117static char *
118enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
119{
120 return rb_enc_prev_char(s, p, e, enc);
121}
122
123static parser_encoding *
124enc_get(VALUE obj)
125{
126 return rb_enc_get(obj);
127}
128
129static int
130enc_asciicompat(parser_encoding *enc)
131{
132 return rb_enc_asciicompat(enc);
133}
134
135static parser_encoding *
136utf8_encoding(void)
137{
138 return rb_utf8_encoding();
139}
140
141static VALUE
142enc_associate(VALUE obj, parser_encoding *enc)
143{
144 return rb_enc_associate(obj, enc);
145}
146
147static parser_encoding *
148ascii8bit_encoding(void)
149{
150 return rb_ascii8bit_encoding();
151}
152
153static int
154enc_codelen(int c, parser_encoding *enc)
155{
156 return rb_enc_codelen(c, enc);
157}
158
159static int
160enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
161{
162 return rb_enc_mbcput(c, buf, enc);
163}
164
165static parser_encoding *
166enc_from_index(int idx)
167{
168 return rb_enc_from_index(idx);
169}
170
171static int
172enc_isspace(OnigCodePoint c, parser_encoding *enc)
173{
174 return rb_enc_isspace(c, enc);
175}
176
177static ID
178intern3(const char *name, long len, parser_encoding *enc)
179{
180 return rb_intern3(name, len, enc);
181}
182
183static parser_encoding *
184usascii_encoding(void)
185{
186 return rb_usascii_encoding();
187}
188
189static int
190enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
191{
192 return rb_enc_symname_type(name, len, enc, allowed_attrset);
193}
194
195typedef struct {
196 struct parser_params *parser;
197 rb_encoding *enc;
198 NODE *succ_block;
199 const rb_code_location_t *loc;
200 rb_parser_assignable_func assignable;
202
203static int
204reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
205 int back_num, int *back_refs, OnigRegex regex, void *arg0)
206{
208 struct parser_params* p = arg->parser;
209 rb_encoding *enc = arg->enc;
210 const rb_code_location_t *loc = arg->loc;
211 long len = name_end - name;
212 const char *s = (const char *)name;
213
214 return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc, arg->assignable);
215}
216
217static NODE *
218reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc,
219 rb_parser_assignable_func assignable)
220{
222
223 arg.parser = p;
224 arg.enc = rb_enc_get(regexp);
225 arg.succ_block = 0;
226 arg.loc = loc;
227 arg.assignable = assignable;
228 onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg);
229
230 if (!arg.succ_block) return 0;
231 return RNODE_BLOCK(arg.succ_block)->nd_next;
232}
233
234static int
235rtest(VALUE obj)
236{
237 return (int)RB_TEST(obj);
238}
239
240static int
241nil_p(VALUE obj)
242{
243 return (int)NIL_P(obj);
244}
245
246static VALUE
247syntax_error_new(void)
248{
250}
251
/*
 * Parser-config hook (.rb_memmove): memmove() of `n` elements of size
 * `t`.  The byte count comes from rbimpl_size_mul_or_raise(t, n).
 */
static void *
memmove2(void *dest, const void *src, size_t t, size_t n)
{
    size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return memmove(dest, src, nbytes);
}
257
/*
 * Parser-config hook (.nonempty_memcpy): ruby_nonempty_memcpy() of `n`
 * elements of size `t`.  The byte count comes from
 * rbimpl_size_mul_or_raise(t, n).
 */
static void *
nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
    size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return ruby_nonempty_memcpy(dest, src, nbytes);
}
263
264static VALUE
265ruby_verbose2(void)
266{
267 return ruby_verbose;
268}
269
/* Parser-config hook (.errno_ptr): forwards to rb_errno_ptr(). */
static int *
rb_errno_ptr2(void)
{
    int *errno_slot = rb_errno_ptr();

    return errno_slot;
}
275
/*
 * Parser-config hook (.zalloc): allocates a single element of `elemsiz`
 * bytes through ruby_xcalloc().
 */
static void *
zalloc(size_t elemsiz)
{
    void *block = ruby_xcalloc(1, elemsiz);

    return block;
}
281
/*
 * Parser-config hook (.gc_guard): function form of the RB_GC_GUARD()
 * macro, applied to the by-value parameter `obj`.
 */
static void
gc_guard(VALUE obj)
{
    RB_GC_GUARD(obj);
}
287
288static VALUE
289arg_error(void)
290{
291 return rb_eArgError;
292}
293
294static VALUE
295static_id2sym(ID id)
296{
297 return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
298}
299
300static long
301str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
302{
303 return rb_str_coderange_scan_restartable(s, e, enc, cr);
304}
305
306static int
307enc_mbminlen(parser_encoding *enc)
308{
309 return rb_enc_mbminlen(enc);
310}
311
312static bool
313enc_isascii(OnigCodePoint c, parser_encoding *enc)
314{
315 return rb_enc_isascii(c, enc);
316}
317
318static OnigCodePoint
319enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
320{
321 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
322 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
323
324 return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
325}
326
327extern VALUE rb_eArgError;
328
/*
 * The parser configuration used when UNIVERSAL_PARSER is defined.
 *
 * Each member is a callback (or constant) through which the parser
 * reaches the interpreter.  Members are bound either directly to an
 * interpreter function or to one of the static adapter functions defined
 * earlier in this file.  rb_ruby_parser_config() returns a pointer to
 * this table, and rb_parser_params_new() passes it to
 * rb_ruby_parser_new().
 */
static const rb_parser_config_t rb_global_parser_config = {
    /* memory allocation */
    .malloc = ruby_xmalloc,
    .calloc = ruby_xcalloc,
    .realloc = ruby_xrealloc,
    .free = ruby_xfree,
    .alloc_n = ruby_xmalloc2,
    .alloc = ruby_xmalloc,
    .realloc_n = ruby_xrealloc2,
    .zalloc = zalloc,
    .rb_memmove = memmove2,
    .nonempty_memcpy = nonempty_memcpy,
    .xmalloc_mul_add = rb_xmalloc_mul_add,

    .compile_callback = rb_suppress_tracing,
    .reg_named_capture_assign = reg_named_capture_assign,

    .attr_get = rb_attr_get,

    /* arrays */
    .ary_push = rb_ary_push,
    .ary_new_from_args = rb_ary_new_from_args,
    .ary_unshift = rb_ary_unshift,

    /* IDs and symbols */
    .make_temporary_id = rb_make_temporary_id,
    .is_local_id = is_local_id2,
    .is_attrset_id = is_attrset_id2,
    .is_global_name_punct = is_global_name_punct,
    .id_type = id_type,
    .id_attrset = rb_id_attrset,
    .intern = rb_intern,
    .intern2 = rb_intern2,
    .intern3 = intern3,
    .intern_str = rb_intern_str,
    .is_notop_id = is_notop_id2,
    .enc_symname_type = enc_symname_type,
    .id2name = rb_id2name,
    .id2str = rb_id2str,
    .id2sym = rb_id2sym,
    .sym2id = rb_sym2id,

    /* strings */
    .str_catf = rb_str_catf,
    .str_cat_cstr = rb_str_cat_cstr,
    .str_resize = rb_str_resize,
    .str_new = rb_str_new,
    .str_new_cstr = rb_str_new_cstr,
    .str_to_interned_str = rb_str_to_interned_str,
    .enc_str_new = enc_str_new,
    .str_vcatf = rb_str_vcatf,
    .rb_sprintf = rb_sprintf,
    .rstring_ptr = RSTRING_PTR,
    .rstring_len = RSTRING_LEN,
    .obj_as_string = rb_obj_as_string,

    .int2num = rb_int2num_inline,

    /* IO */
    .stderr_tty_p = rb_stderr_tty_p,
    .write_error_str = rb_write_error_str,
    .io_write = rb_io_write,
    .io_flush = rb_io_flush,
    .io_puts = rb_io_puts,

    .debug_output_stdout = rb_ractor_stdout,
    .debug_output_stderr = rb_ractor_stderr,

    /* encodings */
    .is_usascii_enc = is_usascii_enc,
    .enc_isalnum = enc_isalnum,
    .enc_precise_mbclen = enc_precise_mbclen,
    .mbclen_charfound_p = mbclen_charfound_p,
    .mbclen_charfound_len = mbclen_charfound_len,
    .enc_name = enc_name,
    .enc_prev_char = enc_prev_char,
    .enc_get = enc_get,
    .enc_asciicompat = enc_asciicompat,
    .utf8_encoding = utf8_encoding,
    .enc_associate = enc_associate,
    .ascii8bit_encoding = ascii8bit_encoding,
    .enc_codelen = enc_codelen,
    .enc_mbcput = enc_mbcput,
    .enc_find_index = rb_enc_find_index,
    .enc_from_index = enc_from_index,
    .enc_isspace = enc_isspace,
    .enc_coderange_7bit = ENC_CODERANGE_7BIT,
    .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
    .usascii_encoding = usascii_encoding,
    .enc_mbminlen = enc_mbminlen,
    .enc_isascii = enc_isascii,
    .enc_mbc_to_codepoint = enc_mbc_to_codepoint,

    /* variable lookup in the enclosing iseq */
    .local_defined = local_defined,
    .dvar_defined = dvar_defined,

    /* errors and exceptions */
    .syntax_error_append = syntax_error_append,
    .raise = rb_raise,
    .syntax_error_new = syntax_error_new,

    .errinfo = rb_errinfo,
    .set_errinfo = rb_set_errinfo,
    .exc_raise = rb_exc_raise,
    .make_exception = rb_make_exception,

    /* GC */
    .sized_xfree = ruby_sized_xfree,
    .sized_realloc_n = ruby_sized_realloc_n,
    .gc_guard = gc_guard,
    .gc_mark = rb_gc_mark,

    /* regexps */
    .reg_compile = rb_reg_compile,
    .reg_check_preprocess = rb_reg_check_preprocess,
    .memcicmp = rb_memcicmp,

    /* diagnostics */
    .compile_warn = rb_compile_warn,
    .compile_warning = rb_compile_warning,
    .bug = rb_bug,
    .fatal = rb_fatal,
    .verbose = ruby_verbose2,
    .errno_ptr = rb_errno_ptr2,

    .make_backtrace = rb_make_backtrace,

    /* numeric scanning */
    .scan_hex = ruby_scan_hex,
    .scan_oct = ruby_scan_oct,
    .scan_digits = ruby_scan_digits,
    .strtod = ruby_strtod,

    /* basic VALUE helpers and constants */
    .rtest = rtest,
    .nil_p = nil_p,
    .qnil = Qnil,
    .qfalse = Qfalse,
    .eArgError = arg_error,
    .long2int = rb_long2int,

    /* For Ripper */
    .static_id2sym = static_id2sym,
    .str_coderange_scan_restartable = str_coderange_scan_restartable,
};
462#endif
463
/*
 * Which member of the `data` union in struct ruby_parser is live, i.e.
 * what kind of input source the parser was last given.
 */
enum lex_type {
    lex_type_str = 0,   /* set by parser_compile_string0() */
    lex_type_io,        /* set by parser_compile_file_path() */
    lex_type_array,     /* set by parser_compile_array() */
    lex_type_generic,   /* set by parser_compile_generic() */
};
470
472 rb_parser_t *parser_params;
473 enum lex_type type;
474 union {
475 struct lex_pointer_string lex_str;
476 struct {
477 VALUE file;
478 } lex_io;
479 struct {
480 VALUE ary;
481 } lex_array;
482 } data;
483};
484
485static void
486parser_mark(void *ptr)
487{
488 struct ruby_parser *parser = (struct ruby_parser*)ptr;
489 rb_ruby_parser_mark(parser->parser_params);
490
491 switch (parser->type) {
492 case lex_type_str:
493 rb_gc_mark(parser->data.lex_str.str);
494 break;
495 case lex_type_io:
496 rb_gc_mark(parser->data.lex_io.file);
497 break;
498 case lex_type_array:
499 rb_gc_mark(parser->data.lex_array.ary);
500 break;
501 case lex_type_generic:
502 /* noop. Caller of rb_parser_compile_generic should mark the objects. */
503 break;
504 }
505}
506
507static void
508parser_free(void *ptr)
509{
510 struct ruby_parser *parser = (struct ruby_parser*)ptr;
511 rb_ruby_parser_free(parser->parser_params);
512 xfree(parser);
513}
514
515static size_t
516parser_memsize(const void *ptr)
517{
518 struct ruby_parser *parser = (struct ruby_parser*)ptr;
519 return rb_ruby_parser_memsize(parser->parser_params);
520}
521
/*
 * Typed-data descriptor for the object created by rb_parser_new(); the
 * payload is a struct ruby_parser.
 */
static const rb_data_type_t ruby_parser_data_type = {
    "parser",
    {
        parser_mark,    /* marks parser_params and the current input object */
        parser_free,    /* frees parser_params and the struct */
        parser_memsize, /* reports rb_ruby_parser_memsize() */
    },
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
531
532#ifdef UNIVERSAL_PARSER
533const rb_parser_config_t *
534rb_ruby_parser_config(void)
535{
536 return &rb_global_parser_config;
537}
538
539rb_parser_t *
540rb_parser_params_new(void)
541{
542 return rb_ruby_parser_new(&rb_global_parser_config);
543}
544#else
545rb_parser_t *
546rb_parser_params_new(void)
547{
548 return rb_ruby_parser_new();
549}
550#endif /* UNIVERSAL_PARSER */
551
552VALUE
553rb_parser_new(void)
554{
555 struct ruby_parser *parser;
556 rb_parser_t *parser_params;
557
558 /*
559 * Create parser_params ahead of vparser because
560 * rb_ruby_parser_new can run GC so if create vparser
561 * first, parser_mark tries to mark not initialized parser_params.
562 */
563 parser_params = rb_parser_params_new();
564 VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
565 &ruby_parser_data_type, parser);
566 parser->parser_params = parser_params;
567
568 return vparser;
569}
570
571void
572rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
573{
574 struct ruby_parser *parser;
575
576 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
577 rb_ruby_parser_set_options(parser->parser_params, print, loop, chomp, split);
578}
579
580VALUE
581rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
582{
583 struct ruby_parser *parser;
584
585 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
586 rb_ruby_parser_set_context(parser->parser_params, base, main);
587 return vparser;
588}
589
590void
591rb_parser_set_script_lines(VALUE vparser)
592{
593 struct ruby_parser *parser;
594
595 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
596 rb_ruby_parser_set_script_lines(parser->parser_params);
597}
598
599void
600rb_parser_error_tolerant(VALUE vparser)
601{
602 struct ruby_parser *parser;
603
604 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
605 rb_ruby_parser_error_tolerant(parser->parser_params);
606}
607
608void
609rb_parser_keep_tokens(VALUE vparser)
610{
611 struct ruby_parser *parser;
612
613 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
614 rb_ruby_parser_keep_tokens(parser->parser_params);
615}
616
617rb_parser_string_t *
618rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str)
619{
620 char *beg, *end, *start;
621 long len;
622 VALUE s = ptr_str->str;
623
624 beg = RSTRING_PTR(s);
625 len = RSTRING_LEN(s);
626 start = beg;
627 if (ptr_str->ptr) {
628 if (len == ptr_str->ptr) return 0;
629 beg += ptr_str->ptr;
630 len -= ptr_str->ptr;
631 }
632 end = memchr(beg, '\n', len);
633 if (end) len = ++end - beg;
634 ptr_str->ptr += len;
635 return rb_str_to_parser_string(p, rb_str_subseq(s, beg - start, len));
636}
637
638static rb_parser_string_t *
639lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
640{
641 return rb_parser_lex_get_str(p, (struct lex_pointer_string *)input);
642}
643
644static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
645
646static rb_ast_t*
647parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
648{
649 rb_ast_t *ast = rb_parser_compile(p, gets, fname, input, line);
650 parser_aset_script_lines_for(fname, ast->body.script_lines);
651 return ast;
652}
653
654static rb_ast_t*
655parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
656{
657 VALUE str = rb_str_new_frozen(s);
658
659 parser->type = lex_type_str;
660 parser->data.lex_str.str = str;
661 parser->data.lex_str.ptr = 0;
662
663 return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
664}
665
666static rb_encoding *
667must_be_ascii_compatible(VALUE s)
668{
669 rb_encoding *enc = rb_enc_get(s);
670 if (!rb_enc_asciicompat(enc)) {
671 rb_raise(rb_eArgError, "invalid source encoding");
672 }
673 return enc;
674}
675
676static rb_ast_t*
677parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
678{
679 must_be_ascii_compatible(s);
680 return parser_compile_string0(parser, f, s, line);
681}
682
683static rb_ast_t*
684parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
685{
686 return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
687}
688
689VALUE rb_io_gets_internal(VALUE io);
690
691static rb_parser_string_t *
692lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
693{
694 VALUE io = (VALUE)input;
695 VALUE line = rb_io_gets_internal(io);
696 if (NIL_P(line)) return 0;
697 return rb_str_to_parser_string(p, line);
698}
699
700static rb_parser_string_t *
701lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
702{
703 VALUE array = (VALUE)data;
704 VALUE str = rb_ary_entry(array, index);
705 if (!NIL_P(str)) {
706 StringValue(str);
707 if (!rb_enc_asciicompat(rb_enc_get(str))) {
708 rb_raise(rb_eArgError, "invalid source encoding");
709 }
710 return rb_str_to_parser_string(p, str);
711 }
712 else {
713 return 0;
714 }
715}
716
717static rb_ast_t*
718parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
719{
720 parser->type = lex_type_io;
721 parser->data.lex_io.file = file;
722
723 return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
724}
725
726static rb_ast_t*
727parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
728{
729 parser->type = lex_type_array;
730 parser->data.lex_array.ary = array;
731
732 return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
733}
734
735static rb_ast_t*
736parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
737{
738 parser->type = lex_type_generic;
739
740 return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
741}
742
743static void
744ast_free(void *ptr)
745{
746 rb_ast_t *ast = (rb_ast_t *)ptr;
747 rb_ast_free(ast);
748}
749
/*
 * Typed-data descriptor for objects that carry an rb_ast_t (see
 * ast_alloc() and rb_ruby_ast_new()).
 */
static const rb_data_type_t ast_data_type = {
    "AST",
    {
        NULL,     /* first slot unset; parser_mark sits here in ruby_parser_data_type */
        ast_free, /* releases the AST through rb_ast_free() */
        NULL, // No dsize() because this object does not appear in ObjectSpace.
    },
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
759
760static VALUE
761ast_alloc(void)
762{
763 return TypedData_Wrap_Struct(0, &ast_data_type, NULL);
764}
765
766VALUE
767rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
768{
769 struct ruby_parser *parser;
770 VALUE ast_value = ast_alloc();
771
772 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
773 DATA_PTR(ast_value) = parser_compile_file_path(parser, fname, file, start);
774 RB_GC_GUARD(vparser);
775
776 return ast_value;
777}
778
779VALUE
780rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
781{
782 struct ruby_parser *parser;
783 VALUE ast_value = ast_alloc();
784
785 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
786 DATA_PTR(ast_value) = parser_compile_array(parser, fname, array, start);
787 RB_GC_GUARD(vparser);
788
789 return ast_value;
790}
791
792VALUE
793rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
794{
795 struct ruby_parser *parser;
796 VALUE ast_value = ast_alloc();
797
798 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
799 DATA_PTR(ast_value) = parser_compile_generic(parser, lex_gets, fname, input, start);
800 RB_GC_GUARD(vparser);
801
802 return ast_value;
803}
804
805VALUE
806rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
807{
808 struct ruby_parser *parser;
809 VALUE ast_value = ast_alloc();
810
811 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
812 DATA_PTR(ast_value) = parser_compile_string(parser, f, s, line);
813 RB_GC_GUARD(vparser);
814
815 return ast_value;
816}
817
818VALUE
819rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
820{
821 struct ruby_parser *parser;
822 VALUE ast_value = ast_alloc();
823
824 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
825 DATA_PTR(ast_value) = parser_compile_string_path(parser, f, s, line);
826 RB_GC_GUARD(vparser);
827
828 return ast_value;
829}
830
831VALUE
832rb_parser_encoding(VALUE vparser)
833{
834 struct ruby_parser *parser;
835
836 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
837 return rb_enc_from_encoding(rb_ruby_parser_encoding(parser->parser_params));
838}
839
840VALUE
841rb_parser_end_seen_p(VALUE vparser)
842{
843 struct ruby_parser *parser;
844
845 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
846 return RBOOL(rb_ruby_parser_end_seen_p(parser->parser_params));
847}
848
849VALUE
850rb_parser_set_yydebug(VALUE vparser, VALUE flag)
851{
852 struct ruby_parser *parser;
853
854 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
855 rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
856 return flag;
857}
858
859void
860rb_set_script_lines_for(VALUE vparser, VALUE path)
861{
862 struct ruby_parser *parser;
863 VALUE hash;
864 ID script_lines;
865 CONST_ID(script_lines, "SCRIPT_LINES__");
866 if (!rb_const_defined_at(rb_cObject, script_lines)) return;
867 hash = rb_const_get_at(rb_cObject, script_lines);
868 if (RB_TYPE_P(hash, T_HASH)) {
869 rb_hash_aset(hash, path, Qtrue);
870 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
871 rb_ruby_parser_set_script_lines(parser->parser_params);
872 }
873}
874
875VALUE
876rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
877{
878 int i;
879 if (!lines) return Qnil;
880 if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
881 rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
882 }
883 VALUE script_lines = rb_ary_new_capa(lines->len);
884 for (i = 0; i < lines->len; i++) {
885 rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
886 rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
887 }
888 return script_lines;
889}
890
891VALUE
892rb_str_new_parser_string(rb_parser_string_t *str)
893{
894 VALUE string = rb_enc_literal_str(str->ptr, str->len, str->enc);
895 rb_enc_str_coderange(string);
896 return string;
897}
898
899VALUE
900rb_str_new_mutable_parser_string(rb_parser_string_t *str)
901{
902 return rb_enc_str_new(str->ptr, str->len, str->enc);
903}
904
905static VALUE
906negative_numeric(VALUE val)
907{
908 if (FIXNUM_P(val)) {
909 return LONG2FIX(-FIX2LONG(val));
910 }
911 if (SPECIAL_CONST_P(val)) {
912#if USE_FLONUM
913 if (FLONUM_P(val)) {
914 return DBL2NUM(-RFLOAT_VALUE(val));
915 }
916#endif
917 goto unknown;
918 }
919 switch (BUILTIN_TYPE(val)) {
920 case T_BIGNUM:
921 BIGNUM_NEGATE(val);
922 val = rb_big_norm(val);
923 break;
924 case T_RATIONAL:
925 RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
926 break;
927 case T_COMPLEX:
928 RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
929 RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
930 break;
931 case T_FLOAT:
932 val = DBL2NUM(-RFLOAT_VALUE(val));
933 break;
934 unknown:
935 default:
936 rb_bug("unknown literal type (%s) passed to negative_numeric",
937 rb_builtin_class_name(val));
938 break;
939 }
940 return val;
941}
942
943static VALUE
944integer_value(const char *val, int base)
945{
946 return rb_cstr_to_inum(val, base, FALSE);
947}
948
949static VALUE
950rational_value(const char *node_val, int base, int seen_point)
951{
952 VALUE lit;
953 char* val = strdup(node_val);
954 if (seen_point > 0) {
955 int len = (int)(strlen(val));
956 char *point = &val[seen_point];
957 size_t fraclen = len-seen_point-1;
958 memmove(point, point+1, fraclen+1);
959
960 lit = rb_rational_new(integer_value(val, base), rb_int_positive_pow(10, fraclen));
961 }
962 else {
963 lit = rb_rational_raw1(integer_value(val, base));
964 }
965
966 free(val);
967
968 return lit;
969}
970
971VALUE
972rb_node_integer_literal_val(const NODE *n)
973{
974 const rb_node_integer_t *node = RNODE_INTEGER(n);
975 VALUE val = integer_value(node->val, node->base);
976 if (node->minus) {
977 val = negative_numeric(val);
978 }
979 return val;
980}
981
982VALUE
983rb_node_float_literal_val(const NODE *n)
984{
985 const rb_node_float_t *node = RNODE_FLOAT(n);
986 double d = strtod(node->val, 0);
987 if (node->minus) {
988 d = -d;
989 }
990 VALUE val = DBL2NUM(d);
991 return val;
992}
993
994VALUE
995rb_node_rational_literal_val(const NODE *n)
996{
997 VALUE lit;
998 const rb_node_rational_t *node = RNODE_RATIONAL(n);
999
1000 lit = rational_value(node->val, node->base, node->seen_point);
1001
1002 if (node->minus) {
1003 lit = negative_numeric(lit);
1004 }
1005
1006 return lit;
1007}
1008
1009VALUE
1010rb_node_imaginary_literal_val(const NODE *n)
1011{
1012 VALUE lit;
1013 const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
1014
1015 enum rb_numeric_type type = node->type;
1016
1017 switch (type) {
1018 case integer_literal:
1019 lit = integer_value(node->val, node->base);
1020 break;
1021 case float_literal:{
1022 double d = strtod(node->val, 0);
1023 lit = DBL2NUM(d);
1024 break;
1025 }
1026 case rational_literal:
1027 lit = rational_value(node->val, node->base, node->seen_point);
1028 break;
1029 default:
1030 rb_bug("unreachable");
1031 }
1032
1033 lit = rb_complex_raw(INT2FIX(0), lit);
1034
1035 if (node->minus) {
1036 lit = negative_numeric(lit);
1037 }
1038 return lit;
1039}
1040
1041VALUE
1042rb_node_str_string_val(const NODE *node)
1043{
1044 rb_parser_string_t *str = RNODE_STR(node)->string;
1045 return rb_str_new_parser_string(str);
1046}
1047
1048VALUE
1049rb_node_sym_string_val(const NODE *node)
1050{
1051 rb_parser_string_t *str = RNODE_SYM(node)->string;
1052 return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
1053}
1054
1055VALUE
1056rb_node_dstr_string_val(const NODE *node)
1057{
1058 rb_parser_string_t *str = RNODE_DSTR(node)->string;
1059 return str ? rb_str_new_parser_string(str) : Qnil;
1060}
1061
1062VALUE
1063rb_node_dregx_string_val(const NODE *node)
1064{
1065 rb_parser_string_t *str = RNODE_DREGX(node)->string;
1066 return rb_str_new_parser_string(str);
1067}
1068
1069VALUE
1070rb_node_regx_string_val(const NODE *node)
1071{
1072 rb_node_regx_t *node_reg = RNODE_REGX(node);
1073 rb_parser_string_t *string = node_reg->string;
1074 VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
1075
1076 return rb_reg_compile(str, node_reg->options, NULL, 0);
1077}
1078
1079VALUE
1080rb_node_line_lineno_val(const NODE *node)
1081{
1082 return INT2FIX(node->nd_loc.beg_pos.lineno);
1083}
1084
1085VALUE
1086rb_node_file_path_val(const NODE *node)
1087{
1088 return rb_str_new_parser_string(RNODE_FILE(node)->path);
1089}
1090
1091VALUE
1092rb_node_encoding_val(const NODE *node)
1093{
1094 return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
1095}
1096
1097static void
1098parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
1099{
1100 VALUE hash, script_lines;
1101 ID script_lines_id;
1102 if (NIL_P(path) || !lines) return;
1103 CONST_ID(script_lines_id, "SCRIPT_LINES__");
1104 if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
1105 hash = rb_const_get_at(rb_cObject, script_lines_id);
1106 if (!RB_TYPE_P(hash, T_HASH)) return;
1107 if (rb_hash_lookup(hash, path) == Qnil) return;
1108 script_lines = rb_parser_build_script_lines_from(lines);
1109 rb_hash_aset(hash, path, script_lines);
1110}
1111
1112VALUE
1113rb_ruby_ast_new(const NODE *const root)
1114{
1115 rb_ast_t *ast;
1116 VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, ast);
1117#ifdef UNIVERSAL_PARSER
1118 ast->config = &rb_global_parser_config;
1119#endif
1120 ast->body = (rb_ast_body_t){
1121 .root = root,
1122 .frozen_string_literal = -1,
1123 .coverage_enabled = -1,
1124 .script_lines = NULL,
1125 .line_count = 0,
1126 };
1127 return ast_value;
1128}
1129
1130rb_ast_t *
1131rb_ruby_ast_data_get(VALUE ast_value)
1132{
1133 rb_ast_t *ast;
1134 if (NIL_P(ast_value)) return NULL;
1135 TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, ast);
1136 return ast;
1137}
#define RBIMPL_ATTR_FORMAT(x, y, z)
Wraps (or simulates) __attribute__((format))
Definition format.h:29
#define T_COMPLEX
Old name of RUBY_T_COMPLEX.
Definition value_type.h:59
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition double.h:28
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define T_FLOAT
Old name of RUBY_T_FLOAT.
Definition value_type.h:64
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
Definition value_type.h:57
#define SPECIAL_CONST_P
Old name of RB_SPECIAL_CONST_P.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
Definition coderange.h:179
#define LONG2FIX
Old name of RB_INT2FIX.
Definition long.h:49
#define SYMBOL_FLAG
Old name of RUBY_SYMBOL_FLAG.
#define T_RATIONAL
Old name of RUBY_T_RATIONAL.
Definition value_type.h:76
#define T_HASH
Old name of RUBY_T_HASH.
Definition value_type.h:65
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:517
#define FLONUM_P
Old name of RB_FLONUM_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
Definition long.h:46
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:516
#define DBL2NUM
Old name of rb_float_new.
Definition double.h:29
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
Definition symbol.h:47
#define ruby_verbose
This variable controls whether the interpreter is in debug mode.
Definition error.h:475
VALUE rb_eSyntaxError
SyntaxError exception.
Definition error.c:1447
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
Definition object.c:2138
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition string.c:904
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
Definition string.c:788
VALUE rb_int_positive_pow(long x, unsigned long y)
Raises the passed x to the power of y.
Definition numeric.c:4559
VALUE rb_rational_new(VALUE num, VALUE den)
Constructs a Rational, with reduction.
Definition rational.c:1974
#define rb_rational_raw1(x)
Shorthand of (x/1)r.
Definition rational.h:51
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
Definition string.c:3055
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
Definition string.c:1465
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
Definition string.c:1376
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1656
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1514
VALUE rb_const_get_at(VALUE space, ID name)
Identical to rb_const_defined_at(), except it returns the actual defined value.
Definition variable.c:3249
int rb_const_defined_at(VALUE space, ID name)
Identical to rb_const_defined(), except it doesn't look for parent classes.
Definition variable.c:3571
VALUE rb_id2sym(ID id)
Allocates an instance of rb_cSymbol that has the given id.
Definition symbol.c:951
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:917
int len
Length of the buffer.
Definition io.h:8
#define strtod(s, e)
Just another name of ruby_strtod.
Definition util.h:223
double ruby_strtod(const char *str, char **endptr)
Our own locale-insensitive version of strtod(3).
#define strdup(s)
Just another name of ruby_strdup.
Definition util.h:187
static VALUE rb_int2num_inline(int v)
Converts a C's int into an instance of rb_cInteger.
Definition int.h:239
#define rb_long2int
Just another name of rb_long2int_inline.
Definition long.h:62
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:167
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define DATA_PTR(obj)
Convenient getter macro.
Definition rdata.h:67
#define RREGEXP_PTR(obj)
Convenient accessor macro.
Definition rregexp.h:45
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:66
#define TypedData_Get_Struct(obj, type, data_type, sval)
Obtains a C struct from inside of a wrapper Ruby object.
Definition rtypeddata.h:515
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
Definition rtypeddata.h:449
struct rb_data_type_struct rb_data_type_t
This is the struct that holds necessary info for a struct.
Definition rtypeddata.h:197
#define TypedData_Make_Struct(klass, type, data_type, sval)
Identical to TypedData_Wrap_Struct, except it allocates a new data region internally instead of takin...
Definition rtypeddata.h:497
static bool RB_TEST(VALUE obj)
Emulates Ruby's "if" statement.
@ RUBY_SPECIAL_SHIFT
Least significant 8 bits are reserved.
#define RTEST
This is an old name of RB_TEST.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
Definition value_type.h:376