Ruby
3.4.3p32 (2025-04-14 revision d0b7e5b6a04bde21ca483d20a1546b28b401c2d4)
marshal.c
1
/**********************************************************************
2
3
marshal.c -
4
5
$Author$
6
created at: Thu Apr 27 16:30:01 JST 1995
7
8
Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10
**********************************************************************/
11
12
#include "ruby/internal/config.h"
13
14
#include <math.h>
15
#ifdef HAVE_FLOAT_H
16
#include <float.h>
17
#endif
18
#ifdef HAVE_IEEEFP_H
19
#include <ieeefp.h>
20
#endif
21
22
#include "encindex.h"
23
#include "id_table.h"
24
#include "internal.h"
25
#include "internal/array.h"
26
#include "internal/bignum.h"
27
#include "internal/class.h"
28
#include "internal/encoding.h"
29
#include "internal/error.h"
30
#include "internal/hash.h"
31
#include "internal/numeric.h"
32
#include "internal/object.h"
33
#include "internal/struct.h"
34
#include "internal/symbol.h"
35
#include "internal/util.h"
36
#include "internal/vm.h"
37
#include "
ruby/io.h
"
38
#include "
ruby/ruby.h
"
39
#include "ruby/st.h"
40
#include "
ruby/util.h
"
41
#include "builtin.h"
42
#include "shape.h"
43
44
/* Width in bits of one "short" unit used when splitting big integers. */
#define BITSPERSHORT (2*CHAR_BIT)
/* Mask that keeps only the lowest short of a value. */
#define SHORTMASK ((1<<BITSPERSHORT)-1)
/* Discards the lowest short of x by shifting it out. */
#define SHORTDN(x) RSHIFT((x),BITSPERSHORT)

#if SIZEOF_SHORT == SIZEOF_BDIGIT
/* One BDIGIT is one short wide: the digit count already is the short count. */
#define SHORTLEN(x) (x)
#else
/*
 * Returns how many shorts are needed for the `len` digits stored in `ds`.
 * Every digit below the top one contributes SIZEOF_BDIGIT/2 shorts; the top
 * digit contributes only as many shorts as it takes to shift it down to zero.
 */
static size_t
shortlen(size_t len, BDIGIT *ds)
{
    int top_shorts = 0;
    BDIGIT top;

    for (top = ds[len-1]; top; top = SHORTDN(top)) {
        top_shorts++;
    }
    return (len - 1)*SIZEOF_BDIGIT/2 + top_shorts;
}
/* NOTE: expands to a use of a variable named `d` that must be in scope at the call site. */
#define SHORTLEN(x) shortlen((x),d)
#endif
66
67
/* Format version; both bytes are written at the very start of every dump. */
#define MARSHAL_MAJOR   4
#define MARSHAL_MINOR   8

/* Type tags for immediates. */
#define TYPE_NIL        '0'
#define TYPE_TRUE       'T'
#define TYPE_FALSE      'F'
#define TYPE_FIXNUM     'i'

/* Type tags for heap objects and their class decorations. */
#define TYPE_EXTENDED   'e'     /* one per included/extended module entry (w_extended) */
#define TYPE_UCLASS     'C'     /* class name when it differs from the builtin base (w_uclass) */
#define TYPE_OBJECT     'o'
#define TYPE_DATA       'd'     /* T_DATA dumped through _dump_data */
#define TYPE_USERDEF    'u'     /* object dumped through _dump */
#define TYPE_USRMARSHAL 'U'     /* object dumped through marshal_dump */
#define TYPE_FLOAT      'f'
#define TYPE_BIGNUM     'l'
#define TYPE_STRING     '"'
#define TYPE_REGEXP     '/'
#define TYPE_ARRAY      '['
#define TYPE_HASH       '{'
#define TYPE_HASH_DEF   '}'     /* hash followed by its default value */
#define TYPE_STRUCT     'S'
#define TYPE_MODULE_OLD 'M'     /* NOTE(review): not written by the dump code visible here */
#define TYPE_CLASS      'c'
#define TYPE_MODULE     'm'

/* Symbols: a full symbol, or a back-reference to one already written. */
#define TYPE_SYMBOL     ':'
#define TYPE_SYMLINK    ';'

/* Prefix for a value followed by instance variables, and object back-reference. */
#define TYPE_IVAR       'I'
#define TYPE_LINK       '@'
98
99
/* Interned method names used for user-defined dump/load hooks. */
static ID s_dump;
static ID s_load;
static ID s_mdump;
static ID s_mload;
static ID s_dump_data;
static ID s_load_data;
static ID s_alloc;
static ID s_call;
/* Interned method names invoked on the source/destination port. */
static ID s_getbyte;
static ID s_read;
static ID s_write;
static ID s_binmode;
/* Pseudo instance-variable names written for encoding and keyword-hash flags. */
static ID s_encoding_short;
static ID s_ruby2_keywords_flag;

/*
 * C-string spelling of each ID above.  The name_<id> form is required:
 * the *_funcall macros paste `name_` onto the ID identifier.
 */
#define name_s_dump                "_dump"
#define name_s_load                "_load"
#define name_s_mdump               "marshal_dump"
#define name_s_mload               "marshal_load"
#define name_s_dump_data           "_dump_data"
#define name_s_load_data           "_load_data"
#define name_s_alloc               "_alloc"
#define name_s_call                "call"
#define name_s_getbyte             "getbyte"
#define name_s_read                "read"
#define name_s_write               "write"
#define name_s_binmode             "binmode"
#define name_s_encoding_short      "E"
#define name_s_ruby2_keywords_flag "K"
118
119
typedef struct {
120
VALUE newclass;
121
VALUE oldclass;
122
VALUE (*dumper)(VALUE);
123
VALUE (*loader)(VALUE, VALUE);
124
} marshal_compat_t;
125
126
static st_table *compat_allocator_tbl;
127
static VALUE compat_allocator_tbl_wrapper;
128
static VALUE rb_marshal_dump_limited(VALUE obj, VALUE port, int limit);
129
static VALUE rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze);
130
131
static st_table *compat_allocator_table(void);
132
133
void
134
rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
135
{
136
marshal_compat_t *compat;
137
rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
138
139
if (!allocator) {
140
rb_raise(rb_eTypeError, "no allocator");
141
}
142
143
compat_allocator_table();
144
compat = ALLOC(marshal_compat_t);
145
RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->newclass, newclass);
146
RB_OBJ_WRITE(compat_allocator_tbl_wrapper, &compat->oldclass, oldclass);
147
compat->dumper = dumper;
148
compat->loader = loader;
149
150
st_insert(compat_allocator_table(), (st_data_t)allocator, (st_data_t)compat);
151
}
152
153
struct dump_arg {
154
VALUE str, dest;
155
st_table *symbols;
156
st_table *data;
157
st_table *compat_tbl;
158
st_table *encodings;
159
st_table *userdefs;
160
st_index_t num_entries;
161
};
162
163
struct dump_call_arg {
164
VALUE obj;
165
struct dump_arg *arg;
166
int limit;
167
};
168
169
static VALUE
170
check_dump_arg(VALUE ret, struct dump_arg *arg, const char *name)
171
{
172
if (!arg->symbols) {
173
rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
174
name);
175
}
176
return ret;
177
}
178
179
static VALUE
180
check_userdump_arg(VALUE obj, ID sym, int argc, const VALUE *argv,
181
struct dump_arg *arg, const char *name)
182
{
183
VALUE ret = rb_funcallv(obj, sym, argc, argv);
184
VALUE klass = CLASS_OF(obj);
185
if (CLASS_OF(ret) == klass) {
186
rb_raise(rb_eRuntimeError, "%"PRIsVALUE"#%s returned same class instance",
187
klass, name);
188
}
189
return check_dump_arg(ret, arg, name);
190
}
191
192
/*
 * Call method `sym` on `obj` during a dump and validate the outcome.
 * `sym` must be spelled as one of the s_* identifiers: it is pasted onto
 * `name_` to obtain the method name used in error messages.
 */
#define dump_funcall(arg, obj, sym, argc, argv) \
    check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
/* Same, but through rb_check_funcall and with only the reentrancy check. */
#define dump_check_funcall(arg, obj, sym, argc, argv) \
    check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
196
197
static void clear_dump_arg(struct dump_arg *arg);
198
199
static void
200
mark_dump_arg(void *ptr)
201
{
202
struct dump_arg *p = ptr;
203
if (!p->symbols)
204
return;
205
rb_mark_set(p->symbols);
206
rb_mark_set(p->data);
207
rb_mark_hash(p->compat_tbl);
208
rb_mark_set(p->userdefs);
209
rb_gc_mark(p->str);
210
}
211
212
/* GC free callback for the dump state wrapper: releases the tables it owns. */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *state = ptr;

    clear_dump_arg(state);
}
217
218
static size_t
219
memsize_dump_arg(const void *ptr)
220
{
221
const struct dump_arg *p = (struct dump_arg *)ptr;
222
size_t memsize = 0;
223
if (p->symbols) memsize += rb_st_memsize(p->symbols);
224
if (p->data) memsize += rb_st_memsize(p->data);
225
if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
226
if (p->userdefs) memsize += rb_st_memsize(p->userdefs);
227
if (p->encodings) memsize += rb_st_memsize(p->encodings);
228
return memsize;
229
}
230
231
static const rb_data_type_t dump_arg_data = {
232
"dump_arg",
233
{mark_dump_arg, free_dump_arg, memsize_dump_arg,},
234
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
235
};
236
237
static VALUE
238
must_not_be_anonymous(const char *type, VALUE path)
239
{
240
char *n = RSTRING_PTR(path);
241
242
if (!rb_enc_asciicompat(rb_enc_get(path))) {
243
/* cannot occur? */
244
rb_raise(rb_eTypeError, "can't dump non-ascii %s name % "PRIsVALUE,
245
type, path);
246
}
247
if (n[0] == '#') {
248
rb_raise(rb_eTypeError, "can't dump anonymous %s % "PRIsVALUE,
249
type, path);
250
}
251
return path;
252
}
253
254
static VALUE
255
class2path(VALUE klass)
256
{
257
VALUE path = rb_class_path(klass);
258
259
must_not_be_anonymous((RB_TYPE_P(klass, T_CLASS) ? "class" : "module"), path);
260
if (rb_path_to_class(path) != rb_class_real(klass)) {
261
rb_raise(rb_eTypeError, "% "PRIsVALUE" can't be referred to", path);
262
}
263
return path;
264
}
265
266
int ruby_marshal_write_long(long x, char *buf);
267
static void w_long(long, struct dump_arg*);
268
static int w_encoding(VALUE encname, struct dump_call_arg *arg);
269
static VALUE encoding_name(VALUE obj, struct dump_arg *arg);
270
271
static void
272
w_nbyte(const char *s, long n, struct dump_arg *arg)
273
{
274
VALUE buf = arg->str;
275
rb_str_buf_cat(buf, s, n);
276
if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
277
rb_io_write(arg->dest, buf);
278
rb_str_resize(buf, 0);
279
}
280
}
281
282
/* Writes the single byte `c`. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char *one = &c;

    w_nbyte(one, 1, arg);
}
287
288
/* Writes a length-prefixed byte sequence: `n` as a marshal long, then the `n` bytes of `s`. */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    w_long(n, arg);             /* length header */
    w_nbyte(s, n, arg);         /* payload */
}

/* Length-prefixed write of a NUL-terminated C string (terminator excluded). */
#define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
296
297
/* Writes the low 16 bits of `x` as two bytes, least significant byte first. */
static void
w_short(int x, struct dump_arg *arg)
{
    int shift;

    for (shift = 0; shift <= 8; shift += 8) {
        w_byte((char)((x >> shift) & 0xff), arg);
    }
}
303
304
static void
305
w_long(long x, struct dump_arg *arg)
306
{
307
char buf[sizeof(long)+1];
308
int i = ruby_marshal_write_long(x, buf);
309
if (i < 0) {
310
rb_raise(rb_eTypeError, "long too big to dump");
311
}
312
w_nbyte(buf, i, arg);
313
}
314
315
/*
 * Encodes `x` into `buf` using the marshal integer format and returns the
 * number of bytes produced, or -1 when `x` does not fit in 32 bits (only
 * checked where long is wider than 4 bytes).
 *
 *   0            -> single byte 0
 *   1..122       -> single byte x + 5
 *   -123..-1     -> single byte (x - 5) & 0xff
 *   otherwise    -> count byte (n, or -n for negatives) followed by the
 *                   n low-order bytes of x, least significant first
 *
 * `buf` must have room for sizeof(long) + 1 bytes.
 */
int
ruby_marshal_write_long(long x, char *buf)
{
    int nbytes;

#if SIZEOF_LONG > 4
    {
        const long high = RSHIFT(x, 31);

        if (high != 0 && high != -1) {
            /* big long does not fit in 4 bytes */
            return -1;
        }
    }
#endif

    if (x == 0) {
        buf[0] = 0;
        return 1;
    }
    if (0 < x && x < 123) {
        buf[0] = (char)(x + 5);
        return 1;
    }
    if (-124 < x && x < 0) {
        buf[0] = (char)((x - 5)&0xff);
        return 1;
    }

    /* Emit low-order bytes until only the sign extension is left. */
    for (nbytes = 1; nbytes < (int)sizeof(long)+1; nbytes++) {
        buf[nbytes] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            buf[0] = (char)(x == 0 ? nbytes : -nbytes);
            break;
        }
    }
    return nbytes+1;
}
353
354
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16) /* from IEEE754 double precision */

/* Number of mantissa bits consumed per step in load_mantissa(). */
#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines `d` with extra binary mantissa bytes found in `buf`.
 *
 * `buf`/`len` describe the trailing bytes: a zero marker byte followed by
 * mantissa bytes, most significant first.  `d` is returned untouched when
 * `len` is 0 or 1, or when the first byte is not the zero marker.
 * Otherwise the magnitude of `d` is scaled so that its top DECIMAL_MANT
 * bits form an integer, the fractional part is replaced by the bytes from
 * `buf`, and the result is scaled back with the original sign.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    int e, s, dig = 0;
    unsigned long m;

    if (!len) return d;
    len--;                      /* account for the marker byte */
    if (len <= 0) return d;
    if (*buf++) return d;       /* no binary mantissa mark */

    s = d < 0;
    modf(ldexp(frexp(fabs(d), &e), DECIMAL_MANT), &d);
    do {
        m = 0;
        /* Gather up to MANT_BITS/8 bytes; fewer on the final, shorter step. */
        switch (len) {
          default: m = *buf++ & 0xff; /* fall through */
#if MANT_BITS > 24
          case 3: m = (m << 8) | (*buf++ & 0xff); /* fall through */
#endif
#if MANT_BITS > 16
          case 2: m = (m << 8) | (*buf++ & 0xff); /* fall through */
#endif
#if MANT_BITS > 8
          case 1: m = (m << 8) | (*buf++ & 0xff);
#endif
        }
        dig -= len < MANT_BITS / 8 ? 8 * (unsigned)len : MANT_BITS;
        d += ldexp((double)m, dig);
    } while ((len -= MANT_BITS / 8) > 0);
    d = ldexp(d, e - DECIMAL_MANT);
    if (s) d = -d;
    return d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
401
402
#ifdef DBL_DIG
403
#define FLOAT_DIG (DBL_DIG+2)
404
#else
405
#define FLOAT_DIG 17
406
#endif
407
408
static void
409
w_float(double d, struct dump_arg *arg)
410
{
411
char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
412
413
if (isinf(d)) {
414
if (d < 0) w_cstr("-inf", arg);
415
else w_cstr("inf", arg);
416
}
417
else if (isnan(d)) {
418
w_cstr("nan", arg);
419
}
420
else if (d == 0.0) {
421
if (signbit(d)) w_cstr("-0", arg);
422
else w_cstr("0", arg);
423
}
424
else {
425
int decpt, sign, digs, len = 0;
426
char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
427
if (sign) buf[len++] = '-';
428
digs = (int)(e - p);
429
if (decpt < -3 || decpt > digs) {
430
buf[len++] = p[0];
431
if (--digs > 0) buf[len++] = '.';
432
memcpy(buf + len, p + 1, digs);
433
len += digs;
434
len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
435
}
436
else if (decpt > 0) {
437
memcpy(buf + len, p, decpt);
438
len += decpt;
439
if ((digs -= decpt) > 0) {
440
buf[len++] = '.';
441
memcpy(buf + len, p + decpt, digs);
442
len += digs;
443
}
444
}
445
else {
446
buf[len++] = '0';
447
buf[len++] = '.';
448
if (decpt) {
449
memset(buf + len, '0', -decpt);
450
len -= decpt;
451
}
452
memcpy(buf + len, p, digs);
453
len += digs;
454
}
455
free(p);
456
w_bytes(buf, len, arg);
457
}
458
}
459
460
static void
461
w_symbol(VALUE sym, struct dump_arg *arg)
462
{
463
st_data_t num;
464
VALUE encname;
465
466
if (st_lookup(arg->symbols, sym, &num)) {
467
w_byte(TYPE_SYMLINK, arg);
468
w_long((long)num, arg);
469
}
470
else {
471
const VALUE orig_sym = sym;
472
sym = rb_sym2str(sym);
473
if (!sym) {
474
rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
475
}
476
encname = encoding_name(sym, arg);
477
if (NIL_P(encname) ||
478
is_ascii_string(sym)) {
479
encname = Qnil;
480
}
481
else {
482
w_byte(TYPE_IVAR, arg);
483
}
484
w_byte(TYPE_SYMBOL, arg);
485
w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
486
st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
487
if (!NIL_P(encname)) {
488
struct dump_call_arg c_arg;
489
c_arg.limit = 1;
490
c_arg.arg = arg;
491
w_long(1L, arg);
492
w_encoding(encname, &c_arg);
493
}
494
}
495
}
496
497
static void
498
w_unique(VALUE s, struct dump_arg *arg)
499
{
500
must_not_be_anonymous("class", s);
501
w_symbol(rb_str_intern(s), arg);
502
}
503
504
static void w_object(VALUE,struct dump_arg*,int);
505
506
static int
507
hash_each(VALUE key, VALUE value, VALUE v)
508
{
509
struct dump_call_arg *arg = (void *)v;
510
w_object(key, arg->arg, arg->limit);
511
w_object(value, arg->arg, arg->limit);
512
return ST_CONTINUE;
513
}
514
515
#define SINGLETON_DUMP_UNABLE_P(klass) \
516
(rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
517
rb_ivar_count(klass) > 0)
518
519
static void
520
w_extended(VALUE klass, struct dump_arg *arg, int check)
521
{
522
if (check && RCLASS_SINGLETON_P(klass)) {
523
VALUE origin = RCLASS_ORIGIN(klass);
524
if (SINGLETON_DUMP_UNABLE_P(klass) ||
525
(origin != klass && SINGLETON_DUMP_UNABLE_P(origin))) {
526
rb_raise(rb_eTypeError, "singleton can't be dumped");
527
}
528
klass = RCLASS_SUPER(klass);
529
}
530
while (BUILTIN_TYPE(klass) == T_ICLASS) {
531
if (!FL_TEST(klass, RICLASS_IS_ORIGIN) ||
532
BUILTIN_TYPE(RBASIC(klass)->klass) != T_MODULE) {
533
VALUE path = rb_class_name(RBASIC(klass)->klass);
534
w_byte(TYPE_EXTENDED, arg);
535
w_unique(path, arg);
536
}
537
klass = RCLASS_SUPER(klass);
538
}
539
}
540
541
static void
542
w_class(char type, VALUE obj, struct dump_arg *arg, int check)
543
{
544
VALUE path;
545
st_data_t real_obj;
546
VALUE klass;
547
548
if (arg->compat_tbl &&
549
st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
550
obj = (VALUE)real_obj;
551
}
552
klass = CLASS_OF(obj);
553
w_extended(klass, arg, check);
554
w_byte(type, arg);
555
path = class2path(rb_class_real(klass));
556
w_unique(path, arg);
557
}
558
559
static void
560
w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
561
{
562
VALUE klass = CLASS_OF(obj);
563
564
w_extended(klass, arg, TRUE);
565
klass = rb_class_real(klass);
566
if (klass != super) {
567
w_byte(TYPE_UCLASS, arg);
568
w_unique(class2path(klass), arg);
569
}
570
}
571
572
static bool
573
rb_hash_ruby2_keywords_p(VALUE obj)
574
{
575
return (RHASH(obj)->basic.flags & RHASH_PASS_AS_KEYWORDS) != 0;
576
}
577
578
static void
579
rb_hash_ruby2_keywords(VALUE obj)
580
{
581
RHASH(obj)->basic.flags |= RHASH_PASS_AS_KEYWORDS;
582
}
583
584
static inline bool
585
to_be_skipped_id(const ID id)
586
{
587
if (id == s_encoding_short) return true;
588
if (id == s_ruby2_keywords_flag) return true;
589
if (id == rb_id_encoding()) return true;
590
return !rb_id2str(id);
591
}
592
593
struct w_ivar_arg {
594
struct dump_call_arg *dump;
595
st_data_t num_ivar;
596
};
597
598
static int
599
w_obj_each(ID id, VALUE value, st_data_t a)
600
{
601
struct w_ivar_arg *ivarg = (struct w_ivar_arg *)a;
602
struct dump_call_arg *arg = ivarg->dump;
603
604
if (to_be_skipped_id(id)) {
605
if (id == s_encoding_short) {
606
rb_warn("instance variable '"name_s_encoding_short"' on class %"PRIsVALUE" is not dumped",
607
CLASS_OF(arg->obj));
608
}
609
if (id == s_ruby2_keywords_flag) {
610
rb_warn("instance variable '"name_s_ruby2_keywords_flag"' on class %"PRIsVALUE" is not dumped",
611
CLASS_OF(arg->obj));
612
}
613
return ST_CONTINUE;
614
}
615
--ivarg->num_ivar;
616
w_symbol(ID2SYM(id), arg->arg);
617
w_object(value, arg->arg, arg->limit);
618
return ST_CONTINUE;
619
}
620
621
static int
622
obj_count_ivars(ID id, VALUE val, st_data_t a)
623
{
624
if (!to_be_skipped_id(id) && UNLIKELY(!++*(st_index_t *)a)) {
625
rb_raise(rb_eRuntimeError, "too many instance variables");
626
}
627
return ST_CONTINUE;
628
}
629
630
static VALUE
631
encoding_name(VALUE obj, struct dump_arg *arg)
632
{
633
if (rb_enc_capable(obj)) {
634
int encidx = rb_enc_get_index(obj);
635
rb_encoding *enc = 0;
636
st_data_t name;
637
638
if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
639
return Qnil;
640
}
641
642
/* special treatment for US-ASCII and UTF-8 */
643
if (encidx == rb_usascii_encindex()) {
644
return Qfalse;
645
}
646
else if (encidx == rb_utf8_encindex()) {
647
return Qtrue;
648
}
649
650
if (arg->encodings ?
651
!st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name) :
652
(arg->encodings = st_init_strcasetable(), 1)) {
653
name = (st_data_t)rb_str_new_cstr(rb_enc_name(enc));
654
st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name);
655
}
656
return (VALUE)name;
657
}
658
else {
659
return Qnil;
660
}
661
}
662
663
static int
664
w_encoding(VALUE encname, struct dump_call_arg *arg)
665
{
666
int limit = arg->limit;
667
if (limit >= 0) ++limit;
668
switch (encname) {
669
case Qfalse:
670
case Qtrue:
671
w_symbol(ID2SYM(s_encoding_short), arg->arg);
672
w_object(encname, arg->arg, limit);
673
return 1;
674
case Qnil:
675
return 0;
676
}
677
w_symbol(ID2SYM(rb_id_encoding()), arg->arg);
678
w_object(encname, arg->arg, limit);
679
return 1;
680
}
681
682
static st_index_t
683
has_ivars(VALUE obj, VALUE encname, VALUE *ivobj)
684
{
685
st_index_t num = !NIL_P(encname);
686
687
if (SPECIAL_CONST_P(obj)) goto generic;
688
switch (BUILTIN_TYPE(obj)) {
689
case T_OBJECT:
690
case T_CLASS:
691
case T_MODULE:
692
break; /* counted elsewhere */
693
case T_HASH:
694
if (rb_hash_ruby2_keywords_p(obj)) ++num;
695
/* fall through */
696
default:
697
generic:
698
rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
699
if (num) *ivobj = obj;
700
}
701
702
return num;
703
}
704
705
static void
706
w_ivar_each(VALUE obj, st_index_t num, struct dump_call_arg *arg)
707
{
708
shape_id_t shape_id = rb_shape_get_shape_id(arg->obj);
709
struct w_ivar_arg ivarg = {arg, num};
710
if (!num) return;
711
rb_ivar_foreach(obj, w_obj_each, (st_data_t)&ivarg);
712
713
if (shape_id != rb_shape_get_shape_id(arg->obj)) {
714
rb_shape_t * expected_shape = rb_shape_get_shape_by_id(shape_id);
715
rb_shape_t * actual_shape = rb_shape_get_shape(arg->obj);
716
717
// If the shape tree got _shorter_ then we probably removed an IV
718
// If the shape tree got longer, then we probably added an IV.
719
// The exception message might not be accurate when someone adds and
720
// removes the same number of IVs, but they will still get an exception
721
if (rb_shape_depth(expected_shape) > rb_shape_depth(actual_shape)) {
722
rb_raise(rb_eRuntimeError, "instance variable removed from %"PRIsVALUE" instance",
723
CLASS_OF(arg->obj));
724
}
725
else {
726
rb_raise(rb_eRuntimeError, "instance variable added to %"PRIsVALUE" instance",
727
CLASS_OF(arg->obj));
728
}
729
}
730
}
731
732
static void
733
w_ivar(st_index_t num, VALUE ivobj, VALUE encname, struct dump_call_arg *arg)
734
{
735
w_long(num, arg->arg);
736
num -= w_encoding(encname, arg);
737
if (RB_TYPE_P(ivobj, T_HASH) && rb_hash_ruby2_keywords_p(ivobj)) {
738
int limit = arg->limit;
739
if (limit >= 0) ++limit;
740
w_symbol(ID2SYM(s_ruby2_keywords_flag), arg->arg);
741
w_object(Qtrue, arg->arg, limit);
742
num--;
743
}
744
if (!UNDEF_P(ivobj) && num) {
745
w_ivar_each(ivobj, num, arg);
746
}
747
}
748
749
static void
750
w_objivar(VALUE obj, struct dump_call_arg *arg)
751
{
752
st_data_t num = 0;
753
754
rb_ivar_foreach(obj, obj_count_ivars, (st_data_t)&num);
755
w_long(num, arg->arg);
756
w_ivar_each(obj, num, arg);
757
}
758
759
#if SIZEOF_LONG > 4
// Dumps a fixnum that does not fit in 31 bits using the bignum layout:
// TYPE_BIGNUM, sign byte, length in shorts, then the shorts of the
// magnitude starting with the least significant one.
static void
w_bigfixnum(VALUE obj, struct dump_arg *arg)
{
    RUBY_ASSERT(FIXNUM_P(obj));

    w_byte(TYPE_BIGNUM, arg);

#if SIZEOF_LONG == SIZEOF_VALUE
    long magnitude, rest;
    magnitude = FIX2LONG(obj);
#else
    long long magnitude, rest;
    magnitude = NUM2LL(obj);
#endif

    w_byte((char)(magnitude < 0 ? '-' : '+'), arg);

    // Guaranteed not to overflow, as FIXNUM is 1-bit less than long
    if (magnitude < 0) magnitude = -magnitude;

    // calculate the size in shorts
    int slen = 0;
    for (rest = magnitude; rest; rest = SHORTDN(rest)) {
        slen++;
    }

    RUBY_ASSERT(slen > 0 && slen <= SIZEOF_LONG / 2);

    w_long((long)slen, arg);

    for (int i = 0; i < slen; i++) {
        w_short(magnitude & SHORTMASK, arg);
        magnitude = SHORTDN(magnitude);
    }

    // The object is not added to the link table, but it still consumes
    // one object index.
    arg->num_entries++;

    RUBY_ASSERT(magnitude == 0);
}
#endif
808
809
static void
810
w_remember(VALUE obj, struct dump_arg *arg)
811
{
812
st_add_direct(arg->data, obj, arg->num_entries++);
813
}
814
815
static void
816
w_object(VALUE obj, struct dump_arg *arg, int limit)
817
{
818
struct dump_call_arg c_arg;
819
VALUE ivobj = Qundef;
820
st_data_t num;
821
st_index_t hasiv = 0;
822
VALUE encname = Qnil;
823
824
if (limit == 0) {
825
rb_raise(rb_eArgError, "exceed depth limit");
826
}
827
828
if (NIL_P(obj)) {
829
w_byte(TYPE_NIL, arg);
830
}
831
else if (obj == Qtrue) {
832
w_byte(TYPE_TRUE, arg);
833
}
834
else if (obj == Qfalse) {
835
w_byte(TYPE_FALSE, arg);
836
}
837
else if (FIXNUM_P(obj)) {
838
#if SIZEOF_LONG <= 4
839
w_byte(TYPE_FIXNUM, arg);
840
w_long(FIX2INT(obj), arg);
841
#else
842
if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
843
w_byte(TYPE_FIXNUM, arg);
844
w_long(FIX2LONG(obj), arg);
845
}
846
else {
847
w_bigfixnum(obj, arg);
848
}
849
#endif
850
}
851
else if (SYMBOL_P(obj)) {
852
w_symbol(obj, arg);
853
}
854
else {
855
if (st_lookup(arg->data, obj, &num)) {
856
w_byte(TYPE_LINK, arg);
857
w_long((long)num, arg);
858
return;
859
}
860
861
if (limit > 0) limit--;
862
c_arg.limit = limit;
863
c_arg.arg = arg;
864
c_arg.obj = obj;
865
866
if (FLONUM_P(obj)) {
867
w_remember(obj, arg);
868
w_byte(TYPE_FLOAT, arg);
869
w_float(RFLOAT_VALUE(obj), arg);
870
return;
871
}
872
873
VALUE v;
874
875
if (!RBASIC_CLASS(obj)) {
876
rb_raise(rb_eTypeError, "can't dump internal %s",
877
rb_builtin_type_name(BUILTIN_TYPE(obj)));
878
}
879
880
if (rb_obj_respond_to(obj, s_mdump, TRUE)) {
881
w_remember(obj, arg);
882
883
v = dump_funcall(arg, obj, s_mdump, 0, 0);
884
w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
885
w_object(v, arg, limit);
886
return;
887
}
888
if (rb_obj_respond_to(obj, s_dump, TRUE)) {
889
VALUE ivobj2 = Qundef;
890
st_index_t hasiv2;
891
VALUE encname2;
892
893
if (arg->userdefs && st_is_member(arg->userdefs, (st_data_t)obj)) {
894
rb_raise(rb_eRuntimeError, "can't dump recursive object using _dump()");
895
}
896
v = INT2NUM(limit);
897
v = dump_funcall(arg, obj, s_dump, 1, &v);
898
if (!RB_TYPE_P(v, T_STRING)) {
899
rb_raise(rb_eTypeError, "_dump() must return string");
900
}
901
hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
902
hasiv2 = has_ivars(v, (encname2 = encoding_name(v, arg)), &ivobj2);
903
if (hasiv2) {
904
hasiv = hasiv2;
905
ivobj = ivobj2;
906
encname = encname2;
907
}
908
if (hasiv) w_byte(TYPE_IVAR, arg);
909
w_class(TYPE_USERDEF, obj, arg, FALSE);
910
w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
911
if (hasiv) {
912
st_data_t userdefs = (st_data_t)obj;
913
if (!arg->userdefs) {
914
arg->userdefs = rb_init_identtable();
915
}
916
st_add_direct(arg->userdefs, userdefs, 0);
917
w_ivar(hasiv, ivobj, encname, &c_arg);
918
st_delete(arg->userdefs, &userdefs, NULL);
919
}
920
w_remember(obj, arg);
921
return;
922
}
923
924
w_remember(obj, arg);
925
926
hasiv = has_ivars(obj, (encname = encoding_name(obj, arg)), &ivobj);
927
{
928
st_data_t compat_data;
929
rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
930
if (st_lookup(compat_allocator_tbl,
931
(st_data_t)allocator,
932
&compat_data)) {
933
marshal_compat_t *compat = (marshal_compat_t*)compat_data;
934
VALUE real_obj = obj;
935
obj = compat->dumper(real_obj);
936
if (!arg->compat_tbl) {
937
arg->compat_tbl = rb_init_identtable();
938
}
939
st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
940
if (obj != real_obj && UNDEF_P(ivobj)) hasiv = 0;
941
}
942
}
943
if (hasiv) w_byte(TYPE_IVAR, arg);
944
945
switch (BUILTIN_TYPE(obj)) {
946
case T_CLASS:
947
if (FL_TEST(obj, FL_SINGLETON)) {
948
rb_raise(rb_eTypeError, "singleton class can't be dumped");
949
}
950
w_byte(TYPE_CLASS, arg);
951
{
952
VALUE path = class2path(obj);
953
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
954
RB_GC_GUARD(path);
955
}
956
break;
957
958
case T_MODULE:
959
w_byte(TYPE_MODULE, arg);
960
{
961
VALUE path = class2path(obj);
962
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
963
RB_GC_GUARD(path);
964
}
965
break;
966
967
case T_FLOAT:
968
w_byte(TYPE_FLOAT, arg);
969
w_float(RFLOAT_VALUE(obj), arg);
970
break;
971
972
case T_BIGNUM:
973
w_byte(TYPE_BIGNUM, arg);
974
{
975
char sign = BIGNUM_SIGN(obj) ? '+' : '-';
976
size_t len = BIGNUM_LEN(obj);
977
size_t slen;
978
size_t j;
979
BDIGIT *d = BIGNUM_DIGITS(obj);
980
981
slen = SHORTLEN(len);
982
if (LONG_MAX < slen) {
983
rb_raise(rb_eTypeError, "too big Bignum can't be dumped");
984
}
985
986
w_byte(sign, arg);
987
w_long((long)slen, arg);
988
for (j = 0; j < len; j++) {
989
#if SIZEOF_BDIGIT > SIZEOF_SHORT
990
BDIGIT num = *d;
991
int i;
992
993
for (i=0; i<SIZEOF_BDIGIT; i+=SIZEOF_SHORT) {
994
w_short(num & SHORTMASK, arg);
995
num = SHORTDN(num);
996
if (j == len - 1 && num == 0) break;
997
}
998
#else
999
w_short(*d, arg);
1000
#endif
1001
d++;
1002
}
1003
}
1004
break;
1005
1006
case T_STRING:
1007
w_uclass(obj, rb_cString, arg);
1008
w_byte(TYPE_STRING, arg);
1009
w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
1010
break;
1011
1012
case T_REGEXP:
1013
w_uclass(obj, rb_cRegexp, arg);
1014
w_byte(TYPE_REGEXP, arg);
1015
{
1016
int opts = rb_reg_options(obj);
1017
w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
1018
w_byte((char)opts, arg);
1019
}
1020
break;
1021
1022
case T_ARRAY:
1023
w_uclass(obj, rb_cArray, arg);
1024
w_byte(TYPE_ARRAY, arg);
1025
{
1026
long i, len = RARRAY_LEN(obj);
1027
1028
w_long(len, arg);
1029
for (i=0; i<RARRAY_LEN(obj); i++) {
1030
w_object(RARRAY_AREF(obj, i), arg, limit);
1031
if (len != RARRAY_LEN(obj)) {
1032
rb_raise(rb_eRuntimeError, "array modified during dump");
1033
}
1034
}
1035
}
1036
break;
1037
1038
case T_HASH:
1039
w_uclass(obj, rb_cHash, arg);
1040
if (rb_hash_compare_by_id_p(obj)) {
1041
w_byte(TYPE_UCLASS, arg);
1042
w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg);
1043
}
1044
if (NIL_P(RHASH_IFNONE(obj))) {
1045
w_byte(TYPE_HASH, arg);
1046
}
1047
else if (FL_TEST(obj, RHASH_PROC_DEFAULT)) {
1048
rb_raise(rb_eTypeError, "can't dump hash with default proc");
1049
}
1050
else {
1051
w_byte(TYPE_HASH_DEF, arg);
1052
}
1053
w_long(rb_hash_size_num(obj), arg);
1054
rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
1055
if (!NIL_P(RHASH_IFNONE(obj))) {
1056
w_object(RHASH_IFNONE(obj), arg, limit);
1057
}
1058
break;
1059
1060
case T_STRUCT:
1061
w_class(TYPE_STRUCT, obj, arg, TRUE);
1062
{
1063
long len = RSTRUCT_LEN(obj);
1064
VALUE mem;
1065
long i;
1066
1067
w_long(len, arg);
1068
mem = rb_struct_members(obj);
1069
for (i=0; i<len; i++) {
1070
w_symbol(RARRAY_AREF(mem, i), arg);
1071
w_object(RSTRUCT_GET(obj, i), arg, limit);
1072
}
1073
}
1074
break;
1075
1076
case T_OBJECT:
1077
w_class(TYPE_OBJECT, obj, arg, TRUE);
1078
w_objivar(obj, &c_arg);
1079
break;
1080
1081
case T_DATA:
1082
{
1083
VALUE v;
1084
1085
if (!rb_obj_respond_to(obj, s_dump_data, TRUE)) {
1086
rb_raise(rb_eTypeError,
1087
"no _dump_data is defined for class %"PRIsVALUE,
1088
rb_obj_class(obj));
1089
}
1090
v = dump_funcall(arg, obj, s_dump_data, 0, 0);
1091
w_class(TYPE_DATA, obj, arg, TRUE);
1092
w_object(v, arg, limit);
1093
}
1094
break;
1095
1096
default:
1097
rb_raise(rb_eTypeError, "can't dump %"PRIsVALUE,
1098
rb_obj_class(obj));
1099
break;
1100
}
1101
RB_GC_GUARD(obj);
1102
}
1103
if (hasiv) {
1104
w_ivar(hasiv, ivobj, encname, &c_arg);
1105
}
1106
}
1107
1108
static void
1109
clear_dump_arg(struct dump_arg *arg)
1110
{
1111
if (!arg->symbols) return;
1112
st_free_table(arg->symbols);
1113
arg->symbols = 0;
1114
st_free_table(arg->data);
1115
arg->data = 0;
1116
arg->num_entries = 0;
1117
if (arg->compat_tbl) {
1118
st_free_table(arg->compat_tbl);
1119
arg->compat_tbl = 0;
1120
}
1121
if (arg->encodings) {
1122
st_free_table(arg->encodings);
1123
arg->encodings = 0;
1124
}
1125
if (arg->userdefs) {
1126
st_free_table(arg->userdefs);
1127
arg->userdefs = 0;
1128
}
1129
}
1130
1131
NORETURN(static inline void io_needed(void));
1132
static inline void
1133
io_needed(void)
1134
{
1135
rb_raise(rb_eTypeError, "instance of IO needed");
1136
}
1137
1138
/*
1139
* call-seq:
1140
* dump( obj [, anIO] , limit=-1 ) -> anIO
1141
*
1142
* Serializes obj and all descendant objects. If anIO is
1143
* specified, the serialized data will be written to it, otherwise the
1144
* data will be returned as a String. If limit is specified, the
1145
* traversal of subobjects will be limited to that depth. If limit is
1146
* negative, no checking of depth will be performed.
1147
*
1148
* class Klass
1149
* def initialize(str)
1150
* @str = str
1151
* end
1152
* def say_hello
1153
* @str
1154
* end
1155
* end
1156
*
1157
* (produces no output)
1158
*
1159
* o = Klass.new("hello\n")
1160
* data = Marshal.dump(o)
1161
* obj = Marshal.load(data)
1162
* obj.say_hello #=> "hello\n"
1163
*
1164
* Marshal can't dump following objects:
1165
* * anonymous Class/Module.
1166
* * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1167
* and so on)
1168
* * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1169
* ThreadGroup, Continuation
1170
* * objects which define singleton methods
1171
*/
1172
static VALUE
1173
marshal_dump(int argc, VALUE *argv, VALUE _)
1174
{
1175
VALUE obj, port, a1, a2;
1176
int limit = -1;
1177
1178
port = Qnil;
1179
rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
1180
if (argc == 3) {
1181
if (!NIL_P(a2)) limit = NUM2INT(a2);
1182
if (NIL_P(a1)) io_needed();
1183
port = a1;
1184
}
1185
else if (argc == 2) {
1186
if (FIXNUM_P(a1)) limit = FIX2INT(a1);
1187
else if (NIL_P(a1)) io_needed();
1188
else port = a1;
1189
}
1190
return rb_marshal_dump_limited(obj, port, limit);
1191
}
1192
1193
VALUE
1194
rb_marshal_dump_limited(VALUE obj, VALUE port, int limit)
1195
{
1196
struct dump_arg *arg;
1197
VALUE wrapper; /* used to avoid memory leak in case of exception */
1198
1199
wrapper = TypedData_Make_Struct(0, struct dump_arg, &dump_arg_data, arg);
1200
arg->dest = 0;
1201
arg->symbols = st_init_numtable();
1202
arg->data = rb_init_identtable();
1203
arg->num_entries = 0;
1204
arg->compat_tbl = 0;
1205
arg->encodings = 0;
1206
arg->userdefs = 0;
1207
arg->str = rb_str_buf_new(0);
1208
if (!NIL_P(port)) {
1209
if (!rb_respond_to(port, s_write)) {
1210
io_needed();
1211
}
1212
arg->dest = port;
1213
dump_check_funcall(arg, port, s_binmode, 0, 0);
1214
}
1215
else {
1216
port = arg->str;
1217
}
1218
1219
w_byte(MARSHAL_MAJOR, arg);
1220
w_byte(MARSHAL_MINOR, arg);
1221
1222
w_object(obj, arg, limit);
1223
if (arg->dest) {
1224
rb_io_write(arg->dest, arg->str);
1225
rb_str_resize(arg->str, 0);
1226
}
1227
clear_dump_arg(arg);
1228
RB_GC_GUARD(wrapper);
1229
1230
return port;
1231
}
1232
1233
struct load_arg {
1234
VALUE src;
1235
char *buf;
1236
long buflen;
1237
long readable;
1238
long offset;
1239
st_table *symbols;
1240
st_table *data;
1241
st_table *partial_objects;
1242
VALUE proc;
1243
st_table *compat_tbl;
1244
bool freeze;
1245
};
1246
1247
static VALUE
1248
check_load_arg(VALUE ret, struct load_arg *arg, const char *name)
1249
{
1250
if (!arg->symbols) {
1251
rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
1252
name);
1253
}
1254
return ret;
1255
}
1256
/*
 * Calls obj.sym(*argv), then verifies through check_load_arg() that the
 * load state is still alive.  The method name reported on failure is taken
 * from the identifier name_<sym>.
 */
#define load_funcall(arg, obj, sym, argc, argv) \
    check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)

/* needed by free_load_arg() before its definition */
static void clear_load_arg(struct load_arg *arg);
1260
1261
static void
1262
mark_load_arg(void *ptr)
1263
{
1264
struct load_arg *p = ptr;
1265
if (!p->symbols)
1266
return;
1267
rb_mark_tbl(p->symbols);
1268
rb_mark_tbl(p->data);
1269
rb_mark_tbl(p->partial_objects);
1270
rb_mark_hash(p->compat_tbl);
1271
}
1272
1273
/* GC free callback for load_arg_data: releases the buffer and tables. */
static void
free_load_arg(void *ptr)
{
    struct load_arg *state = ptr;

    clear_load_arg(state);
}
1278
1279
static size_t
1280
memsize_load_arg(const void *ptr)
1281
{
1282
const struct load_arg *p = (struct load_arg *)ptr;
1283
size_t memsize = 0;
1284
if (p->symbols) memsize += rb_st_memsize(p->symbols);
1285
if (p->data) memsize += rb_st_memsize(p->data);
1286
if (p->partial_objects) memsize += rb_st_memsize(p->partial_objects);
1287
if (p->compat_tbl) memsize += rb_st_memsize(p->compat_tbl);
1288
return memsize;
1289
}
1290
1291
static const rb_data_type_t load_arg_data = {
1292
"load_arg",
1293
{mark_load_arg, free_load_arg, memsize_load_arg,},
1294
0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE
1295
};
1296
1297
#define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1298
static VALUE r_object(struct load_arg *arg);
1299
static VALUE r_symbol(struct load_arg *arg);
1300
1301
NORETURN(static void too_short(void));
1302
static void
1303
too_short(void)
1304
{
1305
rb_raise(rb_eArgError, "marshal data too short");
1306
}
1307
1308
static st_index_t
1309
r_prepare(struct load_arg *arg)
1310
{
1311
st_index_t idx = arg->data->num_entries;
1312
1313
st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1314
return idx;
1315
}
1316
1317
static unsigned char
1318
r_byte1_buffered(struct load_arg *arg)
1319
{
1320
if (arg->buflen == 0) {
1321
long readable = arg->readable < BUFSIZ ? arg->readable : BUFSIZ;
1322
VALUE str, n = LONG2NUM(readable);
1323
1324
str = load_funcall(arg, arg->src, s_read, 1, &n);
1325
if (NIL_P(str)) too_short();
1326
StringValue(str);
1327
memcpy(arg->buf, RSTRING_PTR(str), RSTRING_LEN(str));
1328
arg->offset = 0;
1329
arg->buflen = RSTRING_LEN(str);
1330
}
1331
arg->buflen--;
1332
return arg->buf[arg->offset++];
1333
}
1334
1335
static int
1336
r_byte(struct load_arg *arg)
1337
{
1338
int c;
1339
1340
if (RB_TYPE_P(arg->src, T_STRING)) {
1341
if (RSTRING_LEN(arg->src) > arg->offset) {
1342
c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1343
}
1344
else {
1345
too_short();
1346
}
1347
}
1348
else {
1349
if (arg->readable >0 || arg->buflen > 0) {
1350
c = r_byte1_buffered(arg);
1351
}
1352
else {
1353
VALUE v = load_funcall(arg, arg->src, s_getbyte, 0, 0);
1354
if (NIL_P(v)) rb_eof_error();
1355
c = (unsigned char)NUM2CHR(v);
1356
}
1357
}
1358
return c;
1359
}
1360
1361
NORETURN(static void long_toobig(int size));
1362
1363
static void
1364
long_toobig(int size)
1365
{
1366
rb_raise(rb_eTypeError, "long too big for this architecture (size "
1367
STRINGIZE(SIZEOF_LONG)", given %d)", size);
1368
}
1369
1370
/*
 * Decodes a Marshal-encoded integer.
 *
 * The first byte c, read as a signed char, selects the form:
 *   0           -> 0
 *   5..127      -> c - 5
 *   -128..-5    -> c + 5
 *   1..4 etc.   -> c little-endian bytes follow (zero-extended)
 *   -1..-4 etc. -> -c little-endian bytes follow (one-extended)
 *
 * Raises TypeError through long_toobig() when more bytes follow than a
 * long can hold.
 */
static long
r_long(struct load_arg *arg)
{
    /* accumulate unsigned: shifting a byte into the top bits of a signed
     * long would be undefined behaviour */
    unsigned long x;
    int c = (signed char)r_byte(arg);
    int i;

    if (c == 0) return 0;
    if (c > 0) {
        if (4 < c && c < 128) {
            return c - 5;
        }
        if (c > (int)sizeof(long)) long_toobig(c);
        x = 0;
        for (i = 0; i < c; i++) {
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    else {
        if (-129 < c && c < -4) {
            return c + 5;
        }
        c = -c;
        if (c > (int)sizeof(long)) long_toobig(c);
        x = ~0UL;
        for (i = 0; i < c; i++) {
            x &= ~(0xffUL << (8*i));
            x |= (unsigned long)r_byte(arg) << (8*i);
        }
    }
    return (long)x;
}
1402
1403
long
1404
ruby_marshal_read_long(const char **buf, long len)
1405
{
1406
long x;
1407
struct RString src;
1408
struct load_arg arg;
1409
memset(&arg, 0, sizeof(arg));
1410
arg.src = rb_setup_fake_str(&src, *buf, len, 0);
1411
x = r_long(&arg);
1412
*buf += arg.offset;
1413
return x;
1414
}
1415
1416
static VALUE
1417
r_bytes1(long len, struct load_arg *arg)
1418
{
1419
VALUE str, n = LONG2NUM(len);
1420
1421
str = load_funcall(arg, arg->src, s_read, 1, &n);
1422
if (NIL_P(str)) too_short();
1423
StringValue(str);
1424
if (RSTRING_LEN(str) != len) too_short();
1425
1426
return str;
1427
}
1428
1429
static VALUE
1430
r_bytes1_buffered(long len, struct load_arg *arg)
1431
{
1432
VALUE str;
1433
1434
if (len <= arg->buflen) {
1435
str = rb_str_new(arg->buf+arg->offset, len);
1436
arg->offset += len;
1437
arg->buflen -= len;
1438
}
1439
else {
1440
long buflen = arg->buflen;
1441
long readable = arg->readable + 1;
1442
long tmp_len, read_len, need_len = len - buflen;
1443
VALUE tmp, n;
1444
1445
readable = readable < BUFSIZ ? readable : BUFSIZ;
1446
read_len = need_len > readable ? need_len : readable;
1447
n = LONG2NUM(read_len);
1448
tmp = load_funcall(arg, arg->src, s_read, 1, &n);
1449
if (NIL_P(tmp)) too_short();
1450
StringValue(tmp);
1451
1452
tmp_len = RSTRING_LEN(tmp);
1453
1454
if (tmp_len < need_len) too_short();
1455
1456
str = rb_str_new(arg->buf+arg->offset, buflen);
1457
rb_str_cat(str, RSTRING_PTR(tmp), need_len);
1458
1459
if (tmp_len > need_len) {
1460
buflen = tmp_len - need_len;
1461
memcpy(arg->buf, RSTRING_PTR(tmp)+need_len, buflen);
1462
arg->buflen = buflen;
1463
}
1464
else {
1465
arg->buflen = 0;
1466
}
1467
arg->offset = 0;
1468
}
1469
1470
return str;
1471
}
1472
1473
#define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1474
1475
static VALUE
1476
r_bytes0(long len, struct load_arg *arg)
1477
{
1478
VALUE str;
1479
1480
if (len == 0) return rb_str_new(0, 0);
1481
if (RB_TYPE_P(arg->src, T_STRING)) {
1482
if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1483
str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1484
arg->offset += len;
1485
}
1486
else {
1487
too_short();
1488
}
1489
}
1490
else {
1491
if (arg->readable > 0 || arg->buflen > 0) {
1492
str = r_bytes1_buffered(len, arg);
1493
}
1494
else {
1495
str = r_bytes1(len, arg);
1496
}
1497
}
1498
return str;
1499
}
1500
1501
/*
 * Returns 1 when the +l+ bytes at +p+ equal the +nlen+ bytes at +name+,
 * 0 otherwise.  A negative +l+ never matches; two empty sequences compare
 * equal without either pointer being dereferenced.
 */
static inline int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if (l < 0 || (size_t)l != nlen) return 0;
    return memcmp(p, name, nlen) == 0;
}
1507
1508
static int
1509
sym2encidx(VALUE sym, VALUE val)
1510
{
1511
static const char name_encoding[8] = "encoding";
1512
const char *p;
1513
long l;
1514
if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return -1;
1515
RSTRING_GETMEM(sym, p, l);
1516
if (l <= 0) return -1;
1517
if (name_equal(name_encoding, sizeof(name_encoding), p, l)) {
1518
int idx = rb_enc_find_index(StringValueCStr(val));
1519
return idx;
1520
}
1521
if (name_equal(name_s_encoding_short, rb_strlen_lit(name_s_encoding_short), p, l)) {
1522
if (val == Qfalse) return rb_usascii_encindex();
1523
else if (val == Qtrue) return rb_utf8_encindex();
1524
/* bogus ignore */
1525
}
1526
return -1;
1527
}
1528
1529
static int
1530
symname_equal(VALUE sym, const char *name, size_t nlen)
1531
{
1532
const char *p;
1533
long l;
1534
if (rb_enc_get_index(sym) != ENCINDEX_US_ASCII) return 0;
1535
RSTRING_GETMEM(sym, p, l);
1536
return name_equal(name, nlen, p, l);
1537
}
1538
1539
/*
 * Evaluates to n while forcing a compile error unless n is positive: zero
 * is turned into a negative array size to work around the "zero size
 * array" GCC extension.  Assumes 2*n-1 does not overflow.
 */
#define BUILD_ASSERT_POSITIVE(n) \
    ((sizeof(char [2*(ssize_t)(n)-1])+1)/2)

/* symname_equal() against a non-empty string literal. */
#define symname_equal_lit(sym, sym_name) \
    symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1544
1545
static VALUE
1546
r_symlink(struct load_arg *arg)
1547
{
1548
st_data_t sym;
1549
long num = r_long(arg);
1550
1551
if (!st_lookup(arg->symbols, num, &sym)) {
1552
rb_raise(rb_eArgError, "bad symbol");
1553
}
1554
return (VALUE)sym;
1555
}
1556
1557
static VALUE
1558
r_symreal(struct load_arg *arg, int ivar)
1559
{
1560
VALUE s = r_bytes(arg);
1561
VALUE sym;
1562
int idx = -1;
1563
st_index_t n = arg->symbols->num_entries;
1564
1565
if (rb_enc_str_asciionly_p(s)) rb_enc_associate_index(s, ENCINDEX_US_ASCII);
1566
st_insert(arg->symbols, (st_data_t)n, (st_data_t)s);
1567
if (ivar) {
1568
long num = r_long(arg);
1569
while (num-- > 0) {
1570
sym = r_symbol(arg);
1571
idx = sym2encidx(sym, r_object(arg));
1572
}
1573
}
1574
if (idx > 0) {
1575
rb_enc_associate_index(s, idx);
1576
if (is_broken_string(s)) {
1577
rb_raise(rb_eArgError, "invalid byte sequence in %s: %+"PRIsVALUE,
1578
rb_enc_name(rb_enc_from_index(idx)), s);
1579
}
1580
}
1581
1582
return s;
1583
}
1584
1585
static VALUE
1586
r_symbol(struct load_arg *arg)
1587
{
1588
int type, ivar = 0;
1589
1590
again:
1591
switch ((type = r_byte(arg))) {
1592
default:
1593
rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1594
case TYPE_IVAR:
1595
ivar = 1;
1596
goto again;
1597
case TYPE_SYMBOL:
1598
return r_symreal(arg, ivar);
1599
case TYPE_SYMLINK:
1600
if (ivar) {
1601
rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1602
}
1603
return r_symlink(arg);
1604
}
1605
}
1606
1607
static VALUE
1608
r_unique(struct load_arg *arg)
1609
{
1610
return r_symbol(arg);
1611
}
1612
1613
static VALUE
1614
r_string(struct load_arg *arg)
1615
{
1616
return r_bytes(arg);
1617
}
1618
1619
static VALUE
1620
r_entry0(VALUE v, st_index_t num, struct load_arg *arg)
1621
{
1622
st_data_t real_obj = (st_data_t)v;
1623
if (arg->compat_tbl) {
1624
/* real_obj is kept if not found */
1625
st_lookup(arg->compat_tbl, v, &real_obj);
1626
}
1627
st_insert(arg->data, num, real_obj);
1628
st_insert(arg->partial_objects, (st_data_t)real_obj, Qtrue);
1629
return v;
1630
}
1631
1632
static VALUE
1633
r_fixup_compat(VALUE v, struct load_arg *arg)
1634
{
1635
st_data_t data;
1636
st_data_t key = (st_data_t)v;
1637
if (arg->compat_tbl && st_delete(arg->compat_tbl, &key, &data)) {
1638
VALUE real_obj = (VALUE)data;
1639
rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1640
if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1641
marshal_compat_t *compat = (marshal_compat_t*)data;
1642
compat->loader(real_obj, v);
1643
}
1644
v = real_obj;
1645
}
1646
return v;
1647
}
1648
1649
static VALUE
1650
r_post_proc(VALUE v, struct load_arg *arg)
1651
{
1652
if (arg->proc) {
1653
v = load_funcall(arg, arg->proc, s_call, 1, &v);
1654
}
1655
return v;
1656
}
1657
1658
static VALUE
1659
r_leave(VALUE v, struct load_arg *arg, bool partial)
1660
{
1661
v = r_fixup_compat(v, arg);
1662
if (!partial) {
1663
st_data_t data;
1664
st_data_t key = (st_data_t)v;
1665
st_delete(arg->partial_objects, &key, &data);
1666
if (arg->freeze) {
1667
if (RB_TYPE_P(v, T_MODULE) || RB_TYPE_P(v, T_CLASS)) {
1668
// noop
1669
}
1670
else if (RB_TYPE_P(v, T_STRING)) {
1671
v = rb_str_to_interned_str(v);
1672
}
1673
else {
1674
OBJ_FREEZE(v);
1675
}
1676
}
1677
v = r_post_proc(v, arg);
1678
}
1679
return v;
1680
}
1681
1682
static int
1683
copy_ivar_i(ID vid, VALUE value, st_data_t arg)
1684
{
1685
VALUE obj = (VALUE)arg;
1686
1687
if (!rb_ivar_defined(obj, vid))
1688
rb_ivar_set(obj, vid, value);
1689
return ST_CONTINUE;
1690
}
1691
1692
static VALUE
1693
r_copy_ivar(VALUE v, VALUE data)
1694
{
1695
rb_ivar_foreach(data, copy_ivar_i, (st_data_t)v);
1696
return v;
1697
}
1698
1699
/*
 * Raises TypeError: the dump tries to set instance variables on the class
 * or module named str.  type is a string literal ("class", "module", ...).
 */
#define override_ivar_error(type, str) \
    rb_raise(rb_eTypeError, \
             "can't override instance variable of "type" '%"PRIsVALUE"'", \
             (str))
1703
1704
static void
1705
r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1706
{
1707
long len;
1708
1709
len = r_long(arg);
1710
if (len > 0) {
1711
if (RB_TYPE_P(obj, T_MODULE)) {
1712
override_ivar_error("module", rb_mod_name(obj));
1713
}
1714
else if (RB_TYPE_P(obj, T_CLASS)) {
1715
override_ivar_error("class", rb_class_name(obj));
1716
}
1717
do {
1718
VALUE sym = r_symbol(arg);
1719
VALUE val = r_object(arg);
1720
int idx = sym2encidx(sym, val);
1721
if (idx >= 0) {
1722
if (rb_enc_capable(obj)) {
1723
rb_enc_associate_index(obj, idx);
1724
}
1725
else {
1726
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1727
}
1728
if (has_encoding) *has_encoding = TRUE;
1729
}
1730
else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
1731
if (RB_TYPE_P(obj, T_HASH)) {
1732
rb_hash_ruby2_keywords(obj);
1733
}
1734
else {
1735
rb_raise(rb_eArgError, "ruby2_keywords flag is given but %"PRIsVALUE" is not a Hash", obj);
1736
}
1737
}
1738
else {
1739
rb_ivar_set(obj, rb_intern_str(sym), val);
1740
}
1741
} while (--len > 0);
1742
}
1743
}
1744
1745
static VALUE
1746
path2class(VALUE path)
1747
{
1748
VALUE v = rb_path_to_class(path);
1749
1750
if (!RB_TYPE_P(v, T_CLASS)) {
1751
rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to class", path);
1752
}
1753
return v;
1754
}
1755
1756
#define path2module(path) must_be_module(rb_path_to_class(path), path)
1757
1758
static VALUE
1759
must_be_module(VALUE v, VALUE path)
1760
{
1761
if (!RB_TYPE_P(v, T_MODULE)) {
1762
rb_raise(rb_eArgError, "%"PRIsVALUE" does not refer to module", path);
1763
}
1764
return v;
1765
}
1766
1767
static VALUE
1768
obj_alloc_by_klass(VALUE klass, struct load_arg *arg, VALUE *oldclass)
1769
{
1770
st_data_t data;
1771
rb_alloc_func_t allocator;
1772
1773
allocator = rb_get_alloc_func(klass);
1774
if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1775
marshal_compat_t *compat = (marshal_compat_t*)data;
1776
VALUE real_obj = rb_obj_alloc(klass);
1777
VALUE obj = rb_obj_alloc(compat->oldclass);
1778
if (oldclass) *oldclass = compat->oldclass;
1779
1780
if (!arg->compat_tbl) {
1781
arg->compat_tbl = rb_init_identtable();
1782
}
1783
st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1784
return obj;
1785
}
1786
1787
return rb_obj_alloc(klass);
1788
}
1789
1790
static VALUE
1791
obj_alloc_by_path(VALUE path, struct load_arg *arg)
1792
{
1793
return obj_alloc_by_klass(path2class(path), arg, 0);
1794
}
1795
1796
static VALUE
1797
append_extmod(VALUE obj, VALUE extmod)
1798
{
1799
long i = RARRAY_LEN(extmod);
1800
while (i > 0) {
1801
VALUE m = RARRAY_AREF(extmod, --i);
1802
rb_extend_object(obj, m);
1803
}
1804
return obj;
1805
}
1806
1807
#define prohibit_ivar(type, str) do { \
1808
if (!ivp || !*ivp) break; \
1809
override_ivar_error(type, str); \
1810
} while (0)
1811
1812
static VALUE r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type);
1813
1814
static VALUE
1815
r_object0(struct load_arg *arg, bool partial, int *ivp, VALUE extmod)
1816
{
1817
int type = r_byte(arg);
1818
return r_object_for(arg, partial, ivp, extmod, type);
1819
}
1820
1821
static VALUE
1822
r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int type)
1823
{
1824
VALUE (*hash_new_with_size)(st_index_t) = rb_hash_new_with_size;
1825
VALUE v = Qnil;
1826
long id;
1827
st_data_t link;
1828
1829
switch (type) {
1830
case TYPE_LINK:
1831
id = r_long(arg);
1832
if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1833
rb_raise(rb_eArgError, "dump format error (unlinked)");
1834
}
1835
v = (VALUE)link;
1836
if (!st_lookup(arg->partial_objects, (st_data_t)v, &link)) {
1837
v = r_post_proc(v, arg);
1838
}
1839
break;
1840
1841
case TYPE_IVAR:
1842
{
1843
int ivar = TRUE;
1844
v = r_object0(arg, true, &ivar, extmod);
1845
if (ivar) r_ivar(v, NULL, arg);
1846
v = r_leave(v, arg, partial);
1847
}
1848
break;
1849
1850
case TYPE_EXTENDED:
1851
{
1852
VALUE path = r_unique(arg);
1853
VALUE m = rb_path_to_class(path);
1854
if (NIL_P(extmod)) extmod = rb_ary_hidden_new(0);
1855
1856
if (RB_TYPE_P(m, T_CLASS)) { /* prepended */
1857
VALUE c;
1858
1859
v = r_object0(arg, true, 0, Qnil);
1860
c = CLASS_OF(v);
1861
if (c != m || FL_TEST(c, FL_SINGLETON)) {
1862
rb_raise(rb_eArgError,
1863
"prepended class %"PRIsVALUE" differs from class %"PRIsVALUE,
1864
path, rb_class_name(c));
1865
}
1866
c = rb_singleton_class(v);
1867
while (RARRAY_LEN(extmod) > 0) {
1868
m = rb_ary_pop(extmod);
1869
rb_prepend_module(c, m);
1870
}
1871
}
1872
else {
1873
must_be_module(m, path);
1874
rb_ary_push(extmod, m);
1875
1876
v = r_object0(arg, true, 0, extmod);
1877
while (RARRAY_LEN(extmod) > 0) {
1878
m = rb_ary_pop(extmod);
1879
rb_extend_object(v, m);
1880
}
1881
}
1882
v = r_leave(v, arg, partial);
1883
}
1884
break;
1885
1886
case TYPE_UCLASS:
1887
{
1888
VALUE c = path2class(r_unique(arg));
1889
1890
if (FL_TEST(c, FL_SINGLETON)) {
1891
rb_raise(rb_eTypeError, "singleton can't be loaded");
1892
}
1893
type = r_byte(arg);
1894
if ((c == rb_cHash) &&
1895
/* Hack for compare_by_identify */
1896
(type == TYPE_HASH || type == TYPE_HASH_DEF)) {
1897
hash_new_with_size = rb_ident_hash_new_with_size;
1898
goto type_hash;
1899
}
1900
v = r_object_for(arg, partial, 0, extmod, type);
1901
if (RB_SPECIAL_CONST_P(v) || RB_TYPE_P(v, T_OBJECT) || RB_TYPE_P(v, T_CLASS)) {
1902
goto format_error;
1903
}
1904
if (RB_TYPE_P(v, T_MODULE) || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
1905
VALUE tmp = rb_obj_alloc(c);
1906
1907
if (TYPE(v) != TYPE(tmp)) goto format_error;
1908
}
1909
RBASIC_SET_CLASS(v, c);
1910
}
1911
break;
1912
1913
format_error:
1914
rb_raise(rb_eArgError, "dump format error (user class)");
1915
1916
case TYPE_NIL:
1917
v = Qnil;
1918
v = r_leave(v, arg, false);
1919
break;
1920
1921
case TYPE_TRUE:
1922
v = Qtrue;
1923
v = r_leave(v, arg, false);
1924
break;
1925
1926
case TYPE_FALSE:
1927
v = Qfalse;
1928
v = r_leave(v, arg, false);
1929
break;
1930
1931
case TYPE_FIXNUM:
1932
{
1933
long i = r_long(arg);
1934
v = LONG2FIX(i);
1935
}
1936
v = r_leave(v, arg, false);
1937
break;
1938
1939
case TYPE_FLOAT:
1940
{
1941
double d;
1942
VALUE str = r_bytes(arg);
1943
const char *ptr = RSTRING_PTR(str);
1944
1945
if (strcmp(ptr, "nan") == 0) {
1946
d = nan("");
1947
}
1948
else if (strcmp(ptr, "inf") == 0) {
1949
d = HUGE_VAL;
1950
}
1951
else if (strcmp(ptr, "-inf") == 0) {
1952
d = -HUGE_VAL;
1953
}
1954
else {
1955
char *e;
1956
d = strtod(ptr, &e);
1957
d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1958
}
1959
v = DBL2NUM(d);
1960
v = r_entry(v, arg);
1961
v = r_leave(v, arg, false);
1962
}
1963
break;
1964
1965
case TYPE_BIGNUM:
1966
{
1967
long len;
1968
VALUE data;
1969
int sign;
1970
1971
sign = r_byte(arg);
1972
len = r_long(arg);
1973
1974
if (SIZEOF_VALUE >= 8 && len <= 4) {
1975
// Representable within uintptr, likely FIXNUM
1976
VALUE num = 0;
1977
for (int i = 0; i < len; i++) {
1978
num |= (VALUE)r_byte(arg) << (i * 16);
1979
num |= (VALUE)r_byte(arg) << (i * 16 + 8);
1980
}
1981
#if SIZEOF_VALUE == SIZEOF_LONG
1982
v = ULONG2NUM(num);
1983
#else
1984
v = ULL2NUM(num);
1985
#endif
1986
if (sign == '-') {
1987
v = rb_int_uminus(v);
1988
}
1989
}
1990
else {
1991
data = r_bytes0(len * 2, arg);
1992
v = rb_integer_unpack(RSTRING_PTR(data), len, 2, 0,
1993
INTEGER_PACK_LITTLE_ENDIAN | (sign == '-' ? INTEGER_PACK_NEGATIVE : 0));
1994
rb_str_resize(data, 0L);
1995
}
1996
v = r_entry(v, arg);
1997
v = r_leave(v, arg, false);
1998
}
1999
break;
2000
2001
case TYPE_STRING:
2002
v = r_entry(r_string(arg), arg);
2003
v = r_leave(v, arg, partial);
2004
break;
2005
2006
case TYPE_REGEXP:
2007
{
2008
VALUE str = r_bytes(arg);
2009
int options = r_byte(arg);
2010
int has_encoding = FALSE;
2011
st_index_t idx = r_prepare(arg);
2012
2013
if (ivp) {
2014
r_ivar(str, &has_encoding, arg);
2015
*ivp = FALSE;
2016
}
2017
if (!has_encoding) {
2018
/* 1.8 compatibility; remove escapes undefined in 1.8 */
2019
char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
2020
long len = RSTRING_LEN(str);
2021
long bs = 0;
2022
for (; len-- > 0; *dst++ = *src++) {
2023
switch (*src) {
2024
case '\\': bs++; break;
2025
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2026
case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
2027
case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
2028
case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
2029
case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
2030
if (bs & 1) --dst;
2031
/* fall through */
2032
default: bs = 0; break;
2033
}
2034
}
2035
rb_str_set_len(str, dst - ptr);
2036
}
2037
VALUE regexp = rb_reg_new_str(str, options);
2038
r_copy_ivar(regexp, str);
2039
2040
v = r_entry0(regexp, idx, arg);
2041
v = r_leave(v, arg, partial);
2042
}
2043
break;
2044
2045
case TYPE_ARRAY:
2046
{
2047
long len = r_long(arg);
2048
2049
v = rb_ary_new2(len);
2050
v = r_entry(v, arg);
2051
arg->readable += len - 1;
2052
while (len--) {
2053
rb_ary_push(v, r_object(arg));
2054
arg->readable--;
2055
}
2056
v = r_leave(v, arg, partial);
2057
arg->readable++;
2058
}
2059
break;
2060
2061
case TYPE_HASH:
2062
case TYPE_HASH_DEF:
2063
type_hash:
2064
{
2065
long len = r_long(arg);
2066
2067
v = hash_new_with_size(len);
2068
v = r_entry(v, arg);
2069
arg->readable += (len - 1) * 2;
2070
while (len--) {
2071
VALUE key = r_object(arg);
2072
VALUE value = r_object(arg);
2073
rb_hash_aset(v, key, value);
2074
arg->readable -= 2;
2075
}
2076
arg->readable += 2;
2077
if (type == TYPE_HASH_DEF) {
2078
RHASH_SET_IFNONE(v, r_object(arg));
2079
}
2080
v = r_leave(v, arg, partial);
2081
}
2082
break;
2083
2084
case TYPE_STRUCT:
2085
{
2086
VALUE mem, values;
2087
long i;
2088
VALUE slot;
2089
st_index_t idx = r_prepare(arg);
2090
VALUE klass = path2class(r_unique(arg));
2091
long len = r_long(arg);
2092
2093
v = rb_obj_alloc(klass);
2094
if (!RB_TYPE_P(v, T_STRUCT)) {
2095
rb_raise(rb_eTypeError, "class %"PRIsVALUE" not a struct", rb_class_name(klass));
2096
}
2097
mem = rb_struct_s_members(klass);
2098
if (RARRAY_LEN(mem) != len) {
2099
rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (struct size differs)",
2100
rb_class_name(klass));
2101
}
2102
2103
arg->readable += (len - 1) * 2;
2104
v = r_entry0(v, idx, arg);
2105
values = rb_ary_new2(len);
2106
{
2107
VALUE keywords = Qfalse;
2108
if (RTEST(rb_struct_s_keyword_init(klass))) {
2109
keywords = rb_hash_new();
2110
rb_ary_push(values, keywords);
2111
}
2112
2113
for (i=0; i<len; i++) {
2114
VALUE n = rb_sym2str(RARRAY_AREF(mem, i));
2115
slot = r_symbol(arg);
2116
2117
if (!rb_str_equal(n, slot)) {
2118
rb_raise(rb_eTypeError, "struct %"PRIsVALUE" not compatible (:%"PRIsVALUE" for :%"PRIsVALUE")",
2119
rb_class_name(klass),
2120
slot, n);
2121
}
2122
if (keywords) {
2123
rb_hash_aset(keywords, RARRAY_AREF(mem, i), r_object(arg));
2124
}
2125
else {
2126
rb_ary_push(values, r_object(arg));
2127
}
2128
arg->readable -= 2;
2129
}
2130
}
2131
rb_struct_initialize(v, values);
2132
v = r_leave(v, arg, partial);
2133
arg->readable += 2;
2134
}
2135
break;
2136
2137
case TYPE_USERDEF:
2138
{
2139
VALUE name = r_unique(arg);
2140
VALUE klass = path2class(name);
2141
VALUE data;
2142
st_data_t d;
2143
2144
if (!rb_obj_respond_to(klass, s_load, TRUE)) {
2145
rb_raise(rb_eTypeError, "class %"PRIsVALUE" needs to have method '_load'",
2146
name);
2147
}
2148
data = r_string(arg);
2149
if (ivp) {
2150
r_ivar(data, NULL, arg);
2151
*ivp = FALSE;
2152
}
2153
v = load_funcall(arg, klass, s_load, 1, &data);
2154
v = r_entry(v, arg);
2155
if (st_lookup(compat_allocator_tbl, (st_data_t)rb_get_alloc_func(klass), &d)) {
2156
marshal_compat_t *compat = (marshal_compat_t*)d;
2157
v = compat->loader(klass, v);
2158
}
2159
if (!partial) {
2160
if (arg->freeze) {
2161
OBJ_FREEZE(v);
2162
}
2163
v = r_post_proc(v, arg);
2164
}
2165
}
2166
break;
2167
2168
case TYPE_USRMARSHAL:
2169
{
2170
VALUE name = r_unique(arg);
2171
VALUE klass = path2class(name);
2172
VALUE oldclass = 0;
2173
VALUE data;
2174
2175
v = obj_alloc_by_klass(klass, arg, &oldclass);
2176
if (!NIL_P(extmod)) {
2177
/* for the case marshal_load is overridden */
2178
append_extmod(v, extmod);
2179
}
2180
if (!rb_obj_respond_to(v, s_mload, TRUE)) {
2181
rb_raise(rb_eTypeError, "instance of %"PRIsVALUE" needs to have method 'marshal_load'",
2182
name);
2183
}
2184
v = r_entry(v, arg);
2185
data = r_object(arg);
2186
load_funcall(arg, v, s_mload, 1, &data);
2187
v = r_fixup_compat(v, arg);
2188
v = r_copy_ivar(v, data);
2189
if (arg->freeze) {
2190
OBJ_FREEZE(v);
2191
}
2192
v = r_post_proc(v, arg);
2193
if (!NIL_P(extmod)) {
2194
if (oldclass) append_extmod(v, extmod);
2195
rb_ary_clear(extmod);
2196
}
2197
}
2198
break;
2199
2200
case TYPE_OBJECT:
2201
{
2202
st_index_t idx = r_prepare(arg);
2203
v = obj_alloc_by_path(r_unique(arg), arg);
2204
if (!RB_TYPE_P(v, T_OBJECT)) {
2205
rb_raise(rb_eArgError, "dump format error");
2206
}
2207
v = r_entry0(v, idx, arg);
2208
r_ivar(v, NULL, arg);
2209
v = r_leave(v, arg, partial);
2210
}
2211
break;
2212
2213
case TYPE_DATA:
2214
{
2215
VALUE name = r_unique(arg);
2216
VALUE klass = path2class(name);
2217
VALUE oldclass = 0;
2218
VALUE r;
2219
2220
v = obj_alloc_by_klass(klass, arg, &oldclass);
2221
if (!RB_TYPE_P(v, T_DATA)) {
2222
rb_raise(rb_eArgError, "dump format error");
2223
}
2224
v = r_entry(v, arg);
2225
if (!rb_obj_respond_to(v, s_load_data, TRUE)) {
2226
rb_raise(rb_eTypeError,
2227
"class %"PRIsVALUE" needs to have instance method '_load_data'",
2228
name);
2229
}
2230
r = r_object0(arg, partial, 0, extmod);
2231
load_funcall(arg, v, s_load_data, 1, &r);
2232
v = r_leave(v, arg, partial);
2233
}
2234
break;
2235
2236
case TYPE_MODULE_OLD:
2237
{
2238
VALUE str = r_bytes(arg);
2239
2240
v = rb_path_to_class(str);
2241
prohibit_ivar("class/module", str);
2242
v = r_entry(v, arg);
2243
v = r_leave(v, arg, partial);
2244
}
2245
break;
2246
2247
case TYPE_CLASS:
2248
{
2249
VALUE str = r_bytes(arg);
2250
2251
v = path2class(str);
2252
prohibit_ivar("class", str);
2253
v = r_entry(v, arg);
2254
v = r_leave(v, arg, partial);
2255
}
2256
break;
2257
2258
case TYPE_MODULE:
2259
{
2260
VALUE str = r_bytes(arg);
2261
2262
v = path2module(str);
2263
prohibit_ivar("module", str);
2264
v = r_entry(v, arg);
2265
v = r_leave(v, arg, partial);
2266
}
2267
break;
2268
2269
case TYPE_SYMBOL:
2270
if (ivp) {
2271
v = r_symreal(arg, *ivp);
2272
*ivp = FALSE;
2273
}
2274
else {
2275
v = r_symreal(arg, 0);
2276
}
2277
v = rb_str_intern(v);
2278
v = r_leave(v, arg, partial);
2279
break;
2280
2281
case TYPE_SYMLINK:
2282
v = rb_str_intern(r_symlink(arg));
2283
break;
2284
2285
default:
2286
rb_raise(rb_eArgError, "dump format error(0x%x)", type);
2287
break;
2288
}
2289
2290
if (UNDEF_P(v)) {
2291
rb_raise(rb_eArgError, "dump format error (bad link)");
2292
}
2293
2294
return v;
2295
}
2296
2297
static VALUE
2298
r_object(struct load_arg *arg)
2299
{
2300
return r_object0(arg, false, 0, Qnil);
2301
}
2302
2303
static void
2304
clear_load_arg(struct load_arg *arg)
2305
{
2306
xfree(arg->buf);
2307
arg->buf = NULL;
2308
arg->buflen = 0;
2309
arg->offset = 0;
2310
arg->readable = 0;
2311
if (!arg->symbols) return;
2312
st_free_table(arg->symbols);
2313
arg->symbols = 0;
2314
st_free_table(arg->data);
2315
arg->data = 0;
2316
st_free_table(arg->partial_objects);
2317
arg->partial_objects = 0;
2318
if (arg->compat_tbl) {
2319
st_free_table(arg->compat_tbl);
2320
arg->compat_tbl = 0;
2321
}
2322
}
2323
2324
VALUE
2325
rb_marshal_load_with_proc(VALUE port, VALUE proc, bool freeze)
2326
{
2327
int major, minor;
2328
VALUE v;
2329
VALUE wrapper; /* used to avoid memory leak in case of exception */
2330
struct load_arg *arg;
2331
2332
v = rb_check_string_type(port);
2333
if (!NIL_P(v)) {
2334
port = v;
2335
}
2336
else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
2337
rb_check_funcall(port, s_binmode, 0, 0);
2338
}
2339
else {
2340
io_needed();
2341
}
2342
wrapper = TypedData_Make_Struct(0, struct load_arg, &load_arg_data, arg);
2343
arg->src = port;
2344
arg->offset = 0;
2345
arg->symbols = st_init_numtable();
2346
arg->data = rb_init_identtable();
2347
arg->partial_objects = rb_init_identtable();
2348
arg->compat_tbl = 0;
2349
arg->proc = 0;
2350
arg->readable = 0;
2351
arg->freeze = freeze;
2352
2353
if (NIL_P(v))
2354
arg->buf = xmalloc(BUFSIZ);
2355
else
2356
arg->buf = 0;
2357
2358
major = r_byte(arg);
2359
minor = r_byte(arg);
2360
if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
2361
clear_load_arg(arg);
2362
rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
2363
\tformat version %d.%d required; %d.%d given",
2364
MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2365
}
2366
if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
2367
rb_warn("incompatible marshal file format (can be read)\n\
2368
\tformat version %d.%d required; %d.%d given",
2369
MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
2370
}
2371
2372
if (!NIL_P(proc)) arg->proc = proc;
2373
v = r_object(arg);
2374
clear_load_arg(arg);
2375
RB_GC_GUARD(wrapper);
2376
2377
return v;
2378
}
2379
2380
static VALUE
2381
marshal_load(rb_execution_context_t *ec, VALUE mod, VALUE source, VALUE proc, VALUE freeze)
2382
{
2383
return rb_marshal_load_with_proc(source, proc, RTEST(freeze));
2384
}
2385
2386
#include "marshal.rbinc"
2387
2388
/*
2389
* The marshaling library converts collections of Ruby objects into a
2390
* byte stream, allowing them to be stored outside the currently
2391
* active script. This data may subsequently be read and the original
2392
* objects reconstituted.
2393
*
2394
* Marshaled data has major and minor version numbers stored along
2395
* with the object information. In normal use, marshaling can only
2396
* load data written with the same major version number and an equal
2397
* or lower minor version number.  If Ruby's ``verbose'' flag is set
2398
* (normally using -d, -v, -w, or --verbose), a warning is issued when the
2399
* minor version numbers differ.  Marshal versioning is independent of
2400
* Ruby's version numbers. You can extract the version by reading the
2401
* first two bytes of marshaled data.
2402
*
2403
* str = Marshal.dump("thing")
2404
* RUBY_VERSION #=> "1.9.0"
2405
* str[0].ord #=> 4
2406
* str[1].ord #=> 8
2407
*
2408
* Some objects cannot be dumped: if the objects to be dumped include
2409
* bindings, procedure or method objects, instances of class IO, or
2410
* singleton objects, a TypeError will be raised.
2411
*
2412
* If your class has special serialization needs (for example, if you
2413
* want to serialize in some specific format), or if it contains
2414
* objects that would otherwise not be serializable, you can implement
2415
* your own serialization strategy.
2416
*
2417
* There are two methods of doing this, your object can define either
2418
* marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2419
* precedence over _dump if both are defined. marshal_dump may result in
2420
* smaller Marshal strings.
2421
*
2422
* == Security considerations
2423
*
2424
* By design, Marshal.load can deserialize almost any class loaded into the
2425
* Ruby process. In many cases this can lead to remote code execution if the
2426
* Marshal data is loaded from an untrusted source.
2427
*
2428
* As a result, Marshal.load is not suitable as a general purpose serialization
2429
* format and you should never unmarshal user supplied input or other untrusted
2430
* data.
2431
*
2432
* If you need to deserialize untrusted data, use JSON or another serialization
2433
* format that is only able to load simple, 'primitive' types such as String,
2434
* Array, Hash, etc. Never allow user input to specify arbitrary types to
2435
* deserialize into.
2436
*
2437
* == marshal_dump and marshal_load
2438
*
2439
* When dumping an object the method marshal_dump will be called.
2440
* marshal_dump must return a result containing the information necessary for
2441
* marshal_load to reconstitute the object. The result can be any object.
2442
*
2443
* When loading an object dumped using marshal_dump the object is first
2444
* allocated then marshal_load is called with the result from marshal_dump.
2445
* marshal_load must recreate the object from the information in the result.
2446
*
2447
* Example:
2448
*
2449
* class MyObj
2450
* def initialize name, version, data
2451
* @name = name
2452
* @version = version
2453
* @data = data
2454
* end
2455
*
2456
* def marshal_dump
2457
* [@name, @version]
2458
* end
2459
*
2460
* def marshal_load array
2461
* @name, @version = array
2462
* end
2463
* end
2464
*
2465
* == _dump and _load
2466
*
2467
* Use _dump and _load when you need to allocate the object you're restoring
2468
* yourself.
2469
*
2470
* When dumping an object the instance method _dump is called with an Integer
2471
* which indicates the maximum depth of objects to dump (a value of -1 implies
2472
* that you should disable depth checking). _dump must return a String
2473
* containing the information necessary to reconstitute the object.
2474
*
2475
* The class method _load should take a String and use it to return an object
2476
* of the same class.
2477
*
2478
* Example:
2479
*
2480
* class MyObj
2481
* def initialize name, version, data
2482
* @name = name
2483
* @version = version
2484
* @data = data
2485
* end
2486
*
2487
* def _dump level
2488
* [@name, @version].join ':'
2489
* end
2490
*
2491
* def self._load args
2492
* new(*args.split(':'))
2493
* end
2494
* end
2495
*
2496
* Since Marshal.dump outputs a string you can have _dump return a Marshal
2497
* string which is Marshal.loaded in _load for complex objects.
2498
*/
2499
void
2500
Init_marshal(void)
2501
{
2502
VALUE rb_mMarshal = rb_define_module("Marshal");
2503
#define set_id(sym) sym = rb_intern_const(name_##sym)
2504
set_id(s_dump);
2505
set_id(s_load);
2506
set_id(s_mdump);
2507
set_id(s_mload);
2508
set_id(s_dump_data);
2509
set_id(s_load_data);
2510
set_id(s_alloc);
2511
set_id(s_call);
2512
set_id(s_getbyte);
2513
set_id(s_read);
2514
set_id(s_write);
2515
set_id(s_binmode);
2516
set_id(s_encoding_short);
2517
set_id(s_ruby2_keywords_flag);
2518
2519
rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
2520
2521
/* major version */
2522
rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
2523
/* minor version */
2524
rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
2525
}
2526
2527
static int
2528
marshal_compat_table_mark_i(st_data_t key, st_data_t value, st_data_t _)
2529
{
2530
marshal_compat_t *p = (marshal_compat_t *)value;
2531
rb_gc_mark_movable(p->newclass);
2532
rb_gc_mark_movable(p->oldclass);
2533
return ST_CONTINUE;
2534
}
2535
2536
static void
2537
marshal_compat_table_mark(void *tbl)
2538
{
2539
if (!tbl) return;
2540
st_foreach(tbl, marshal_compat_table_mark_i, 0);
2541
}
2542
2543
static int
2544
marshal_compat_table_free_i(st_data_t key, st_data_t value, st_data_t _)
2545
{
2546
xfree((marshal_compat_t *)value);
2547
return ST_CONTINUE;
2548
}
2549
2550
static void
2551
marshal_compat_table_free(void *data)
2552
{
2553
st_foreach(data, marshal_compat_table_free_i, 0);
2554
st_free_table(data);
2555
}
2556
2557
static size_t
2558
marshal_compat_table_memsize(const void *data)
2559
{
2560
return st_memsize(data) + sizeof(marshal_compat_t) * st_table_size(data);
2561
}
2562
2563
static int
2564
marshal_compat_table_compact_i(st_data_t key, st_data_t value, st_data_t _)
2565
{
2566
marshal_compat_t *p = (marshal_compat_t *)value;
2567
p->newclass = rb_gc_location(p->newclass);
2568
p->oldclass = rb_gc_location(p->oldclass);
2569
return ST_CONTINUE;
2570
}
2571
2572
static void
2573
marshal_compat_table_compact(void *tbl)
2574
{
2575
if (!tbl) return;
2576
st_foreach(tbl, marshal_compat_table_compact_i, 0);
2577
}
2578
2579
static const rb_data_type_t marshal_compat_type = {
2580
.wrap_struct_name = "marshal_compat_table",
2581
.function = {
2582
.dmark = marshal_compat_table_mark,
2583
.dfree = marshal_compat_table_free,
2584
.dsize = marshal_compat_table_memsize,
2585
.dcompact = marshal_compat_table_compact,
2586
},
2587
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY,
2588
};
2589
2590
static st_table *
2591
compat_allocator_table(void)
2592
{
2593
if (compat_allocator_tbl) return compat_allocator_tbl;
2594
compat_allocator_tbl = st_init_numtable();
2595
compat_allocator_tbl_wrapper =
2596
TypedData_Wrap_Struct(0, &marshal_compat_type, compat_allocator_tbl);
2597
rb_vm_register_global_object(compat_allocator_tbl_wrapper);
2598
return compat_allocator_tbl;
2599
}
2600
2601
VALUE
2602
rb_marshal_dump(VALUE obj, VALUE port)
2603
{
2604
return rb_marshal_dump_limited(obj, port, -1);
2605
}
2606
2607
VALUE
2608
rb_marshal_load(VALUE port)
2609
{
2610
return rb_marshal_load_with_proc(port, Qnil, false);
2611
}
io.h
len
int len
Length of the buffer.
Definition
io.h:8
util.h
ruby.h
Generated by
1.13.2