Ruby 3.4.5p51 (2025-07-16 revision 20cda200d3ce092571d0b5d342dadca69636cb0f)
st.c
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
17 bins:
18 -------
19 | | entries array:
20 |-------| --------------------------------
21 | index | | | entry: | | |
22 |-------| | | | | |
23 | ... | | ... | hash | ... | ... |
24 |-------| | | key | | |
25 | empty | | | record | | |
26 |-------| --------------------------------
27 | ... | ^ ^
28 |-------| |_ entries start |_ entries bound
29 |deleted|
30 -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
49 Generally it is not a good idea to ignore some part of the hash.
50 But alternative approach is worse. For example, we could use a
51 modulo operation for mapping and a prime number for the size of
52 the bin array. Unfortunately, the modulo operation for big
53 64-bit numbers are extremely slow (it takes more than 100 cycles
54 on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
73 o The length of the bin array is at least two times more than the
74 entry array length. This keeps the table load factor healthy.
75 The trigger of rebuilding the table is always a case when we can
76 not insert an entry anymore at the entries bound. We could
77 change the entries bound too in case of deletion but than we need
78 a special code to count bins with corresponding deleted entries
79 and reset the bin values when there are too many bins
80 corresponding deleted entries
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
87 o To save memory very small tables have no allocated arrays
88 bins. We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
98 This implementation speeds up the Ruby hash table benchmarks in
99 average by more 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#include <assert.h>
107#elif defined RUBY_EXPORT
108#include "internal.h"
109#include "internal/bits.h"
110#include "internal/hash.h"
111#include "internal/sanitizers.h"
112#include "internal/st.h"
113#include "ruby_assert.h"
114#endif
115
116#include <stdio.h>
117#ifdef HAVE_STDLIB_H
118#include <stdlib.h>
119#endif
120#include <string.h>
121
122#ifdef __GNUC__
123#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
124#define EXPECT(expr, val) __builtin_expect(expr, val)
125#define ATTRIBUTE_UNUSED __attribute__((unused))
126#else
127#define PREFETCH(addr, write_p)
128#define EXPECT(expr, val) (expr)
129#define ATTRIBUTE_UNUSED
130#endif
131
132/* The type of hashes. */
133typedef st_index_t st_hash_t;
134
136 st_hash_t hash;
137 st_data_t key;
138 st_data_t record;
139};
140
141#define type_numhash st_hashtype_num
142static const struct st_hash_type st_hashtype_num = {
143 st_numcmp,
144 st_numhash,
145};
146
147static int st_strcmp(st_data_t, st_data_t);
148static st_index_t strhash(st_data_t);
149static const struct st_hash_type type_strhash = {
150 st_strcmp,
151 strhash,
152};
153
154static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
155static st_index_t strcasehash(st_data_t);
156static const struct st_hash_type type_strcasehash = {
157 st_locale_insensitive_strcasecmp_i,
158 strcasehash,
159};
160
161/* Value used to catch uninitialized entries/bins during debugging.
162 There is a possibility for a false alarm, but its probability is
163 extremely small. */
164#define ST_INIT_VAL 0xafafafafafafafaf
165#define ST_INIT_VAL_BYTE 0xafa
166
167#ifdef RUBY
168#undef malloc
169#undef realloc
170#undef calloc
171#undef free
172#define malloc ruby_xmalloc
173#define calloc ruby_xcalloc
174#define realloc ruby_xrealloc
175#define free ruby_xfree
176#endif
177
178#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
179#define PTR_EQUAL(tab, ptr, hash_val, key_) \
180 ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))
181
182/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set
183 up to TRUE if the table is rebuilt during the comparison. */
184#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \
185 do { \
186 unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \
187 res = PTR_EQUAL(tab, ptr, hash_val, key); \
188 rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \
189 } while (FALSE)
190
191/* Features of a table. */
193 /* Power of 2 used for number of allocated entries. */
194 unsigned char entry_power;
195 /* Power of 2 used for number of allocated bins. Depending on the
196 table size, the number of bins is 2-4 times more than the
197 number of entries. */
198 unsigned char bin_power;
199 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
200 unsigned char size_ind;
201 /* Bins are packed in words of type st_index_t. The following is
202 a size of bins counted by words. */
203 st_index_t bins_words;
204};
205
206/* Features of all possible size tables. */
207#if SIZEOF_ST_INDEX_T == 8
208#define MAX_POWER2 62
209static const struct st_features features[] = {
210 {0, 1, 0, 0x0},
211 {1, 2, 0, 0x1},
212 {2, 3, 0, 0x1},
213 {3, 4, 0, 0x2},
214 {4, 5, 0, 0x4},
215 {5, 6, 0, 0x8},
216 {6, 7, 0, 0x10},
217 {7, 8, 0, 0x20},
218 {8, 9, 1, 0x80},
219 {9, 10, 1, 0x100},
220 {10, 11, 1, 0x200},
221 {11, 12, 1, 0x400},
222 {12, 13, 1, 0x800},
223 {13, 14, 1, 0x1000},
224 {14, 15, 1, 0x2000},
225 {15, 16, 1, 0x4000},
226 {16, 17, 2, 0x10000},
227 {17, 18, 2, 0x20000},
228 {18, 19, 2, 0x40000},
229 {19, 20, 2, 0x80000},
230 {20, 21, 2, 0x100000},
231 {21, 22, 2, 0x200000},
232 {22, 23, 2, 0x400000},
233 {23, 24, 2, 0x800000},
234 {24, 25, 2, 0x1000000},
235 {25, 26, 2, 0x2000000},
236 {26, 27, 2, 0x4000000},
237 {27, 28, 2, 0x8000000},
238 {28, 29, 2, 0x10000000},
239 {29, 30, 2, 0x20000000},
240 {30, 31, 2, 0x40000000},
241 {31, 32, 2, 0x80000000},
242 {32, 33, 3, 0x200000000},
243 {33, 34, 3, 0x400000000},
244 {34, 35, 3, 0x800000000},
245 {35, 36, 3, 0x1000000000},
246 {36, 37, 3, 0x2000000000},
247 {37, 38, 3, 0x4000000000},
248 {38, 39, 3, 0x8000000000},
249 {39, 40, 3, 0x10000000000},
250 {40, 41, 3, 0x20000000000},
251 {41, 42, 3, 0x40000000000},
252 {42, 43, 3, 0x80000000000},
253 {43, 44, 3, 0x100000000000},
254 {44, 45, 3, 0x200000000000},
255 {45, 46, 3, 0x400000000000},
256 {46, 47, 3, 0x800000000000},
257 {47, 48, 3, 0x1000000000000},
258 {48, 49, 3, 0x2000000000000},
259 {49, 50, 3, 0x4000000000000},
260 {50, 51, 3, 0x8000000000000},
261 {51, 52, 3, 0x10000000000000},
262 {52, 53, 3, 0x20000000000000},
263 {53, 54, 3, 0x40000000000000},
264 {54, 55, 3, 0x80000000000000},
265 {55, 56, 3, 0x100000000000000},
266 {56, 57, 3, 0x200000000000000},
267 {57, 58, 3, 0x400000000000000},
268 {58, 59, 3, 0x800000000000000},
269 {59, 60, 3, 0x1000000000000000},
270 {60, 61, 3, 0x2000000000000000},
271 {61, 62, 3, 0x4000000000000000},
272 {62, 63, 3, 0x8000000000000000},
273};
274
275#else
276#define MAX_POWER2 30
277
278static const struct st_features features[] = {
279 {0, 1, 0, 0x1},
280 {1, 2, 0, 0x1},
281 {2, 3, 0, 0x2},
282 {3, 4, 0, 0x4},
283 {4, 5, 0, 0x8},
284 {5, 6, 0, 0x10},
285 {6, 7, 0, 0x20},
286 {7, 8, 0, 0x40},
287 {8, 9, 1, 0x100},
288 {9, 10, 1, 0x200},
289 {10, 11, 1, 0x400},
290 {11, 12, 1, 0x800},
291 {12, 13, 1, 0x1000},
292 {13, 14, 1, 0x2000},
293 {14, 15, 1, 0x4000},
294 {15, 16, 1, 0x8000},
295 {16, 17, 2, 0x20000},
296 {17, 18, 2, 0x40000},
297 {18, 19, 2, 0x80000},
298 {19, 20, 2, 0x100000},
299 {20, 21, 2, 0x200000},
300 {21, 22, 2, 0x400000},
301 {22, 23, 2, 0x800000},
302 {23, 24, 2, 0x1000000},
303 {24, 25, 2, 0x2000000},
304 {25, 26, 2, 0x4000000},
305 {26, 27, 2, 0x8000000},
306 {27, 28, 2, 0x10000000},
307 {28, 29, 2, 0x20000000},
308 {29, 30, 2, 0x40000000},
309 {30, 31, 2, 0x80000000},
310};
311
312#endif
313
314/* The reserved hash value and its substitution. */
315#define RESERVED_HASH_VAL (~(st_hash_t) 0)
316#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
317
318static inline st_hash_t
319normalize_hash_value(st_hash_t hash)
320{
321 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
322 another value. Such mapping should be extremely rare. */
323 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
324}
325
326/* Return hash value of KEY for table TAB. */
327static inline st_hash_t
328do_hash(st_data_t key, st_table *tab)
329{
330 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
331 return normalize_hash_value(hash);
332}
333
334/* Power of 2 defining the minimal number of allocated entries. */
335#define MINIMAL_POWER2 2
336
337#if MINIMAL_POWER2 < 2
338#error "MINIMAL_POWER2 should be >= 2"
339#endif
340
341/* If the power2 of the allocated `entries` is less than the following
342 value, don't allocate bins and use a linear search. */
343#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
344
345/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
346static int
347get_power2(st_index_t size)
348{
349 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
350 if (n <= MAX_POWER2)
351 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
352#ifdef RUBY
353 /* Ran out of the table entries */
354 rb_raise(rb_eRuntimeError, "st_table too big");
355#endif
356 /* should raise exception */
357 return -1;
358}
359
360/* Return value of N-th bin in array BINS of table with bins size
361 index S. */
362static inline st_index_t
363get_bin(st_index_t *bins, int s, st_index_t n)
364{
365 return (s == 0 ? ((unsigned char *) bins)[n]
366 : s == 1 ? ((unsigned short *) bins)[n]
367 : s == 2 ? ((unsigned int *) bins)[n]
368 : ((st_index_t *) bins)[n]);
369}
370
371/* Set up N-th bin in array BINS of table with bins size index S to
372 value V. */
373static inline void
374set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
375{
376 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
377 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
378 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
379 else ((st_index_t *) bins)[n] = v;
380}
381
382/* These macros define reserved values for empty table bin and table
383 bin which contains a deleted entry. We will never use such values
384 for an entry index in bins. */
385#define EMPTY_BIN 0
386#define DELETED_BIN 1
387/* Base of a real entry index in the bins. */
388#define ENTRY_BASE 2
389
390/* Mark I-th bin of table TAB as empty, in other words not
391 corresponding to any entry. */
392#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
393
394/* Values used for not found entry and bin with given
395 characteristics. */
396#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
397#define UNDEFINED_BIN_IND (~(st_index_t) 0)
398
399/* Entry and bin values returned when we found a table rebuild during
400 the search. */
401#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
402#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
403
404/* Mark I-th bin of table TAB as corresponding to a deleted table
405 entry. Update number of entries in the table and number of bins
406 corresponding to deleted entries. */
407#define MARK_BIN_DELETED(tab, i) \
408 do { \
409 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
410 } while (0)
411
412/* Macros to check that value B is used empty bins and bins
413 corresponding deleted entries. */
414#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
415#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
416#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
417
418/* Macros to check empty bins and bins corresponding to deleted
419 entries. Bins are given by their index I in table TAB. */
420#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
421#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
422#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
423
424/* Macros for marking and checking deleted entries given by their
425 pointer E_PTR. */
426#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
427#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
428
429/* Return bin size index of table TAB. */
430static inline unsigned int
431get_size_ind(const st_table *tab)
432{
433 return tab->size_ind;
434}
435
436/* Return the number of allocated bins of table TAB. */
437static inline st_index_t
438get_bins_num(const st_table *tab)
439{
440 return ((st_index_t) 1)<<tab->bin_power;
441}
442
443/* Return mask for a bin index in table TAB. */
444static inline st_index_t
445bins_mask(const st_table *tab)
446{
447 return get_bins_num(tab) - 1;
448}
449
450/* Return the index of table TAB bin corresponding to
451 HASH_VALUE. */
452static inline st_index_t
453hash_bin(st_hash_t hash_value, st_table *tab)
454{
455 return hash_value & bins_mask(tab);
456}
457
458/* Return the number of allocated entries of table TAB. */
459static inline st_index_t
460get_allocated_entries(const st_table *tab)
461{
462 return ((st_index_t) 1)<<tab->entry_power;
463}
464
465/* Return size of the allocated bins of table TAB. */
466static inline st_index_t
467bins_size(const st_table *tab)
468{
469 return features[tab->entry_power].bins_words * sizeof (st_index_t);
470}
471
472/* Mark all bins of table TAB as empty. */
473static void
474initialize_bins(st_table *tab)
475{
476 memset(tab->bins, 0, bins_size(tab));
477}
478
479/* Make table TAB empty. */
480static void
481make_tab_empty(st_table *tab)
482{
483 tab->num_entries = 0;
484 tab->entries_start = tab->entries_bound = 0;
485 if (tab->bins != NULL)
486 initialize_bins(tab);
487}
488
489#ifdef HASH_LOG
490#ifdef HAVE_UNISTD_H
491#include <unistd.h>
492#endif
493static struct {
494 int all, total, num, str, strcase;
495} collision;
496
497/* Flag switching off output of package statistics at the end of
498 program. */
499static int init_st = 0;
500
501/* Output overall number of table searches and collisions into a
502 temporary file. */
503static void
504stat_col(void)
505{
506 char fname[10+sizeof(long)*3];
507 FILE *f;
508 if (!collision.total) return;
509 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
510 if (f == NULL)
511 return;
512 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
513 ((double)collision.all / (collision.total)) * 100);
514 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
515 fclose(f);
516}
517#endif
518
519st_table *
520st_init_existing_table_with_size(st_table *tab, const struct st_hash_type *type, st_index_t size)
521{
522 int n;
523
524#ifdef HASH_LOG
525#if HASH_LOG+0 < 0
526 {
527 const char *e = getenv("ST_HASH_LOG");
528 if (!e || !*e) init_st = 1;
529 }
530#endif
531 if (init_st == 0) {
532 init_st = 1;
533 atexit(stat_col);
534 }
535#endif
536
537 n = get_power2(size);
538#ifndef RUBY
539 if (n < 0)
540 return NULL;
541#endif
542
543 tab->type = type;
544 tab->entry_power = n;
545 tab->bin_power = features[n].bin_power;
546 tab->size_ind = features[n].size_ind;
547 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
548 tab->bins = NULL;
549 else {
550 tab->bins = (st_index_t *) malloc(bins_size(tab));
551#ifndef RUBY
552 if (tab->bins == NULL) {
553 free(tab);
554 return NULL;
555 }
556#endif
557 }
558 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
559 * sizeof(st_table_entry));
560#ifndef RUBY
561 if (tab->entries == NULL) {
562 st_free_table(tab);
563 return NULL;
564 }
565#endif
566 make_tab_empty(tab);
567 tab->rebuilds_num = 0;
568 return tab;
569}
570
571/* Create and return table with TYPE which can hold at least SIZE
572 entries. The real number of entries which the table can hold is
573 the nearest power of two for SIZE. */
574st_table *
575st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
576{
577 st_table *tab = malloc(sizeof(st_table));
578#ifndef RUBY
579 if (tab == NULL)
580 return NULL;
581#endif
582
583#ifdef RUBY
584 st_init_existing_table_with_size(tab, type, size);
585#else
586 if (st_init_existing_table_with_size(tab, type, size) == NULL) {
587 free(tab);
588 return NULL;
589 }
590#endif
591
592 return tab;
593}
594
595size_t
596st_table_size(const struct st_table *tbl)
597{
598 return tbl->num_entries;
599}
600
601/* Create and return table with TYPE which can hold a minimal number
602 of entries (see comments for get_power2). */
603st_table *
604st_init_table(const struct st_hash_type *type)
605{
606 return st_init_table_with_size(type, 0);
607}
608
609/* Create and return table which can hold a minimal number of
610 numbers. */
611st_table *
612st_init_numtable(void)
613{
614 return st_init_table(&type_numhash);
615}
616
617/* Create and return table which can hold SIZE numbers. */
618st_table *
619st_init_numtable_with_size(st_index_t size)
620{
621 return st_init_table_with_size(&type_numhash, size);
622}
623
624/* Create and return table which can hold a minimal number of
625 strings. */
626st_table *
627st_init_strtable(void)
628{
629 return st_init_table(&type_strhash);
630}
631
632/* Create and return table which can hold SIZE strings. */
633st_table *
634st_init_strtable_with_size(st_index_t size)
635{
636 return st_init_table_with_size(&type_strhash, size);
637}
638
639/* Create and return table which can hold a minimal number of strings
640 whose character case is ignored. */
641st_table *
642st_init_strcasetable(void)
643{
644 return st_init_table(&type_strcasehash);
645}
646
647/* Create and return table which can hold SIZE strings whose character
648 case is ignored. */
649st_table *
650st_init_strcasetable_with_size(st_index_t size)
651{
652 return st_init_table_with_size(&type_strcasehash, size);
653}
654
655/* Make table TAB empty. */
656void
657st_clear(st_table *tab)
658{
659 make_tab_empty(tab);
660 tab->rebuilds_num++;
661}
662
663/* Free table TAB space. */
664void
665st_free_table(st_table *tab)
666{
667 free(tab->bins);
668 free(tab->entries);
669 free(tab);
670}
671
672/* Return byte size of memory allocated for table TAB. */
673size_t
674st_memsize(const st_table *tab)
675{
676 return(sizeof(st_table)
677 + (tab->bins == NULL ? 0 : bins_size(tab))
678 + get_allocated_entries(tab) * sizeof(st_table_entry));
679}
680
681static st_index_t
682find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
683
684static st_index_t
685find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
686
687static st_index_t
688find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
689
690static st_index_t
691find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
692 st_data_t key, st_index_t *bin_ind);
693
694#ifdef HASH_LOG
695static void
696count_collision(const struct st_hash_type *type)
697{
698 collision.all++;
699 if (type == &type_numhash) {
700 collision.num++;
701 }
702 else if (type == &type_strhash) {
703 collision.strcase++;
704 }
705 else if (type == &type_strcasehash) {
706 collision.str++;
707 }
708}
709
710#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
711#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
712#define collision_check 0
713#else
714#define COLLISION
715#define FOUND_BIN
716#endif
717
718/* If the number of entries in the table is at least REBUILD_THRESHOLD
719 times less than the entry array length, decrease the table
720 size. */
721#define REBUILD_THRESHOLD 4
722
723#if REBUILD_THRESHOLD < 2
724#error "REBUILD_THRESHOLD should be >= 2"
725#endif
726
727static void rebuild_table_with(st_table *const new_tab, st_table *const tab);
728static void rebuild_move_table(st_table *const new_tab, st_table *const tab);
729static void rebuild_cleanup(st_table *const tab);
730
731/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
732 and can change size of the table entries and bins arrays.
733 Rebuilding is implemented by creation of a new table or by
734 compaction of the existing one. */
735static void
736rebuild_table(st_table *tab)
737{
738 if ((2 * tab->num_entries <= get_allocated_entries(tab)
739 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
740 || tab->num_entries < (1 << MINIMAL_POWER2)) {
741 /* Compaction: */
742 tab->num_entries = 0;
743 if (tab->bins != NULL)
744 initialize_bins(tab);
745 rebuild_table_with(tab, tab);
746 }
747 else {
748 st_table *new_tab;
749 /* This allocation could trigger GC and compaction. If tab is the
750 * gen_iv_tbl, then tab could have changed in size due to objects being
751 * freed and/or moved. Do not store attributes of tab before this line. */
752 new_tab = st_init_table_with_size(tab->type,
753 2 * tab->num_entries - 1);
754 rebuild_table_with(new_tab, tab);
755 rebuild_move_table(new_tab, tab);
756 }
757 rebuild_cleanup(tab);
758}
759
760static void
761rebuild_table_with(st_table *const new_tab, st_table *const tab)
762{
763 st_index_t i, ni;
764 unsigned int size_ind;
765 st_table_entry *new_entries;
766 st_table_entry *curr_entry_ptr;
767 st_index_t *bins;
768 st_index_t bin_ind;
769
770 new_entries = new_tab->entries;
771
772 ni = 0;
773 bins = new_tab->bins;
774 size_ind = get_size_ind(new_tab);
775 st_index_t bound = tab->entries_bound;
776 st_table_entry *entries = tab->entries;
777
778 for (i = tab->entries_start; i < bound; i++) {
779 curr_entry_ptr = &entries[i];
780 PREFETCH(entries + i + 1, 0);
781 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
782 continue;
783 if (&new_entries[ni] != curr_entry_ptr)
784 new_entries[ni] = *curr_entry_ptr;
785 if (EXPECT(bins != NULL, 1)) {
786 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
787 curr_entry_ptr->key);
788 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
789 }
790 new_tab->num_entries++;
791 ni++;
792 }
793
794 assert(new_tab->num_entries == tab->num_entries);
795}
796
797static void
798rebuild_move_table(st_table *const new_tab, st_table *const tab)
799{
800 tab->entry_power = new_tab->entry_power;
801 tab->bin_power = new_tab->bin_power;
802 tab->size_ind = new_tab->size_ind;
803 free(tab->bins);
804 tab->bins = new_tab->bins;
805 free(tab->entries);
806 tab->entries = new_tab->entries;
807 free(new_tab);
808}
809
810static void
811rebuild_cleanup(st_table *const tab)
812{
813 tab->entries_start = 0;
814 tab->entries_bound = tab->num_entries;
815 tab->rebuilds_num++;
816}
817
818/* Return the next secondary hash index for table TAB using previous
819 index IND and PERTURB. Finally modulo of the function becomes a
820 full *cycle linear congruential generator*, in other words it
821 guarantees traversing all table bins in extreme case.
822
823 According the Hull-Dobell theorem a generator
824 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
825 o m and c are relatively prime
826 o a-1 is divisible by all prime factors of m
827 o a-1 is divisible by 4 if m is divisible by 4.
828
829 For our case a is 5, c is 1, and m is a power of two. */
830static inline st_index_t
831secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb)
832{
833 *perturb >>= 11;
834 ind = (ind << 2) + ind + *perturb + 1;
835 return hash_bin(ind, tab);
836}
837
838/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
839 search. Return the index of the found entry in array `entries`.
840 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
841 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
842static inline st_index_t
843find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
844{
845 int eq_p, rebuilt_p;
846 st_index_t i, bound;
847 st_table_entry *entries;
848
849 bound = tab->entries_bound;
850 entries = tab->entries;
851 for (i = tab->entries_start; i < bound; i++) {
852 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
853 if (EXPECT(rebuilt_p, 0))
854 return REBUILT_TABLE_ENTRY_IND;
855 if (eq_p)
856 return i;
857 }
858 return UNDEFINED_ENTRY_IND;
859}
860
861/* Use the quadratic probing. The method has a better data locality
862 but more collisions than the current approach. In average it
863 results in a bit slower search. */
864/*#define QUADRATIC_PROBE*/
865
866/* Return index of entry with HASH_VALUE and KEY in table TAB. If
867 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
868 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
869static st_index_t
870find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
871{
872 int eq_p, rebuilt_p;
873 st_index_t ind;
874#ifdef QUADRATIC_PROBE
875 st_index_t d;
876#else
877 st_index_t perturb;
878#endif
879 st_index_t bin;
880 st_table_entry *entries = tab->entries;
881
882 ind = hash_bin(hash_value, tab);
883#ifdef QUADRATIC_PROBE
884 d = 1;
885#else
886 perturb = hash_value;
887#endif
888 FOUND_BIN;
889 for (;;) {
890 bin = get_bin(tab->bins, get_size_ind(tab), ind);
891 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
892 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
893 if (EXPECT(rebuilt_p, 0))
894 return REBUILT_TABLE_ENTRY_IND;
895 if (eq_p)
896 break;
897 }
898 else if (EMPTY_BIN_P(bin))
899 return UNDEFINED_ENTRY_IND;
900#ifdef QUADRATIC_PROBE
901 ind = hash_bin(ind + d, tab);
902 d++;
903#else
904 ind = secondary_hash(ind, tab, &perturb);
905#endif
906 COLLISION;
907 }
908 return bin;
909}
910
911/* Find and return index of table TAB bin corresponding to an entry
912 with HASH_VALUE and KEY. If there is no such bin, return
913 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
914 return REBUILT_TABLE_BIN_IND. */
915static st_index_t
916find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
917{
918 int eq_p, rebuilt_p;
919 st_index_t ind;
920#ifdef QUADRATIC_PROBE
921 st_index_t d;
922#else
923 st_index_t perturb;
924#endif
925 st_index_t bin;
926 st_table_entry *entries = tab->entries;
927
928 ind = hash_bin(hash_value, tab);
929#ifdef QUADRATIC_PROBE
930 d = 1;
931#else
932 perturb = hash_value;
933#endif
934 FOUND_BIN;
935 for (;;) {
936 bin = get_bin(tab->bins, get_size_ind(tab), ind);
937 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
938 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
939 if (EXPECT(rebuilt_p, 0))
940 return REBUILT_TABLE_BIN_IND;
941 if (eq_p)
942 break;
943 }
944 else if (EMPTY_BIN_P(bin))
945 return UNDEFINED_BIN_IND;
946#ifdef QUADRATIC_PROBE
947 ind = hash_bin(ind + d, tab);
948 d++;
949#else
950 ind = secondary_hash(ind, tab, &perturb);
951#endif
952 COLLISION;
953 }
954 return ind;
955}
956
957/* Find and return index of table TAB bin corresponding to an entry
958 with HASH_VALUE and KEY. The entry should be in the table
959 already. */
960static st_index_t
961find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
962{
963 st_index_t ind;
964#ifdef QUADRATIC_PROBE
965 st_index_t d;
966#else
967 st_index_t perturb;
968#endif
969 st_index_t bin;
970
971 ind = hash_bin(hash_value, tab);
972#ifdef QUADRATIC_PROBE
973 d = 1;
974#else
975 perturb = hash_value;
976#endif
977 FOUND_BIN;
978 for (;;) {
979 bin = get_bin(tab->bins, get_size_ind(tab), ind);
980 if (EMPTY_OR_DELETED_BIN_P(bin))
981 return ind;
982#ifdef QUADRATIC_PROBE
983 ind = hash_bin(ind + d, tab);
984 d++;
985#else
986 ind = secondary_hash(ind, tab, &perturb);
987#endif
988 COLLISION;
989 }
990}
991
992/* Return index of table TAB bin for HASH_VALUE and KEY through
993 BIN_IND and the pointed value as the function result. Reserve the
994 bin for inclusion of the corresponding entry into the table if it
995 is not there yet. We always find such bin as bins array length is
996 bigger entries array. Although we can reuse a deleted bin, the
997 result bin value is always empty if the table has no entry with
998 KEY. Return the entries array index of the found entry or
999 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
1000 during the search, return REBUILT_TABLE_ENTRY_IND. */
1001static st_index_t
1002find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
1003 st_data_t key, st_index_t *bin_ind)
1004{
1005 int eq_p, rebuilt_p;
1006 st_index_t ind;
1007 st_hash_t curr_hash_value = *hash_value;
1008#ifdef QUADRATIC_PROBE
1009 st_index_t d;
1010#else
1011 st_index_t perturb;
1012#endif
1013 st_index_t entry_index;
1014 st_index_t first_deleted_bin_ind;
1015 st_table_entry *entries;
1016
1017 ind = hash_bin(curr_hash_value, tab);
1018#ifdef QUADRATIC_PROBE
1019 d = 1;
1020#else
1021 perturb = curr_hash_value;
1022#endif
1023 FOUND_BIN;
1024 first_deleted_bin_ind = UNDEFINED_BIN_IND;
1025 entries = tab->entries;
1026 for (;;) {
1027 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
1028 if (EMPTY_BIN_P(entry_index)) {
1029 tab->num_entries++;
1030 entry_index = UNDEFINED_ENTRY_IND;
1031 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
1032 /* We can reuse bin of a deleted entry. */
1033 ind = first_deleted_bin_ind;
1034 MARK_BIN_EMPTY(tab, ind);
1035 }
1036 break;
1037 }
1038 else if (! DELETED_BIN_P(entry_index)) {
1039 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1040 if (EXPECT(rebuilt_p, 0))
1041 return REBUILT_TABLE_ENTRY_IND;
1042 if (eq_p)
1043 break;
1044 }
1045 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1046 first_deleted_bin_ind = ind;
1047#ifdef QUADRATIC_PROBE
1048 ind = hash_bin(ind + d, tab);
1049 d++;
1050#else
1051 ind = secondary_hash(ind, tab, &perturb);
1052#endif
1053 COLLISION;
1054 }
1055 *bin_ind = ind;
1056 return entry_index;
1057}
1058
1059/* Find an entry with KEY in table TAB. Return non-zero if we found
1060 it. Set up *RECORD to the found entry record. */
1061int
1062st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1063{
1064 st_index_t bin;
1065 st_hash_t hash = do_hash(key, tab);
1066
1067 retry:
1068 if (tab->bins == NULL) {
1069 bin = find_entry(tab, hash, key);
1070 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1071 goto retry;
1072 if (bin == UNDEFINED_ENTRY_IND)
1073 return 0;
1074 }
1075 else {
1076 bin = find_table_entry_ind(tab, hash, key);
1077 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1078 goto retry;
1079 if (bin == UNDEFINED_ENTRY_IND)
1080 return 0;
1081 bin -= ENTRY_BASE;
1082 }
1083 if (value != 0)
1084 *value = tab->entries[bin].record;
1085 return 1;
1086}
1087
1088/* Find an entry with KEY in table TAB. Return non-zero if we found
1089 it. Set up *RESULT to the found table entry key. */
1090int
1091st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1092{
1093 st_index_t bin;
1094 st_hash_t hash = do_hash(key, tab);
1095
1096 retry:
1097 if (tab->bins == NULL) {
1098 bin = find_entry(tab, hash, key);
1099 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1100 goto retry;
1101 if (bin == UNDEFINED_ENTRY_IND)
1102 return 0;
1103 }
1104 else {
1105 bin = find_table_entry_ind(tab, hash, key);
1106 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1107 goto retry;
1108 if (bin == UNDEFINED_ENTRY_IND)
1109 return 0;
1110 bin -= ENTRY_BASE;
1111 }
1112 if (result != 0)
1113 *result = tab->entries[bin].key;
1114 return 1;
1115}
1116
1117/* Check the table and rebuild it if it is necessary. */
1118static inline void
1119rebuild_table_if_necessary (st_table *tab)
1120{
1121 st_index_t bound = tab->entries_bound;
1122
1123 if (bound == get_allocated_entries(tab))
1124 rebuild_table(tab);
1125}
1126
1127/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1128 already entry with KEY in the table, return nonzero and update
1129 the value of the found entry. */
1130int
1131st_insert(st_table *tab, st_data_t key, st_data_t value)
1132{
1133 st_table_entry *entry;
1134 st_index_t bin;
1135 st_index_t ind;
1136 st_hash_t hash_value;
1137 st_index_t bin_ind;
1138 int new_p;
1139
1140 hash_value = do_hash(key, tab);
1141 retry:
1142 rebuild_table_if_necessary(tab);
1143 if (tab->bins == NULL) {
1144 bin = find_entry(tab, hash_value, key);
1145 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1146 goto retry;
1147 new_p = bin == UNDEFINED_ENTRY_IND;
1148 if (new_p)
1149 tab->num_entries++;
1150 bin_ind = UNDEFINED_BIN_IND;
1151 }
1152 else {
1153 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1154 key, &bin_ind);
1155 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1156 goto retry;
1157 new_p = bin == UNDEFINED_ENTRY_IND;
1158 bin -= ENTRY_BASE;
1159 }
1160 if (new_p) {
1161 ind = tab->entries_bound++;
1162 entry = &tab->entries[ind];
1163 entry->hash = hash_value;
1164 entry->key = key;
1165 entry->record = value;
1166 if (bin_ind != UNDEFINED_BIN_IND)
1167 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1168 return 0;
1169 }
1170 tab->entries[bin].record = value;
1171 return 1;
1172}
1173
1174/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1175 entry with KEY before the insertion. */
1176static inline void
1177st_add_direct_with_hash(st_table *tab,
1178 st_data_t key, st_data_t value, st_hash_t hash)
1179{
1180 st_table_entry *entry;
1181 st_index_t ind;
1182 st_index_t bin_ind;
1183
1184 assert(hash != RESERVED_HASH_VAL);
1185
1186 rebuild_table_if_necessary(tab);
1187 ind = tab->entries_bound++;
1188 entry = &tab->entries[ind];
1189 entry->hash = hash;
1190 entry->key = key;
1191 entry->record = value;
1192 tab->num_entries++;
1193 if (tab->bins != NULL) {
1194 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1195 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1196 }
1197}
1198
1199void
1200rb_st_add_direct_with_hash(st_table *tab,
1201 st_data_t key, st_data_t value, st_hash_t hash)
1202{
1203 st_add_direct_with_hash(tab, key, value, normalize_hash_value(hash));
1204}
1205
1206/* Insert (KEY, VALUE) into table TAB. The table should not have
1207 entry with KEY before the insertion. */
1208void
1209st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1210{
1211 st_hash_t hash_value;
1212
1213 hash_value = do_hash(key, tab);
1214 st_add_direct_with_hash(tab, key, value, hash_value);
1215}
1216
1217/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1218 there is already entry with KEY in the table, return nonzero and
1219 update the value of the found entry. */
1220int
1221st_insert2(st_table *tab, st_data_t key, st_data_t value,
1222 st_data_t (*func)(st_data_t))
1223{
1224 st_table_entry *entry;
1225 st_index_t bin;
1226 st_index_t ind;
1227 st_hash_t hash_value;
1228 st_index_t bin_ind;
1229 int new_p;
1230
1231 hash_value = do_hash(key, tab);
1232 retry:
1233 rebuild_table_if_necessary (tab);
1234 if (tab->bins == NULL) {
1235 bin = find_entry(tab, hash_value, key);
1236 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1237 goto retry;
1238 new_p = bin == UNDEFINED_ENTRY_IND;
1239 if (new_p)
1240 tab->num_entries++;
1241 bin_ind = UNDEFINED_BIN_IND;
1242 }
1243 else {
1244 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1245 key, &bin_ind);
1246 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1247 goto retry;
1248 new_p = bin == UNDEFINED_ENTRY_IND;
1249 bin -= ENTRY_BASE;
1250 }
1251 if (new_p) {
1252 key = (*func)(key);
1253 ind = tab->entries_bound++;
1254 entry = &tab->entries[ind];
1255 entry->hash = hash_value;
1256 entry->key = key;
1257 entry->record = value;
1258 if (bin_ind != UNDEFINED_BIN_IND)
1259 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1260 return 0;
1261 }
1262 tab->entries[bin].record = value;
1263 return 1;
1264}
1265
1266/* Create a copy of old_tab into new_tab. */
1267st_table *
1268st_replace(st_table *new_tab, st_table *old_tab)
1269{
1270 *new_tab = *old_tab;
1271 if (old_tab->bins == NULL)
1272 new_tab->bins = NULL;
1273 else {
1274 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1275#ifndef RUBY
1276 if (new_tab->bins == NULL) {
1277 return NULL;
1278 }
1279#endif
1280 }
1281 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1282 * sizeof(st_table_entry));
1283#ifndef RUBY
1284 if (new_tab->entries == NULL) {
1285 return NULL;
1286 }
1287#endif
1288 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1289 get_allocated_entries(old_tab));
1290 if (old_tab->bins != NULL)
1291 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1292
1293 return new_tab;
1294}
1295
1296/* Create and return a copy of table OLD_TAB. */
1297st_table *
1298st_copy(st_table *old_tab)
1299{
1300 st_table *new_tab;
1301
1302 new_tab = (st_table *) malloc(sizeof(st_table));
1303#ifndef RUBY
1304 if (new_tab == NULL)
1305 return NULL;
1306#endif
1307
1308 if (st_replace(new_tab, old_tab) == NULL) {
1309 st_free_table(new_tab);
1310 return NULL;
1311 }
1312
1313 return new_tab;
1314}
1315
1316/* Update the entries start of table TAB after removing an entry
1317 with index N in the array entries. */
1318static inline void
1319update_range_for_deleted(st_table *tab, st_index_t n)
1320{
1321 /* Do not update entries_bound here. Otherwise, we can fill all
1322 bins by deleted entry value before rebuilding the table. */
1323 if (tab->entries_start == n) {
1324 st_index_t start = n + 1;
1325 st_index_t bound = tab->entries_bound;
1326 st_table_entry *entries = tab->entries;
1327 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1328 tab->entries_start = start;
1329 }
1330}
1331
1332/* Delete entry with KEY from table TAB, set up *VALUE (unless
1333 VALUE is zero) from deleted table entry, and return non-zero. If
1334 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1335 is zero), and return zero. */
1336static int
1337st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1338{
1339 st_table_entry *entry;
1340 st_index_t bin;
1341 st_index_t bin_ind;
1342 st_hash_t hash;
1343
1344 hash = do_hash(*key, tab);
1345 retry:
1346 if (tab->bins == NULL) {
1347 bin = find_entry(tab, hash, *key);
1348 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1349 goto retry;
1350 if (bin == UNDEFINED_ENTRY_IND) {
1351 if (value != 0) *value = 0;
1352 return 0;
1353 }
1354 }
1355 else {
1356 bin_ind = find_table_bin_ind(tab, hash, *key);
1357 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1358 goto retry;
1359 if (bin_ind == UNDEFINED_BIN_IND) {
1360 if (value != 0) *value = 0;
1361 return 0;
1362 }
1363 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1364 MARK_BIN_DELETED(tab, bin_ind);
1365 }
1366 entry = &tab->entries[bin];
1367 *key = entry->key;
1368 if (value != 0) *value = entry->record;
1369 MARK_ENTRY_DELETED(entry);
1370 tab->num_entries--;
1371 update_range_for_deleted(tab, bin);
1372 return 1;
1373}
1374
1375int
1376st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1377{
1378 return st_general_delete(tab, key, value);
1379}
1380
1381/* The function and other functions with suffix '_safe' or '_check'
1382 are originated from the previous implementation of the hash tables.
1383 It was necessary for correct deleting entries during traversing
1384 tables. The current implementation permits deletion during
1385 traversing without a specific way to do this. */
1386int
1387st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1388 st_data_t never ATTRIBUTE_UNUSED)
1389{
1390 return st_general_delete(tab, key, value);
1391}
1392
1393/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1394 return zero. Otherwise, remove the first entry in the table.
1395 Return its key through KEY and its record through VALUE (unless
1396 VALUE is zero). */
1397int
1398st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1399{
1400 st_index_t i, bound;
1401 st_index_t bin;
1402 st_table_entry *entries, *curr_entry_ptr;
1403 st_index_t bin_ind;
1404
1405 entries = tab->entries;
1406 bound = tab->entries_bound;
1407 for (i = tab->entries_start; i < bound; i++) {
1408 curr_entry_ptr = &entries[i];
1409 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1410 st_hash_t entry_hash = curr_entry_ptr->hash;
1411 st_data_t entry_key = curr_entry_ptr->key;
1412
1413 if (value != 0) *value = curr_entry_ptr->record;
1414 *key = entry_key;
1415 retry:
1416 if (tab->bins == NULL) {
1417 bin = find_entry(tab, entry_hash, entry_key);
1418 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1419 entries = tab->entries;
1420 goto retry;
1421 }
1422 curr_entry_ptr = &entries[bin];
1423 }
1424 else {
1425 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1426 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1427 entries = tab->entries;
1428 goto retry;
1429 }
1430 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1431 - ENTRY_BASE];
1432 MARK_BIN_DELETED(tab, bin_ind);
1433 }
1434 MARK_ENTRY_DELETED(curr_entry_ptr);
1435 tab->num_entries--;
1436 update_range_for_deleted(tab, i);
1437 return 1;
1438 }
1439 }
1440 if (value != 0) *value = 0;
1441 return 0;
1442}
1443
1444/* See comments for function st_delete_safe. */
1445void
1446st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1447 st_data_t never ATTRIBUTE_UNUSED)
1448{
1449}
1450
1451/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1452 of the key and the value of the found entry, and non-zero as the
1453 3rd argument. If the entry is not found, call FUNC with a pointer
1454 to KEY, a pointer to zero, and a zero argument. If the call
1455 returns ST_CONTINUE, the table will have an entry with key and
1456 value returned by FUNC through the 1st and 2nd parameters. If the
1457 call of FUNC returns ST_DELETE, the table will not have entry with
1458 KEY. The function returns flag of that the entry with KEY was in
1459 the table before the call. */
1460int
1461st_update(st_table *tab, st_data_t key,
1462 st_update_callback_func *func, st_data_t arg)
1463{
1464 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1465 st_index_t bin = 0; /* Ditto */
1466 st_table_entry *entries;
1467 st_index_t bin_ind;
1468 st_data_t value = 0, old_key;
1469 int retval, existing;
1470 st_hash_t hash = do_hash(key, tab);
1471
1472 retry:
1473 entries = tab->entries;
1474 if (tab->bins == NULL) {
1475 bin = find_entry(tab, hash, key);
1476 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1477 goto retry;
1478 existing = bin != UNDEFINED_ENTRY_IND;
1479 entry = &entries[bin];
1480 bin_ind = UNDEFINED_BIN_IND;
1481 }
1482 else {
1483 bin_ind = find_table_bin_ind(tab, hash, key);
1484 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1485 goto retry;
1486 existing = bin_ind != UNDEFINED_BIN_IND;
1487 if (existing) {
1488 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1489 entry = &entries[bin];
1490 }
1491 }
1492 if (existing) {
1493 key = entry->key;
1494 value = entry->record;
1495 }
1496 old_key = key;
1497
1498 unsigned int rebuilds_num = tab->rebuilds_num;
1499
1500 retval = (*func)(&key, &value, arg, existing);
1501
1502 // We need to make sure that the callback didn't cause a table rebuild
1503 // Ideally we would make sure no operations happened
1504 assert(rebuilds_num == tab->rebuilds_num);
1505 (void)rebuilds_num;
1506
1507 switch (retval) {
1508 case ST_CONTINUE:
1509 if (! existing) {
1510 st_add_direct_with_hash(tab, key, value, hash);
1511 break;
1512 }
1513 if (old_key != key) {
1514 entry->key = key;
1515 }
1516 entry->record = value;
1517 break;
1518 case ST_DELETE:
1519 if (existing) {
1520 if (bin_ind != UNDEFINED_BIN_IND)
1521 MARK_BIN_DELETED(tab, bin_ind);
1522 MARK_ENTRY_DELETED(entry);
1523 tab->num_entries--;
1524 update_range_for_deleted(tab, bin);
1525 }
1526 break;
1527 }
1528 return existing;
1529}
1530
1531/* Traverse all entries in table TAB calling FUNC with current entry
1532 key and value and zero. If the call returns ST_STOP, stop
1533 traversing. If the call returns ST_DELETE, delete the current
1534 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1535 traversing. The function returns zero unless an error is found.
1536 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1537 different for ST_CHECK and when the current element is removed
1538 during traversing. */
1539static inline int
1540st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1541 int check_p)
1542{
1543 st_index_t bin;
1544 st_index_t bin_ind;
1545 st_table_entry *entries, *curr_entry_ptr;
1546 enum st_retval retval;
1547 st_index_t i, rebuilds_num;
1548 st_hash_t hash;
1549 st_data_t key;
1550 int error_p, packed_p = tab->bins == NULL;
1551
1552 entries = tab->entries;
1553 /* The bound can change inside the loop even without rebuilding
1554 the table, e.g. by an entry insertion. */
1555 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1556 curr_entry_ptr = &entries[i];
1557 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1558 continue;
1559 key = curr_entry_ptr->key;
1560 rebuilds_num = tab->rebuilds_num;
1561 hash = curr_entry_ptr->hash;
1562 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1563
1564 if (retval == ST_REPLACE && replace) {
1565 st_data_t value;
1566 value = curr_entry_ptr->record;
1567 retval = (*replace)(&key, &value, arg, TRUE);
1568 curr_entry_ptr->key = key;
1569 curr_entry_ptr->record = value;
1570 }
1571
1572 if (rebuilds_num != tab->rebuilds_num) {
1573 retry:
1574 entries = tab->entries;
1575 packed_p = tab->bins == NULL;
1576 if (packed_p) {
1577 i = find_entry(tab, hash, key);
1578 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1579 goto retry;
1580 error_p = i == UNDEFINED_ENTRY_IND;
1581 }
1582 else {
1583 i = find_table_entry_ind(tab, hash, key);
1584 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1585 goto retry;
1586 error_p = i == UNDEFINED_ENTRY_IND;
1587 i -= ENTRY_BASE;
1588 }
1589 if (error_p && check_p) {
1590 /* call func with error notice */
1591 retval = (*func)(0, 0, arg, 1);
1592 return 1;
1593 }
1594 curr_entry_ptr = &entries[i];
1595 }
1596 switch (retval) {
1597 case ST_REPLACE:
1598 break;
1599 case ST_CONTINUE:
1600 break;
1601 case ST_CHECK:
1602 if (check_p)
1603 break;
1604 case ST_STOP:
1605 return 0;
1606 case ST_DELETE: {
1607 st_data_t key = curr_entry_ptr->key;
1608
1609 again:
1610 if (packed_p) {
1611 bin = find_entry(tab, hash, key);
1612 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1613 goto again;
1614 if (bin == UNDEFINED_ENTRY_IND)
1615 break;
1616 }
1617 else {
1618 bin_ind = find_table_bin_ind(tab, hash, key);
1619 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1620 goto again;
1621 if (bin_ind == UNDEFINED_BIN_IND)
1622 break;
1623 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1624 MARK_BIN_DELETED(tab, bin_ind);
1625 }
1626 curr_entry_ptr = &entries[bin];
1627 MARK_ENTRY_DELETED(curr_entry_ptr);
1628 tab->num_entries--;
1629 update_range_for_deleted(tab, bin);
1630 break;
1631 }
1632 }
1633 }
1634 return 0;
1635}
1636
1637int
1638st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1639{
1640 return st_general_foreach(tab, func, replace, arg, TRUE);
1641}
1642
1643struct functor {
1644 st_foreach_callback_func *func;
1645 st_data_t arg;
1646};
1647
1648static int
1649apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1650{
1651 const struct functor *f = (void *)d;
1652 return f->func(k, v, f->arg);
1653}
1654
1655int
1656st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1657{
1658 const struct functor f = { func, arg };
1659 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1660}
1661
1662/* See comments for function st_delete_safe. */
1663int
1664st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1665 st_data_t never ATTRIBUTE_UNUSED)
1666{
1667 return st_general_foreach(tab, func, 0, arg, TRUE);
1668}
1669
1670/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1671 Return the number of keys set up in array KEYS. */
1672static inline st_index_t
1673st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1674{
1675 st_index_t i, bound;
1676 st_data_t key, *keys_start, *keys_end;
1677 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1678
1679 bound = tab->entries_bound;
1680 keys_start = keys;
1681 keys_end = keys + size;
1682 for (i = tab->entries_start; i < bound; i++) {
1683 if (keys == keys_end)
1684 break;
1685 curr_entry_ptr = &entries[i];
1686 key = curr_entry_ptr->key;
1687 if (! DELETED_ENTRY_P(curr_entry_ptr))
1688 *keys++ = key;
1689 }
1690
1691 return keys - keys_start;
1692}
1693
1694st_index_t
1695st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1696{
1697 return st_general_keys(tab, keys, size);
1698}
1699
1700/* See comments for function st_delete_safe. */
1701st_index_t
1702st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1703 st_data_t never ATTRIBUTE_UNUSED)
1704{
1705 return st_general_keys(tab, keys, size);
1706}
1707
1708/* Set up array VALUES by at most SIZE values of head table TAB
1709 entries. Return the number of values set up in array VALUES. */
1710static inline st_index_t
1711st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1712{
1713 st_index_t i, bound;
1714 st_data_t *values_start, *values_end;
1715 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1716
1717 values_start = values;
1718 values_end = values + size;
1719 bound = tab->entries_bound;
1720 for (i = tab->entries_start; i < bound; i++) {
1721 if (values == values_end)
1722 break;
1723 curr_entry_ptr = &entries[i];
1724 if (! DELETED_ENTRY_P(curr_entry_ptr))
1725 *values++ = curr_entry_ptr->record;
1726 }
1727
1728 return values - values_start;
1729}
1730
1731st_index_t
1732st_values(st_table *tab, st_data_t *values, st_index_t size)
1733{
1734 return st_general_values(tab, values, size);
1735}
1736
1737/* See comments for function st_delete_safe. */
1738st_index_t
1739st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1740 st_data_t never ATTRIBUTE_UNUSED)
1741{
1742 return st_general_values(tab, values, size);
1743}
1744
1745#define FNV1_32A_INIT 0x811c9dc5
1746
1747/*
1748 * 32 bit magic FNV-1a prime
1749 */
1750#define FNV_32_PRIME 0x01000193
1751
1752/* __POWERPC__ added to accommodate Darwin case. */
1753#ifndef UNALIGNED_WORD_ACCESS
1754# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1755 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
1756 defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
1757 defined(__mc68020__)
1758# define UNALIGNED_WORD_ACCESS 1
1759# endif
1760#endif
1761#ifndef UNALIGNED_WORD_ACCESS
1762# define UNALIGNED_WORD_ACCESS 0
1763#endif
1764
1765/* This hash function is quite simplified MurmurHash3
1766 * Simplification is legal, cause most of magic still happens in finalizator.
1767 * And finalizator is almost the same as in MurmurHash3 */
1768#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1769#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1770
1771#if ST_INDEX_BITS <= 32
1772#define C1 (st_index_t)0xcc9e2d51
1773#define C2 (st_index_t)0x1b873593
1774#else
1775#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1776#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1777#endif
1778NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1779NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1780NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1781
1782static inline st_index_t
1783murmur_step(st_index_t h, st_index_t k)
1784{
1785#if ST_INDEX_BITS <= 32
1786#define r1 (17)
1787#define r2 (11)
1788#else
1789#define r1 (33)
1790#define r2 (24)
1791#endif
1792 k *= C1;
1793 h ^= ROTL(k, r1);
1794 h *= C2;
1795 h = ROTL(h, r2);
1796 return h;
1797}
1798#undef r1
1799#undef r2
1800
1801static inline st_index_t
1802murmur_finish(st_index_t h)
1803{
1804#if ST_INDEX_BITS <= 32
1805#define r1 (16)
1806#define r2 (13)
1807#define r3 (16)
1808 const st_index_t c1 = 0x85ebca6b;
1809 const st_index_t c2 = 0xc2b2ae35;
1810#else
1811/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1812#define r1 (30)
1813#define r2 (27)
1814#define r3 (31)
1815 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1816 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1817#endif
1818#if ST_INDEX_BITS > 64
1819 h ^= h >> 64;
1820 h *= c2;
1821 h ^= h >> 65;
1822#endif
1823 h ^= h >> r1;
1824 h *= c1;
1825 h ^= h >> r2;
1826 h *= c2;
1827 h ^= h >> r3;
1828 return h;
1829}
1830#undef r1
1831#undef r2
1832#undef r3
1833
1834st_index_t
1835st_hash(const void *ptr, size_t len, st_index_t h)
1836{
1837 const char *data = ptr;
1838 st_index_t t = 0;
1839 size_t l = len;
1840
1841#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1842#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1843#if SIZEOF_ST_INDEX_T > 4
1844#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1845#if SIZEOF_ST_INDEX_T > 8
1846#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1847 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1848#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1849#endif
1850#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1851#else
1852#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1853#endif
1854#undef SKIP_TAIL
1855 if (len >= sizeof(st_index_t)) {
1856#if !UNALIGNED_WORD_ACCESS
1857 int align = (int)((st_data_t)data % sizeof(st_index_t));
1858 if (align) {
1859 st_index_t d = 0;
1860 int sl, sr, pack;
1861
1862 switch (align) {
1863#ifdef WORDS_BIGENDIAN
1864# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1865 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1866#else
1867# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1868 t |= data_at(n) << CHAR_BIT*(n)
1869#endif
1870 UNALIGNED_ADD_ALL;
1871#undef UNALIGNED_ADD
1872 }
1873
1874#ifdef WORDS_BIGENDIAN
1875 t >>= (CHAR_BIT * align) - CHAR_BIT;
1876#else
1877 t <<= (CHAR_BIT * align);
1878#endif
1879
1880 data += sizeof(st_index_t)-align;
1881 len -= sizeof(st_index_t)-align;
1882
1883 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1884 sr = CHAR_BIT * align;
1885
1886 while (len >= sizeof(st_index_t)) {
1887 d = *(st_index_t *)data;
1888#ifdef WORDS_BIGENDIAN
1889 t = (t << sr) | (d >> sl);
1890#else
1891 t = (t >> sr) | (d << sl);
1892#endif
1893 h = murmur_step(h, t);
1894 t = d;
1895 data += sizeof(st_index_t);
1896 len -= sizeof(st_index_t);
1897 }
1898
1899 pack = len < (size_t)align ? (int)len : align;
1900 d = 0;
1901 switch (pack) {
1902#ifdef WORDS_BIGENDIAN
1903# define UNALIGNED_ADD(n) case (n) + 1: \
1904 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1905#else
1906# define UNALIGNED_ADD(n) case (n) + 1: \
1907 d |= data_at(n) << CHAR_BIT*(n)
1908#endif
1909 UNALIGNED_ADD_ALL;
1910#undef UNALIGNED_ADD
1911 }
1912#ifdef WORDS_BIGENDIAN
1913 t = (t << sr) | (d >> sl);
1914#else
1915 t = (t >> sr) | (d << sl);
1916#endif
1917
1918 if (len < (size_t)align) goto skip_tail;
1919# define SKIP_TAIL 1
1920 h = murmur_step(h, t);
1921 data += pack;
1922 len -= pack;
1923 }
1924 else
1925#endif
1926#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1927#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1928#else
1929#define aligned_data data
1930#endif
1931 {
1932 do {
1933 h = murmur_step(h, *(st_index_t *)aligned_data);
1934 data += sizeof(st_index_t);
1935 len -= sizeof(st_index_t);
1936 } while (len >= sizeof(st_index_t));
1937 }
1938 }
1939
1940 t = 0;
1941 switch (len) {
1942#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1943 /* in this case byteorder doesn't really matter */
1944#if SIZEOF_ST_INDEX_T > 4
1945 case 7: t |= data_at(6) << 48;
1946 case 6: t |= data_at(5) << 40;
1947 case 5: t |= data_at(4) << 32;
1948 case 4:
1949 t |= (st_index_t)*(uint32_t*)aligned_data;
1950 goto skip_tail;
1951# define SKIP_TAIL 1
1952#endif
1953 case 3: t |= data_at(2) << 16;
1954 case 2: t |= data_at(1) << 8;
1955 case 1: t |= data_at(0);
1956#else
1957#ifdef WORDS_BIGENDIAN
1958# define UNALIGNED_ADD(n) case (n) + 1: \
1959 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1960#else
1961# define UNALIGNED_ADD(n) case (n) + 1: \
1962 t |= data_at(n) << CHAR_BIT*(n)
1963#endif
1964 UNALIGNED_ADD_ALL;
1965#undef UNALIGNED_ADD
1966#endif
1967#ifdef SKIP_TAIL
1968 skip_tail:
1969#endif
1970 h ^= t; h -= ROTL(t, 7);
1971 h *= C2;
1972 }
1973 h ^= l;
1974#undef aligned_data
1975
1976 return murmur_finish(h);
1977}
1978
1979st_index_t
1980st_hash_uint32(st_index_t h, uint32_t i)
1981{
1982 return murmur_step(h, i);
1983}
1984
1985NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1986st_index_t
1987st_hash_uint(st_index_t h, st_index_t i)
1988{
1989 i += h;
1990/* no matter if it is BigEndian or LittleEndian,
1991 * we hash just integers */
1992#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1993 h = murmur_step(h, i >> 8*8);
1994#endif
1995 h = murmur_step(h, i);
1996 return h;
1997}
1998
1999st_index_t
2000st_hash_end(st_index_t h)
2001{
2002 h = murmur_finish(h);
2003 return h;
2004}
2005
2006#undef st_hash_start
2007st_index_t
2008rb_st_hash_start(st_index_t h)
2009{
2010 return h;
2011}
2012
2013static st_index_t
2014strhash(st_data_t arg)
2015{
2016 register const char *string = (const char *)arg;
2017 return st_hash(string, strlen(string), FNV1_32A_INIT);
2018}
2019
2020int
2021st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
2022{
2023 char c1, c2;
2024
2025 while (1) {
2026 c1 = *s1++;
2027 c2 = *s2++;
2028 if (c1 == '\0' || c2 == '\0') {
2029 if (c1 != '\0') return 1;
2030 if (c2 != '\0') return -1;
2031 return 0;
2032 }
2033 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2034 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2035 if (c1 != c2) {
2036 if (c1 > c2)
2037 return 1;
2038 else
2039 return -1;
2040 }
2041 }
2042}
2043
2044int
2045st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
2046{
2047 char c1, c2;
2048 size_t i;
2049
2050 for (i = 0; i < n; i++) {
2051 c1 = *s1++;
2052 c2 = *s2++;
2053 if (c1 == '\0' || c2 == '\0') {
2054 if (c1 != '\0') return 1;
2055 if (c2 != '\0') return -1;
2056 return 0;
2057 }
2058 if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A';
2059 if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A';
2060 if (c1 != c2) {
2061 if (c1 > c2)
2062 return 1;
2063 else
2064 return -1;
2065 }
2066 }
2067 return 0;
2068}
2069
2070static int
2071st_strcmp(st_data_t lhs, st_data_t rhs)
2072{
2073 const char *s1 = (char *)lhs;
2074 const char *s2 = (char *)rhs;
2075 return strcmp(s1, s2);
2076}
2077
2078static int
2079st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2080{
2081 const char *s1 = (char *)lhs;
2082 const char *s2 = (char *)rhs;
2083 return st_locale_insensitive_strcasecmp(s1, s2);
2084}
2085
2086NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2087static st_index_t
2088strcasehash(st_data_t arg)
2089{
2090 register const char *string = (const char *)arg;
2091 register st_index_t hval = FNV1_32A_INIT;
2092
2093 /*
2094 * FNV-1a hash each octet in the buffer
2095 */
2096 while (*string) {
2097 unsigned int c = (unsigned char)*string++;
2098 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2099 hval ^= c;
2100
2101 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2102 hval *= FNV_32_PRIME;
2103 }
2104 return hval;
2105}
2106
2107int
2108st_numcmp(st_data_t x, st_data_t y)
2109{
2110 return x != y;
2111}
2112
2113st_index_t
2114st_numhash(st_data_t n)
2115{
2116 enum {s1 = 11, s2 = 3};
2117 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2118}
2119
2120#ifdef RUBY
2121/* Expand TAB to be suitable for holding SIZ entries in total.
2122 Pre-existing entries remain not deleted inside of TAB, but its bins
2123 are cleared to expect future reconstruction. See rehash below. */
2124static void
2125st_expand_table(st_table *tab, st_index_t siz)
2126{
2127 st_table *tmp;
2128 st_index_t n;
2129
2130 if (siz <= get_allocated_entries(tab))
2131 return; /* enough room already */
2132
2133 tmp = st_init_table_with_size(tab->type, siz);
2134 n = get_allocated_entries(tab);
2135 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2136 free(tab->entries);
2137 free(tab->bins);
2138 free(tmp->bins);
2139 tab->entry_power = tmp->entry_power;
2140 tab->bin_power = tmp->bin_power;
2141 tab->size_ind = tmp->size_ind;
2142 tab->entries = tmp->entries;
2143 tab->bins = NULL;
2144 tab->rebuilds_num++;
2145 free(tmp);
2146}
2147
2148/* Rehash using linear search. Return TRUE if we found that the table
2149 was rebuilt. */
2150static int
2151st_rehash_linear(st_table *tab)
2152{
2153 int eq_p, rebuilt_p;
2154 st_index_t i, j;
2155 st_table_entry *p, *q;
2156
2157 free(tab->bins);
2158 tab->bins = NULL;
2159
2160 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2161 p = &tab->entries[i];
2162 if (DELETED_ENTRY_P(p))
2163 continue;
2164 for (j = i + 1; j < tab->entries_bound; j++) {
2165 q = &tab->entries[j];
2166 if (DELETED_ENTRY_P(q))
2167 continue;
2168 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2169 if (EXPECT(rebuilt_p, 0))
2170 return TRUE;
2171 if (eq_p) {
2172 *p = *q;
2173 MARK_ENTRY_DELETED(q);
2174 tab->num_entries--;
2175 update_range_for_deleted(tab, j);
2176 }
2177 }
2178 }
2179 return FALSE;
2180}
2181
2182/* Rehash using index. Return TRUE if we found that the table was
2183 rebuilt. */
2184static int
2185st_rehash_indexed(st_table *tab)
2186{
2187 int eq_p, rebuilt_p;
2188 st_index_t i;
2189 st_index_t const n = bins_size(tab);
2190 unsigned int const size_ind = get_size_ind(tab);
2191 st_index_t *bins = realloc(tab->bins, n);
2192 tab->bins = bins;
2193 initialize_bins(tab);
2194 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2195 st_table_entry *p = &tab->entries[i];
2196 st_index_t ind;
2197#ifdef QUADRATIC_PROBE
2198 st_index_t d = 1;
2199#else
2200 st_index_t perturb = p->hash;
2201#endif
2202
2203 if (DELETED_ENTRY_P(p))
2204 continue;
2205
2206 ind = hash_bin(p->hash, tab);
2207 for (;;) {
2208 st_index_t bin = get_bin(bins, size_ind, ind);
2209 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2210 /* ok, new room */
2211 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2212 break;
2213 }
2214 else {
2215 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2216 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2217 if (EXPECT(rebuilt_p, 0))
2218 return TRUE;
2219 if (eq_p) {
2220 /* duplicated key; delete it */
2221 q->record = p->record;
2222 MARK_ENTRY_DELETED(p);
2223 tab->num_entries--;
2224 update_range_for_deleted(tab, bin);
2225 break;
2226 }
2227 else {
2228 /* hash collision; skip it */
2229#ifdef QUADRATIC_PROBE
2230 ind = hash_bin(ind + d, tab);
2231 d++;
2232#else
2233 ind = secondary_hash(ind, tab, &perturb);
2234#endif
2235 }
2236 }
2237 }
2238 }
2239 return FALSE;
2240}
2241
2242/* Reconstruct TAB's bins according to TAB's entries. This function
2243 permits conflicting keys inside of entries. No errors are reported
2244 then. All but one of them are discarded silently. */
2245static void
2246st_rehash(st_table *tab)
2247{
2248 int rebuilt_p;
2249
2250 do {
2251 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2252 rebuilt_p = st_rehash_linear(tab);
2253 else
2254 rebuilt_p = st_rehash_indexed(tab);
2255 } while (rebuilt_p);
2256}
2257
2258static st_data_t
2259st_stringify(VALUE key)
2260{
2261 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2262 rb_hash_key_str(key) : key;
2263}
2264
2265static void
2266st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2267{
2268 st_data_t k = st_stringify(key);
2269 st_table_entry e;
2270 e.hash = do_hash(k, tab);
2271 e.key = k;
2272 e.record = val;
2273
2274 tab->entries[tab->entries_bound++] = e;
2275 tab->num_entries++;
2276 RB_OBJ_WRITTEN(hash, Qundef, k);
2277 RB_OBJ_WRITTEN(hash, Qundef, val);
2278}
2279
2280static void
2281st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2282{
2283 long i;
2284
2285 for (i = 0; i < argc; /* */) {
2286 st_data_t k = st_stringify(argv[i++]);
2287 st_data_t v = argv[i++];
2288 st_insert(tab, k, v);
2289 RB_OBJ_WRITTEN(hash, Qundef, k);
2290 RB_OBJ_WRITTEN(hash, Qundef, v);
2291 }
2292}
2293
2294static void
2295st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2296{
2297 long i;
2298
2299 /* push elems */
2300 for (i = 0; i < argc; /* */) {
2301 VALUE key = argv[i++];
2302 VALUE val = argv[i++];
2303 st_insert_single(tab, hash, key, val);
2304 }
2305
2306 /* reindex */
2307 st_rehash(tab);
2308}
2309
2310/* Mimics ruby's { foo => bar } syntax. This function is subpart
2311 of rb_hash_bulk_insert. */
2312void
2313rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2314{
2315 st_index_t n, size = argc / 2;
2316 st_table *tab = RHASH_ST_TABLE(hash);
2317
2318 tab = RHASH_TBL_RAW(hash);
2319 n = tab->entries_bound + size;
2320 st_expand_table(tab, n);
2321 if (UNLIKELY(tab->num_entries))
2322 st_insert_generic(tab, argc, argv, hash);
2323 else if (argc <= 2)
2324 st_insert_single(tab, hash, argv[0], argv[1]);
2325 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2326 st_insert_linear(tab, argc, argv, hash);
2327 else
2328 st_insert_generic(tab, argc, argv, hash);
2329}
2330
2331// to iterate iv_index_tbl
2332st_data_t
2333rb_st_nth_key(st_table *tab, st_index_t index)
2334{
2335 if (LIKELY(tab->entries_start == 0 &&
2336 tab->num_entries == tab->entries_bound &&
2337 index < tab->num_entries)) {
2338 return tab->entries[index].key;
2339 }
2340 else {
2341 rb_bug("unreachable");
2342 }
2343}
2344
2345void
2346rb_st_compact_table(st_table *tab)
2347{
2348 st_index_t num = tab->num_entries;
2349 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2350 /* Compaction: */
2351 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2352 rebuild_table_with(new_tab, tab);
2353 rebuild_move_table(new_tab, tab);
2354 rebuild_cleanup(tab);
2355 }
2356}
2357
2358#endif
int st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
Our own locale-insensitive version of strcasecmp(3).
Definition st.c:2021
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
Our own locale-insensitive version of strcnasecmp(3).
Definition st.c:2045
static bool RB_OBJ_FROZEN(VALUE obj)
Checks if an object is frozen.
Definition fl_type.h:898
#define Qundef
Old name of RUBY_Qundef.
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1428
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:247
VALUE rb_cString
String class.
Definition string.c:79
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition gc.h:615
int len
Length of the buffer.
Definition io.h:8
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
Definition memory.h:372
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define _(args)
This was a transition path from K&R to ANSI.
Definition stdarg.h:35
Definition st.c:135
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40