Project

General

Profile

st-march31.patch

vmakarov (Vladimir Makarov), 04/01/2016 03:37 AM

View differences:

benchmark/bm_bighash.rb
1
h = {}; 20000000.times {|n| h[n] = n }
benchmark/bm_hash_aref_dsym.rb
1 1
h = {}
2 2
syms = ('a'..'z').map { |s| s.to_sym }
3 3
syms.each { |s| h[s] = 1 }
4
200_000.times { syms.each { |s| h[s] } }
4
400_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_aref_fix.rb
1 1
h = {}
2 2
nums = (1..26).to_a
3 3
nums.each { |i| h[i] = i }
4
200_000.times { nums.each { |s| h[s] } }
4
800_000.times { nums.each { |s| h[s] } }
benchmark/bm_hash_aref_flo.rb
1 1
h = {}
2 2
strs = [*1..10000].map! {|i| i.fdiv(10)}
3 3
strs.each { |s| h[s] = s }
4
50.times { strs.each { |s| h[s] } }
4
500.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_miss.rb
2 2
strs = ('a'..'z').to_a.map!(&:freeze)
3 3
strs.each { |s| h[s] = s }
4 4
strs = ('A'..'Z').to_a
5
200_000.times { strs.each { |s| h[s] } }
5
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_str.rb
1 1
h = {}
2 2
strs = ('a'..'z').to_a.map!(&:freeze)
3 3
strs.each { |s| h[s] = s }
4
200_000.times { strs.each { |s| h[s] } }
4
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_sym.rb
6 6
  syms.map!(&:to_sym)
7 7
end
8 8
syms.each { |s| h[s] = s }
9
200_000.times { syms.each { |s| h[s] } }
9
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_aref_sym_long.rb
10 10
  syms.map!(&:to_sym)
11 11
end
12 12
syms.each { |s| h[s] = s }
13
200_000.times { syms.each { |s| h[s] } }
13
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_flatten.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
1000.times do
7
2000.times do
8 8
  h.flatten
9 9
end
benchmark/bm_hash_ident_flo.rb
1 1
h = {}.compare_by_identity
2 2
strs = (1..10000).to_a.map!(&:to_f)
3 3
strs.each { |s| h[s] = s }
4
50.times { strs.each { |s| h[s] } }
4
500.times { strs.each { |s| h[s] } }
benchmark/bm_hash_ident_num.rb
1 1
h = {}.compare_by_identity
2 2
nums = (1..26).to_a
3 3
nums.each { |n| h[n] = n }
4
200_000.times { nums.each { |n| h[n] } }
4
500_000.times { nums.each { |n| h[n] } }
benchmark/bm_hash_ident_obj.rb
1 1
h = {}.compare_by_identity
2 2
objs = 26.times.map { Object.new }
3 3
objs.each { |o| h[o] = o }
4
200_000.times { objs.each { |o| h[o] } }
4
500_000.times { objs.each { |o| h[o] } }
benchmark/bm_hash_ident_str.rb
1 1
h = {}.compare_by_identity
2 2
strs = ('a'..'z').to_a
3 3
strs.each { |s| h[s] = s }
4
200_000.times { strs.each { |s| h[s] } }
4
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_ident_sym.rb
1 1
h = {}.compare_by_identity
2 2
syms = ('a'..'z').to_a.map(&:to_sym)
3 3
syms.each { |s| h[s] = s }
4
200_000.times { syms.each { |s| h[s] } }
4
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_keys.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
5000.times do
7
10000.times do
8 8
  h.keys
9 9
end
benchmark/bm_hash_shift.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
50000.times do
7
1000000.times do
8 8
  k, v = h.shift
9 9
  h[k] = v
10 10
end
benchmark/bm_hash_shift_u16.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
300000.times do
7
1000000.times do
8 8
  k, v = h.shift
9 9
  h[k] = v
10 10
end
benchmark/bm_hash_shift_u24.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
300000.times do
7
1000000.times do
8 8
  k, v = h.shift
9 9
  h[k] = v
10 10
end
benchmark/bm_hash_shift_u32.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
300000.times do
7
1000000.times do
8 8
  k, v = h.shift
9 9
  h[k] = v
10 10
end
benchmark/bm_hash_small2.rb
1
1000000.times.map{|i| a={}; 2.times{|j| a[j]=j}; a}
benchmark/bm_hash_small4.rb
1
1000000.times.map{|i| a={}; 4.times{|j| a[j]=j}; a}
benchmark/bm_hash_small8.rb
1
1000000.times.map{|i| a={}; 8.times{|j| a[j]=j}; a}
benchmark/bm_hash_to_proc.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
5000.times do |i|
7
500000.times do |i|
8 8
  [i].map(&h)
9 9
end
benchmark/bm_hash_values.rb
4 4
  h[i] = nil
5 5
end
6 6

  
7
5000.times do
7
10000.times do
8 8
  h.values
9 9
end
ext/-test-/st/foreach/foreach.c
14 14
    if (c->nr == 0) {
15 15
	st_data_t i;
16 16

  
17
	if (!c->tbl->entries_packed) rb_bug("should be packed\n");
17
	if (c->tbl->bins == NULL) rb_bug("should be packed\n");
18 18

  
19 19
	/* force unpacking during iteration: */
20 20
	for (i = 1; i < expect_size; i++)
21 21
	    st_add_direct(c->tbl, i, i);
22 22

  
23
	if (c->tbl->entries_packed) rb_bug("should be unpacked\n");
23
	if (c->tbl->bins != NULL) rb_bug("should be unpacked\n");
24 24
    }
25 25

  
26 26
    if (key != c->nr) {
......
84 84

  
85 85
    st_add_direct(tbl, 0, 0);
86 86

  
87
    if (!tbl->entries_packed) rb_bug("should still be packed\n");
87
    if (tbl->bins == NULL) rb_bug("should still be packed\n");
88 88

  
89 89
    st_foreach_check(tbl, unp_fec_i, (st_data_t)&c, -1);
90 90

  
......
98 98
		(VALUE)c.nr, (VALUE)expect_size);
99 99
    }
100 100

  
101
    if (tbl->entries_packed) rb_bug("should be unpacked\n");
101
    if (tbl->bins != NULL) rb_bug("should be unpacked\n");
102 102

  
103 103
    st_free_table(tbl);
104 104

  
......
145 145

  
146 146
    st_add_direct(tbl, 0, 0);
147 147

  
148
    if (!tbl->entries_packed) rb_bug("should still be packed\n");
148
    if (tbl->bins == NULL) rb_bug("should still be packed\n");
149 149

  
150 150
    st_foreach(tbl, unp_fe_i, (st_data_t)&c);
151 151

  
......
159 159
		(VALUE)c.nr, (VALUE)expect_size);
160 160
    }
161 161

  
162
    if (tbl->entries_packed) rb_bug("should be unpacked\n");
162
    if (tbl->bins != NULL) rb_bug("should be unpacked\n");
163 163

  
164 164
    st_free_table(tbl);
165 165

  
hash.c
129 129

  
130 130
long rb_objid_hash(st_index_t index);
131 131

  
132
static st_index_t rb_num_hash_start(st_index_t n);
133

  
132 134
static st_index_t
133
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
135
obj_any_hash(VALUE obj)
136
{
137
    obj = rb_hash(obj);
138
    return FIX2LONG(obj);
139
}
140

  
141
/* Prime number (79087987342985798987987) mod 32/64 used for hash
142
   calculations.  */
143
#if SIZEOF_INT == SIZEOF_VOIDP
144
static const st_index_t jauquet_prime_mod = 2053222611; /* mod 32 */
145
#else
146
static const st_index_t jauquet_prime_mod = 6795498992951210195ULL; /* mod 64 */
147
#endif
148

  
149
static inline st_index_t
150
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
134 151
{
135 152
    VALUE hval;
136 153
    st_index_t hnum;
......
148 165
	hnum = rb_objid_hash((st_index_t)a);
149 166
    }
150 167
    else if (BUILTIN_TYPE(a) == T_STRING) {
151
	hnum = rb_str_hash(a);
168
      hnum = (strong_p
169
	      ? rb_str_hash(a)
170
	      : st_hash(RSTRING_PTR(a), RSTRING_LEN(a), jauquet_prime_mod));
152 171
    }
153 172
    else if (BUILTIN_TYPE(a) == T_SYMBOL) {
154 173
	hnum = RSYMBOL(a)->hashval;
......
167 186
}
168 187

  
169 188
static st_index_t
170
obj_any_hash(VALUE obj)
171
{
172
    obj = rb_hash(obj);
173
    return FIX2LONG(obj);
189
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
190
  return any_hash_general(a, FALSE, other_func);
191
}
192

  
193
static st_index_t
194
rb_any_hash_weak(VALUE a) {
195
    return any_hash_weak(a, obj_any_hash);
196
}
197

  
198
static st_index_t
199
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
200
  return any_hash_general(a, TRUE, other_func);
174 201
}
175 202

  
176 203
static st_index_t
......
182 209
static st_index_t
183 210
rb_num_hash_start(st_index_t n)
184 211
{
185
    /*
186
     * This hash function is lightly-tuned for Ruby.  Further tuning
187
     * should be possible.  Notes:
188
     *
189
     * - (n >> 3) alone is great for heap objects and OK for fixnum,
190
     *   however symbols perform poorly.
191
     * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
192
     *   n.b.: +3 to remove most ID scope, +1 worked well initially, too
193
     *   n.b.: +1 (instead of 3) worked well initially, too
194
     * - (n << 16) was finally added to avoid losing bits for fixnums
195
     * - avoid expensive modulo instructions, it is currently only
196
     *   shifts and bitmask operations.
197
     */
198
    return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
212
    /* Use a simple multiplicative hashing.  It is not high quality
213
       hash function but it is very fast and we can spend the saved
214
       time on collision processing.  Mix low and high bits not to
215
       ignore high bits after the multiplication.  */
216
#if SIZEOF_INT == SIZEOF_VOIDP
217
    return ((n >> 16) ^ (n & 0xffff)) * jauquet_prime_mod;
218
#else
219
    return ((n >> 32) ^ (n & 0xffffffff)) * jauquet_prime_mod;
220
#endif
199 221
}
200 222

  
201 223
long
202 224
rb_objid_hash(st_index_t index)
203 225
{
204
    st_index_t hnum = rb_num_hash_start(index);
205

  
206
    hnum = rb_hash_start(hnum);
207
    hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
208
    hnum = rb_hash_end(hnum);
209
    return hnum;
226
    return rb_num_hash_start(index);
210 227
}
211 228

  
212 229
static st_index_t
......
230 247

  
231 248
static const struct st_hash_type objhash = {
232 249
    rb_any_cmp,
250
    rb_any_hash_weak,
233 251
    rb_any_hash,
234 252
};
235 253

  
include/ruby/st.h
1
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
1
/* This is a public domain general purpose hash table package
2
   originally written by Peter Moore @ UCB.
2 3

  
3
/* @(#) st.h 5.1 89/12/14 */
4
   The hash table data structures were redesigned and the package was
5
   rewritten by Vladimir Makarov <vmakarov@redhat.com>.  */
4 6

  
5 7
#ifndef RUBY_ST_H
6 8
#define RUBY_ST_H 1
......
46 48
typedef struct st_table st_table;
47 49

  
48 50
typedef st_data_t st_index_t;
51

  
52
/* Maximal value of unsigned integer type st_index_t.  */
53
#define MAX_ST_INDEX_VAL (~(st_index_t) 0)
54
  
49 55
typedef int st_compare_func(st_data_t, st_data_t);
50 56
typedef st_index_t st_hash_func(st_data_t);
51 57

  
......
55 61
struct st_hash_type {
56 62
    int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
57 63
    st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
64
    /* The following is an optional func for stronger hash.  When we
65
       have many different keys with the same hash we can switch to
66
       use it to prevent a denial-of-service attack exploiting hash table
67
       collisions. */
68
    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
58 69
};
59 70

  
60 71
#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
......
66 77
# define ST_DATA_COMPATIBLE_P(type) 0
67 78
#endif
68 79

  
80
typedef struct st_table_entry st_table_entry;
81

  
82
struct st_table_entry; /* defined in st.c */
83

  
69 84
struct st_table {
85
    /* Cached features of the table -- see st.c for more details.  */
86
    unsigned char entry_power, bin_power, size_ind;
87
    /* How many times the table was rebuilt.  */
88
    unsigned int rebuilds_num;
89
    /* Currently used hash function.  */
90
    st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
70 91
    const struct st_hash_type *type;
71
    st_index_t num_bins;
72
    unsigned int entries_packed : 1;
73
#ifdef __GNUC__
74
    /*
75
     * C spec says,
76
     *   A bit-field shall have a type that is a qualified or unqualified
77
     *   version of _Bool, signed int, unsigned int, or some other
78
     *   implementation-defined type. It is implementation-defined whether
79
     *   atomic types are permitted.
80
     * In short, long and long long bit-field are implementation-defined
81
     * feature. Therefore we want to suppress a warning explicitly.
82
     */
83
    __extension__
84
#endif
85
    st_index_t num_entries : ST_INDEX_BITS - 1;
86
    union {
87
	struct {
88
	    struct st_table_entry **bins;
89
	    void *private_list_head[2];
90
	} big;
91
	struct {
92
	    struct st_packed_entry *entries;
93
	    st_index_t real_entries;
94
	} packed;
95
    } as;
92
    /* Number of entries currently in the table.  */
93
    st_index_t num_entries;
94
    /* Array of bins used for access by keys.  */
95
    st_index_t *bins;
96
    /* Start and bound index of entries in array entries.
97
       entries_start and entries_bound are in the interval
98
       [0,allocated_entries].  */
99
    st_index_t entries_start, entries_bound;
100
    /* Array of size 2^entry_power.  */
101
    st_table_entry *entries;
96 102
};
97 103

  
98 104
#define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
......
121 127
int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg);
122 128
int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
123 129
int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
124
int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
125 130
st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size);
126 131
st_index_t st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never);
127 132
st_index_t st_values(st_table *table, st_data_t *values, st_index_t size);
128 133
st_index_t st_values_check(st_table *table, st_data_t *values, st_index_t size, st_data_t never);
129 134
void st_add_direct(st_table *, st_data_t, st_data_t);
130 135
void st_free_table(st_table *);
136
size_t st_memsize(const st_table *);
131 137
void st_cleanup_safe(st_table *, st_data_t);
132 138
void st_clear(st_table *);
133 139
st_table *st_copy(st_table *);
......
137 143
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n);
138 144
#define st_strcasecmp st_locale_insensitive_strcasecmp
139 145
#define st_strncasecmp st_locale_insensitive_strncasecmp
140
size_t st_memsize(const st_table *);
141 146
st_index_t st_hash(const void *ptr, size_t len, st_index_t h);
142 147
st_index_t st_hash_uint32(st_index_t h, uint32_t i);
143 148
st_index_t st_hash_uint(st_index_t h, st_index_t i);
......
145 150
st_index_t st_hash_start(st_index_t h);
146 151
#define st_hash_start(h) ((st_index_t)(h))
147 152

  
153
st_index_t st_hash_index(st_index_t k);
154
st_index_t st_hash_double(double d);
155

  
148 156
RUBY_SYMBOL_EXPORT_END
149 157

  
150 158
#if defined(__cplusplus)
st.c
1
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
2

  
3
/* static	char	sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
1
/* This is a public domain general purpose hash table package
2
   originally written by Peter Moore @ UCB.
3

  
4
   The hash table data structures were redesigned and the package was
5
   rewritten by Vladimir Makarov <vmakarov@redhat.com>.  */
6

  
7
/* The original package implemented classic bucket-based hash tables
8
   with entries doubly linked for an access by their insertion order.
9
   To decrease pointer chasing and as a consequence to improve a data
10
   locality the current implementation is based on storing entries in
11
   an array and using hash tables with open addressing.  The current
12
   entries are more compact in comparison with the original ones and
13
   this also improves the data locality.
14

  
15
   The hash table has two arrays called *bins* and *entries*.
16

  
17
     bins:
18
    -------
19
   |       |                  entries array:
20
   |-------|            --------------------------------
21
   | index |           |      | entry:  |        |      |
22
   |-------|           |      |         |        |      |
23
   | ...   |           | ...  | hash    |  ...   | ...  |
24
   |-------|           |      | key     |        |      |
25
   | empty |           |      | record  |        |      |
26
   |-------|            --------------------------------
27
   | ...   |                   ^                  ^
28
   |-------|                   |_ entries start   |_ entries bound
29
   |deleted|
30
    -------
31

  
32
   o The entry array contains table entries in the same order as they
33
     were inserted.
34

  
35
     When the first entry is deleted, a variable containing index of
36
     the current first entry (*entries start*) is changed.  In all
37
     other cases of the deletion, we just mark the entry as deleted by
38
     using a reserved hash value.
39

  
40
     Such organization of the entry storage makes operations of the
41
     table shift and the entries traversal very fast.
42

  
43
   o The bins provide access to the entries by their keys.  The
44
     key hash is mapped to a bin containing *index* of the
45
     corresponding entry in the entry array.
46

  
47
     The bin array size is always power of two, it makes mapping very
48
     fast by using the corresponding lower bits of the hash.
49
     Generally it is not a good idea to ignore some part of the hash.
50
     But alternative approach is worse.  For example, we could use a
51
     modulo operation for mapping and a prime number for the size of
52
     the bin array.  Unfortunately, the modulo operation for big
53
     64-bit numbers is extremely slow (it takes more than 100 cycles
54
     on modern Intel CPUs).
55

  
56
     Still other bits of the hash value are used when the mapping
57
     results in a collision.  In this case we use a secondary hash
58
     value which is a result of a function of the collision bin
59
     index and the original hash value.  The function choice
60
     guarantees that we can traverse all bins and finally find the
61
     corresponding bin as after several iterations the function
62
     becomes a full cycle linear congruential generator because it
63
     satisfies requirements of the Hull-Dobell theorem.
64

  
65
     When an entry is removed from the table besides marking the
66
     hash in the corresponding entry described above, we also mark
67
     the bin by a special value in order to find entries which had
68
     a collision with the removed entries.
69

  
70
     There are two reserved values for the bins.  One denotes an
71
     empty bin, another one denotes a bin for a deleted entry.
72

  
73
   o The length of the bin array is at least two times more than the
74
     entry array length.  This keeps the table load factor healthy.
75
     The trigger of rebuilding the table is always a case when we can
76
     not insert an entry anymore at the entries bound.  We could
77
     change the entries bound too in case of deletion but then we need
78
     a special code to count bins with corresponding deleted entries
79
     and reset the bin values when there are too many bins
80
     corresponding to deleted entries.
81

  
82
     Table rebuilding is done by creation of a new entry array and
83
     bins of an appropriate size.  We also try to reuse the arrays
84
     in some cases by compacting the array and removing deleted
85
     entries.
86

  
87
   o To save memory very small tables have no allocated arrays
88
     bins.  We use a linear search for an access by a key.
89

  
90
   o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91
     bins depending on the current hash table size.
92

  
93
   This implementation speeds up the Ruby hash table benchmarks in
94
   average by more than 40% on Intel Haswell CPU.
95

  
96
*/
4 97

  
5 98
#ifdef NOT_RUBY
6 99
#include "regint.h"
......
14 107
#include <stdlib.h>
15 108
#endif
16 109
#include <string.h>
17
#include "ccan/list/list.h"
110
#include <assert.h>
111

  
112
#ifdef __GNUC__
113
#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
114
#define EXPECT(expr, val) __builtin_expect(expr, val)
115
#define ATTRIBUTE_UNUSED  __attribute__((unused))
116
#else
117
#define PREFETCH(addr, write_p)
118
#define EXPECT(expr, val) (expr)
119
#define ATTRIBUTE_UNUSED
120
#endif
18 121

  
19
typedef struct st_table_entry st_table_entry;
122
#ifdef ST_DEBUG
123
#define st_assert(cond) assert(cond)
124
#else
125
#define st_assert(cond) ((void)(0 && (cond)))
126
#endif
127

  
128
/* The type of hashes.  */
129
typedef st_index_t st_hash_t;
20 130

  
21 131
struct st_table_entry {
22
    st_index_t hash;
132
    st_hash_t hash;
23 133
    st_data_t key;
24 134
    st_data_t record;
25
    st_table_entry *next;
26
    struct list_node olist;
27 135
};
28 136

  
29
typedef struct st_packed_entry {
30
    st_index_t hash;
31
    st_data_t key, val;
32
} st_packed_entry;
33

  
34
#ifndef STATIC_ASSERT
35
#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
36
#endif
37

  
38
#define ST_DEFAULT_MAX_DENSITY 5
39
#define ST_DEFAULT_INIT_TABLE_SIZE 16
40
#define ST_DEFAULT_PACKED_TABLE_SIZE 18
41
#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
42
#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
43

  
44
STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
45
STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
46

  
47
    /*
48
     * DEFAULT_MAX_DENSITY is the default for the largest we allow the
49
     * average number of items per bin before increasing the number of
50
     * bins
51
     *
52
     * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
53
     * allocated initially
54
     *
55
     */
56

  
57 137
#define type_numhash st_hashtype_num
58 138
const struct st_hash_type st_hashtype_num = {
59 139
    st_numcmp,
......
73 153
    strcasehash,
74 154
};
75 155

  
76
static void rehash(st_table *);
156
/* Value used to catch uninitialized entries/bins during debugging.
157
   There is a possibility for a false alarm, but its probability is
158
   extremely small.  */
159
#define ST_INIT_VAL 0xafafafafafafafaf
160
#define ST_INIT_VAL_BYTE 0xafa
77 161

  
78 162
#ifdef RUBY
79 163
#undef malloc
80 164
#undef realloc
81 165
#undef calloc
82 166
#undef free
83
#define malloc xmalloc
84
#define calloc xcalloc
85
#define realloc xrealloc
86
#define free(x) xfree(x)
167
#define malloc ruby_xmalloc
168
#define calloc ruby_xcalloc
169
#define realloc ruby_xrealloc
170
#define free ruby_xfree
87 171
#endif
88 172

  
89
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
173
#include <stdlib.h>
90 174

  
91
#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
92
#define hash_pos(h,n) ((h) & (n - 1))
175
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
176
#define PTR_EQUAL(tab, ptr, hash_val, key) \
177
    ((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key))
178

  
179
/* Features of a table.  */
180
struct st_features {
181
    /* Power of 2 used for number of allocated entries.  */
182
    unsigned char entry_power;
183
    /* Power of 2 used for number of allocated bins.  Depending on the
184
       table size, the number of bins is 2-4 times more than the
185
       number of entries.  */
186
    unsigned char bin_power;
187
    /* Enumeration of sizes of bins (8-bit, 16-bit etc).  */
188
    unsigned char size_ind;
189
    /* Bins are packed in words of type st_index_t.  The following is
190
       a size of bins counted by words.  */
191
    st_index_t bins_words;
192
};
93 193

  
94
/* preparation for possible allocation improvements */
95
#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
96
#define st_free_entry(entry) free(entry)
97
#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
98
#define st_dealloc_table(table) free(table)
99
#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
100
#define st_free_bins(bins, size) free(bins)
101
static inline st_table_entry**
102
st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
103
{
104
    bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
105
    MEMZERO(bins, st_table_entry*, newsize);
106
    return bins;
107
}
108

  
109
/* Shortcut */
110
#define bins as.big.bins
111
#define real_entries as.packed.real_entries
112

  
113
/* preparation for possible packing improvements */
114
#define PACKED_BINS(table) ((table)->as.packed.entries)
115
#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
116
#define PKEY(table, i) PACKED_ENT((table), (i)).key
117
#define PVAL(table, i) PACKED_ENT((table), (i)).val
118
#define PHASH(table, i) PACKED_ENT((table), (i)).hash
119
#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
120
#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
121
#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
122

  
123
/* this function depends much on packed layout, so that it placed here */
124
static inline void
125
remove_packed_entry(st_table *table, st_index_t i)
126
{
127
    table->real_entries--;
128
    table->num_entries--;
129
    if (i < table->real_entries) {
130
	MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
131
		st_packed_entry, table->real_entries - i);
132
    }
133
}
194
/* Features of all possible size tables.  */
195
#if SIZEOF_ST_INDEX_T == 8
196
#define MAX_POWER2 62
197
struct st_features features[] = {
198
    {0, 2, 0, 0x0},
199
    {1, 3, 0, 0x1},
200
    {2, 4, 0, 0x2},
201
    {3, 5, 0, 0x4},
202
    {4, 6, 0, 0x8},
203
    {5, 7, 0, 0x10},
204
    {6, 8, 0, 0x20},
205
    {7, 9, 0, 0x40},
206
    {8, 10, 1, 0x100},
207
    {9, 11, 1, 0x200},
208
    {10, 12, 1, 0x400},
209
    {11, 13, 1, 0x800},
210
    {12, 14, 1, 0x1000},
211
    {13, 15, 1, 0x2000},
212
    {14, 16, 1, 0x4000},
213
    {15, 17, 1, 0x8000},
214
    {16, 18, 2, 0x20000},
215
    {17, 19, 2, 0x40000},
216
    {18, 20, 2, 0x80000},
217
    {19, 21, 2, 0x100000},
218
    {20, 22, 2, 0x200000},
219
    {21, 23, 2, 0x400000},
220
    {22, 24, 2, 0x800000},
221
    {23, 25, 2, 0x1000000},
222
    {24, 26, 2, 0x2000000},
223
    {25, 27, 2, 0x4000000},
224
    {26, 28, 2, 0x8000000},
225
    {27, 29, 2, 0x10000000},
226
    {28, 30, 2, 0x20000000},
227
    {29, 31, 2, 0x40000000},
228
    {30, 32, 2, 0x80000000},
229
    {31, 33, 2, 0x100000000},
230
    {32, 33, 3, 0x200000000},
231
    {33, 34, 3, 0x400000000},
232
    {34, 35, 3, 0x800000000},
233
    {35, 36, 3, 0x1000000000},
234
    {36, 37, 3, 0x2000000000},
235
    {37, 38, 3, 0x4000000000},
236
    {38, 39, 3, 0x8000000000},
237
    {39, 40, 3, 0x10000000000},
238
    {40, 41, 3, 0x20000000000},
239
    {41, 42, 3, 0x40000000000},
240
    {42, 43, 3, 0x80000000000},
241
    {43, 44, 3, 0x100000000000},
242
    {44, 45, 3, 0x200000000000},
243
    {45, 46, 3, 0x400000000000},
244
    {46, 47, 3, 0x800000000000},
245
    {47, 48, 3, 0x1000000000000},
246
    {48, 49, 3, 0x2000000000000},
247
    {49, 50, 3, 0x4000000000000},
248
    {50, 51, 3, 0x8000000000000},
249
    {51, 52, 3, 0x10000000000000},
250
    {52, 53, 3, 0x20000000000000},
251
    {53, 54, 3, 0x40000000000000},
252
    {54, 55, 3, 0x80000000000000},
253
    {55, 56, 3, 0x100000000000000},
254
    {56, 57, 3, 0x200000000000000},
255
    {57, 58, 3, 0x400000000000000},
256
    {58, 59, 3, 0x800000000000000},
257
    {59, 60, 3, 0x1000000000000000},
258
    {60, 61, 3, 0x2000000000000000},
259
    {61, 62, 3, 0x4000000000000000},
260
    {62, 63, 3, 0x8000000000000000},
261
};
134 262

  
135
static inline void
136
remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
137
{
138
    table->num_entries--;
139
    PKEY_SET(table, i, never);
140
    PVAL_SET(table, i, never);
141
    PHASH_SET(table, i, 0);
142
}
263
#else
264
#define MAX_POWER2 30
265

  
266
struct st_features features[] = {
267
    {0, 2, 0, 0x1},
268
    {1, 3, 0, 0x2},
269
    {2, 4, 0, 0x4},
270
    {3, 5, 0, 0x8},
271
    {4, 6, 0, 0x10},
272
    {5, 7, 0, 0x20},
273
    {6, 8, 0, 0x40},
274
    {7, 9, 0, 0x80},
275
    {8, 10, 1, 0x200},
276
    {9, 11, 1, 0x400},
277
    {10, 12, 1, 0x800},
278
    {11, 13, 1, 0x1000},
279
    {12, 14, 1, 0x2000},
280
    {13, 15, 1, 0x4000},
281
    {14, 16, 1, 0x8000},
282
    {15, 17, 1, 0x10000},
283
    {16, 17, 2, 0x20000},
284
    {17, 18, 2, 0x40000},
285
    {18, 19, 2, 0x80000},
286
    {19, 20, 2, 0x100000},
287
    {20, 21, 2, 0x200000},
288
    {21, 22, 2, 0x400000},
289
    {22, 23, 2, 0x800000},
290
    {23, 24, 2, 0x1000000},
291
    {24, 25, 2, 0x2000000},
292
    {25, 26, 2, 0x4000000},
293
    {26, 27, 2, 0x8000000},
294
    {27, 28, 2, 0x10000000},
295
    {28, 29, 2, 0x20000000},
296
    {29, 30, 2, 0x40000000},
297
    {30, 31, 2, 0x80000000},
298
};
143 299

  
144
static st_index_t
145
next_pow2(st_index_t x)
146
{
147
    x |= x >> 1;
148
    x |= x >> 2;
149
    x |= x >> 4;
150
    x |= x >> 8;
151
    x |= x >> 16;
152
#if SIZEOF_ST_INDEX_T == 8
153
    x |= x >> 32;
154 300
#endif
155
    return x + 1;
301

  
302
/* The reserved hash value and its substitution.  */
303
#define RESERVED_HASH_VAL (~(st_hash_t) 0)
304
#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
305

  
306
/* Return hash value of KEY for table TAB.  */
307
static inline st_hash_t
308
do_hash(st_data_t key, st_table *tab) {
309
    st_index_t h = (st_index_t)(tab->curr_hash)(key);
310
#if SIZEOF_INT == SIZEOF_VOIDP
311
    st_hash_t hash = h;
312
#else
313
    st_hash_t hash = h;
314
#endif
315

  
316
    /* RESERVED_HASH_VAL is used for a deleted entry.  Map it into
317
       another value.  Such mapping should be extremely rare.  */
318
    return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
156 319
}
157 320

  
158
static st_index_t
159
new_size(st_index_t size)
160
{
161
    st_index_t n;
321
/* Power of 2 defining the minimal number of allocated entries.  */
322
#define MINIMAL_POWER2 3
323

  
324
#if MINIMAL_POWER2 < 2
325
#error "MINIMAL_POWER2 should be >= 2"
326
#endif
327

  
328
/* If the power2 of the allocated `entries` is less than the following
329
   value, don't allocate bins and use a linear search.  */
330
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 3
162 331

  
163
    if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
164
	return size;
332
/* Return smallest n >= MINIMAL_POWER2 such that 2^n > SIZE.  */
333
static int
334
get_power2(st_index_t size) {
335
    unsigned int n;
165 336

  
166
    n = next_pow2(size);
167
    if (n > size)
168
	return n;
337
    for (n = 0; size != 0; n++)
338
        size >>= 1;
339
    if (n <= MAX_POWER2)
340
        return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
169 341
#ifndef NOT_RUBY
342
    /* Ran out of the table entries */
170 343
    rb_raise(rb_eRuntimeError, "st_table too big");
171 344
#endif
172
    return -1;			/* should raise exception */
345
    /* should raise exception */
346
    return -1;
347
}
348

  
349
/* Return value of N-th bin in array BINS of table with bins size
350
   index S.  */
351
static inline st_index_t
352
get_bin(st_index_t *bins, int s, st_index_t n) {
353
  return (s == 0 ? ((unsigned char *) bins)[n]
354
	  : s == 1 ? ((unsigned short *) bins)[n]
355
	  : s == 2 ? ((unsigned int *) bins)[n]
356
	  : ((st_index_t *) bins)[n]);
357
}
358

  
359
/* Set up N-th bin in array BINS of table with bins size index S to
360
   value V.  */
361
static inline void
362
set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) {
363
  if (s == 0) ((unsigned char *) bins)[n] = v;
364
  else if (s == 1) ((unsigned short *) bins)[n] = v;
365
  else if (s == 2) ((unsigned int *) bins)[n] = v;
366
  else ((st_index_t *) bins)[n] = v;
367
}
368

  
369
/* These macros define reserved values for empty table bin and table
370
   bin which contains a deleted entry.  We will never use such values
371
   for an entry index in bins.  */
372
#define EMPTY_BIN    0
373
#define DELETED_BIN  1
374
/* Base of a real entry index in the bins.  */
375
#define ENTRY_BASE 2
376

  
377
/* Mark I-th bin of table TAB as empty, in other words not
378
   corresponding to any entry.  */
379
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
380

  
381
/* Values used for not found entry and bin with given
382
   characteristics.  */
383
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
384
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
385

  
386
/* Mark I-th bin of table TAB as corresponding to a deleted table
387
   entry.  The bin must currently refer to a live entry; no table
388
   counters are updated here. */
389
#define MARK_BIN_DELETED(tab, i)				\
390
    do {                                                        \
391
        st_assert (i != UNDEFINED_BIN_IND);			\
392
	st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); 	\
393
        set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
394
    } while (0)
395

  
396
/* Macros to check whether value B marks an empty bin or a bin
397
   corresponding to a deleted entry.  */
398
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
399
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
400
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
401

  
402
/* Macros to check empty bins and bins corresponding to deleted
403
   entries.  Bins are given by their index I in table TAB.  */
404
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
405
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
406
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
407

  
408
/* Macros for marking and checking deleted entries given by their
409
   pointer E_PTR.  */
410
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
411
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
412

  
413
/* Return bin size index of table TAB.  */
414
static inline st_index_t
415
get_size_ind(const st_table *tab) {
416
    return tab->size_ind;
417
}
418

  
419
/* Return the number of allocated bins of table TAB.  */
420
static inline st_index_t
421
get_bins_num(const st_table *tab) {
422
    return 1<<tab->bin_power;
423
}
424

  
425
/* Return mask for a bin index in table TAB.  */
426
static inline st_index_t
427
bins_mask(const st_table *tab) {
428
    return get_bins_num(tab) - 1;
429
}
430

  
431
/* Return the index of table TAB bin corresponding to
432
   HASH_VALUE.  */
433
static inline st_index_t
434
hash_bin(st_hash_t hash_value, st_table *tab) {
435
    return hash_value & bins_mask(tab);
436
}
437

  
438
/* Return the number of allocated entries of table TAB.  */
439
static inline st_index_t
440
get_allocated_entries (const st_table *tab) {
441
    return 1<<tab->entry_power;
442
}
443

  
444
/* Return size of the allocated bins of table TAB.  */
445
static inline st_index_t
446
bins_size(const st_table *tab) {
447
    return features[tab->entry_power].bins_words * sizeof (st_index_t);
448
}
449

  
450
/* Mark all bins of table TAB as empty.  */
451
static void
452
initialize_bins(st_table *tab) {
453
    memset(tab->bins, 0, bins_size(tab));
454
}
455

  
456
/* Make table TAB empty.  */
457
static void
458
make_tab_empty(st_table *tab)
459
{
460
    tab->curr_hash = tab->type->hash;
461
    tab->num_entries = 0;
462
    tab->rebuilds_num = 0;
463
    tab->entries_start = tab->entries_bound = 0;
464
    if (tab->bins != NULL)
465
        initialize_bins(tab);
466
}
467

  
468
#ifdef ST_DEBUG
469
/* Check consistency of table TAB.  It can be extremely slow.  So use
470
   it only for debugging.  */
471
static void
472
st_check(st_table *tab) {
473
    st_index_t d, e, i, n, p;
474

  
475
    for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
476
        ;
477
    p = i;
478
    assert (p >= MINIMAL_POWER2);
479
    assert (tab->entries_bound <= get_allocated_entries(tab)
480
	    && tab->entries_start <= tab->entries_bound);
481
    n = 0;
482
    return;
483
    if (tab->entries_bound != 0)
484
        for (i = tab->entries_start; i < tab->entries_bound; i++) {
485
  	    assert (tab->entries[i].hash != (st_hash_t) ST_INIT_VAL
486
		    && tab->entries[i].key != ST_INIT_VAL
487
		    && tab->entries[i].record != ST_INIT_VAL);
488
	    if (! DELETED_ENTRY_P(&tab->entries[i]))
489
	      n++;
490
	}
491
    assert (n == tab->num_entries);
492
    if (tab->bins == NULL)
493
        assert (p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
494
    else {
495
        assert (p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
496
	for (n = d = i = 0; i < get_bins_num(tab); i++) {
497
  	    assert (get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
498
	    if (IND_DELETED_BIN_P(tab, i)) {
499
	        d++;
500
		continue;
501
	    }
502
	    else if (IND_EMPTY_BIN_P(tab, i))
503
	        continue;
504
	    n++;
505
	    e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
506
	    assert (tab->entries_start <= e && e < tab->entries_bound);
507
	    assert (! DELETED_ENTRY_P(&tab->entries[e]));
508
	    assert (tab->entries[e].hash != (st_hash_tab) ST_INIT_VAL
509
		    && tab->entries[e].key != ST_INIT_VAL
510
		    && tab->entries[e].record != ST_INIT_VAL);
511
	}
512
	assert (n == tab->num_entries);
513
    }
173 514
}
515
#endif
174 516

  
175 517
#ifdef HASH_LOG
176 518
#ifdef HAVE_UNISTD_H
......
179 521
static struct {
180 522
    int all, total, num, str, strcase;
181 523
}  collision;
524

  
525
/* Flag switching off output of package statistics at the end of
526
   program.  */
182 527
static int init_st = 0;
183 528

  
529
/* Output overall number of table searches and collisions into a
530
   temporary file.  */
184 531
static void
185 532
stat_col(void)
186 533
{
187 534
    char fname[10+sizeof(long)*3];
188 535
    FILE *f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
189 536
    fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
190
	    ((double)collision.all / (collision.total)) * 100);
537
            ((double)collision.all / (collision.total)) * 100);
191 538
    fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
192 539
    fclose(f);
193 540
}
194 541
#endif
195 542

  
196
static struct list_head *
197
st_head(const st_table *tbl)
198
{
199
    uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
200
    return (struct list_head *)addr;
201
}
202

  
203
st_table*
204
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
205
{
206
    st_table *tbl;
207

  
543
/* Create and return table with TYPE which can hold at least SIZE
544
   entries.  The real number of entries which the table can hold is
545
   the nearest power of two for SIZE.  */
546
st_table *
547
st_init_table_with_size(const struct st_hash_type *type, st_index_t size) {
548
    st_table *tab;
549
    int n;
550
    
208 551
#ifdef HASH_LOG
209
# if HASH_LOG+0 < 0
552
#if HASH_LOG+0 < 0
210 553
    {
211
	const char *e = getenv("ST_HASH_LOG");
212
	if (!e || !*e) init_st = 1;
554
        const char *e = getenv("ST_HASH_LOG");
555
        if (!e || !*e) init_st = 1;
213 556
    }
214
# endif
557
#endif
215 558
    if (init_st == 0) {
216
	init_st = 1;
217
	atexit(stat_col);
559
        init_st = 1;
560
        atexit(stat_col);
218 561
    }
219 562
#endif
220

  
221

  
222
    tbl = st_alloc_table();
223
    tbl->type = type;
224
    tbl->num_entries = 0;
225
    tbl->entries_packed = size <= MAX_PACKED_HASH;
226
    if (tbl->entries_packed) {
227
	size = ST_DEFAULT_PACKED_TABLE_SIZE;
228
	tbl->real_entries = 0;
229
    }
230
    else {
231
	size = new_size(size);	/* round up to power-of-two */
232
	list_head_init(st_head(tbl));
233
    }
234
    tbl->num_bins = size;
235
    tbl->bins = st_alloc_bins(size);
236

  
237
    return tbl;
563
    
564
    n = get_power2(size);
565
    tab = (st_table *) malloc(sizeof (st_table));
566
    tab->type = type;
567
    tab->entry_power = n;
568
    tab->bin_power = features[n].bin_power;
569
    tab->size_ind = features[n].size_ind;
570
    if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
571
        tab->bins = NULL;
572
    else
573
      tab->bins = (st_index_t *) malloc(bins_size(tab));
574
    tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
575
					     * sizeof(st_table_entry));
576
#ifdef ST_DEBUG
577
    memset(tab->entries, ST_INIT_VAL_BYTE,
578
	   get_allocated_entries(tab) * sizeof(st_table_entry));
579
    if (tab->bins != NULL)
580
      memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
581
#endif
582
    make_tab_empty(tab);
583
#ifdef ST_DEBUG
584
    st_check(tab);
585
#endif
586
    return tab;
238 587
}
239 588

  
240
st_table*
241
st_init_table(const struct st_hash_type *type)
242
{
589
/* Create and return table with TYPE which can hold a minimal number
590
   of entries (see comments for get_power2).  */
591
st_table *
592
st_init_table(const struct st_hash_type *type) {
243 593
    return st_init_table_with_size(type, 0);
244 594
}
245 595

  
246
st_table*
247
st_init_numtable(void)
248
{
596
/* Create and return table which can hold a minimal number of
597
   numbers.  */
598
st_table *
599
st_init_numtable(void) {
249 600
    return st_init_table(&type_numhash);
250 601
}
251 602

  
252
st_table*
253
st_init_numtable_with_size(st_index_t size)
254
{
603
/* Create and return table which can hold SIZE numbers.  */
604
st_table *
605
st_init_numtable_with_size(st_index_t size) {
255 606
    return st_init_table_with_size(&type_numhash, size);
256 607
}
257 608

  
258
st_table*
259
st_init_strtable(void)
260
{
609
/* Create and return table which can hold a minimal number of
610
   strings.  */
611
st_table *
612
st_init_strtable(void) {
261 613
    return st_init_table(&type_strhash);
262 614
}
263 615

  
264
st_table*
265
st_init_strtable_with_size(st_index_t size)
266
{
616
/* Create and return table which can hold SIZE strings.  */
617
st_table *
618
st_init_strtable_with_size(st_index_t size) {
267 619
    return st_init_table_with_size(&type_strhash, size);
268 620
}
269 621

  
270
st_table*
271
st_init_strcasetable(void)
272
{
622
/* Create and return table which can hold a minimal number of strings
623
   whose character case is ignored.  */
624
st_table *
625
st_init_strcasetable(void) {
273 626
    return st_init_table(&type_strcasehash);
274 627
}
275 628

  
276
st_table*
277
st_init_strcasetable_with_size(st_index_t size)
278
{
629
/* Create and return table which can hold SIZE strings whose character
630
   case is ignored.  */
631
st_table *
632
st_init_strcasetable_with_size(st_index_t size) {
279 633
    return st_init_table_with_size(&type_strcasehash, size);
280 634
}
281 635

  
636
/* Make table TAB empty.  */
282 637
void
283
st_clear(st_table *table)
284
{
285
    register st_table_entry *ptr = 0, *next;
286

  
287
    if (table->entries_packed) {
288
        table->num_entries = 0;
289
        table->real_entries = 0;
290
        return;
291
    }
292

  
293
    list_for_each_safe(st_head(table), ptr, next, olist) {
294
	/* list_del is not needed */
295
	st_free_entry(ptr);
296
    }
297
    table->num_entries = 0;
298
    MEMZERO(table->bins, st_table_entry*, table->num_bins);
299
    list_head_init(st_head(table));
638
st_clear(st_table *tab) {
639
    make_tab_empty(tab);
640
#ifdef ST_DEBUG
641
    st_check(tab);
642
#endif
300 643
}
301 644

  
645
/* Free table TAB space.  */
302 646
void
303
st_free_table(st_table *table)
304
{
305
    st_clear(table);
306
    st_free_bins(table->bins, table->num_bins);
307
    st_dealloc_table(table);
647
st_free_table(st_table *tab) {
648
    if (tab->bins != NULL)
649
        free(tab->bins);
650
    free(tab->entries);
651
    free(tab);
308 652
}
309 653

  
654
/* Return byte size of memory allocated for table TAB.  */
310 655
size_t
311
st_memsize(const st_table *table)
312
{
313
    if (table->entries_packed) {
314
	return table->num_bins * sizeof (void *) + sizeof(st_table);
315
    }
316
    else {
317
	return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table);
318
    }
656
st_memsize(const st_table *tab) {
657
    return(sizeof(st_table)
658
           + (tab->bins == NULL ? 0 : bins_size(tab))
659
           + get_allocated_entries(tab) * sizeof(st_table_entry));
319 660
}
320 661

  
321
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
322
((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
662
static st_index_t
663
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
664

  
665
static st_index_t
666
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
667

  
668
static st_index_t
669
find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
670

  
671
static st_index_t
672
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
673
			       st_data_t key, st_index_t *bin_ind);
323 674

  
324 675
#ifdef HASH_LOG
325 676
static void
326
count_collision(const struct st_hash_type *type)
327
{
328
    collision.all++;
329
    if (type == &type_numhash) {
330
	collision.num++;
331
    }
677
count_collision(const struct st_hash_type *type) {
678
  collision.all++;
679
  if (type == &type_numhash) {
680
    collision.num++;
681
  }
332 682
    else if (type == &type_strhash) {
333
	collision.strcase++;
683
      collision.strcase++;
334 684
    }
335 685
    else if (type == &type_strcasehash) {
336
	collision.str++;
337
    }
686
    collision.str++;
687
  }
338 688
}
339
#define COLLISION (collision_check ? count_collision(table->type) : (void)0)
340
#define FOUND_ENTRY (collision_check ? collision.total++ : (void)0)
689

  
690
#define collision_check 1
691

  
692
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
693
#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
341 694
#else
342 695
#define COLLISION
343
#define FOUND_ENTRY
696
#define FOUND_BIN
344 697
#endif
345 698

  
346
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
347
    ((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = hash_pos(hash_val, (table)->num_bins))))
699
/* If the number of entries in the table is at least REBUILD_THRESHOLD
700
   times less than the entry array length, decrease the table
701
   size.  */
702
#define REBUILD_THRESHOLD 4
348 703

  
349
static st_table_entry *
350
find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
351
           st_index_t bin_pos)
352
{
353
    register st_table_entry *ptr = table->bins[bin_pos];
354
    FOUND_ENTRY;
355
    if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
356
	COLLISION;
357
	while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
358
	    ptr = ptr->next;
359
	}
360
	ptr = ptr->next;
361
    }
362
    return ptr;
363
}
364

  
365
static inline st_index_t
366
find_packed_index_from(const st_table *table, st_index_t hash_val,
367
		       st_data_t key, st_index_t i)
368
{
369
    while (i < table->real_entries &&
370
	   (PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
371
	i++;
372
    }
373
    return i;
374
}
375

  
376
static inline st_index_t
377
find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
378
{
379
    return find_packed_index_from(table, hash_val, key, 0);
380
}
381

  
382
#define collision_check 0
383

  
384
int
385
st_lookup(st_table *table, register st_data_t key, st_data_t *value)
386
{
387
    st_index_t hash_val;
388
    register st_table_entry *ptr;
704
#if REBUILD_THRESHOLD < 2
705
#error "REBUILD_THRESHOLD should be >= 2"
706
#endif
389 707

  
390
    hash_val = do_hash(key, table);
708
static int inside_table_rebuild_p = FALSE;
391 709

  
392
    if (table->entries_packed) {
393
	st_index_t i = find_packed_index(table, hash_val, key);
394
	if (i < table->real_entries) {
395
	    if (value != 0) *value = PVAL(table, i);
... This diff was truncated because it exceeds the maximum size that can be displayed.