Project

General

Profile

Feature #12142 » st-march31.patch

vmakarov (Vladimir Makarov), 04/01/2016 03:37 AM

View differences:

benchmark/bm_bighash.rb
h = {}; 20000000.times {|n| h[n] = n }
benchmark/bm_hash_aref_dsym.rb
h = {}
syms = ('a'..'z').map { |s| s.to_sym }
syms.each { |s| h[s] = 1 }
200_000.times { syms.each { |s| h[s] } }
400_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_aref_fix.rb
h = {}
nums = (1..26).to_a
nums.each { |i| h[i] = i }
200_000.times { nums.each { |s| h[s] } }
800_000.times { nums.each { |s| h[s] } }
benchmark/bm_hash_aref_flo.rb
h = {}
strs = [*1..10000].map! {|i| i.fdiv(10)}
strs.each { |s| h[s] = s }
50.times { strs.each { |s| h[s] } }
500.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_miss.rb
strs = ('a'..'z').to_a.map!(&:freeze)
strs.each { |s| h[s] = s }
strs = ('A'..'Z').to_a
200_000.times { strs.each { |s| h[s] } }
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_str.rb
h = {}
strs = ('a'..'z').to_a.map!(&:freeze)
strs.each { |s| h[s] = s }
200_000.times { strs.each { |s| h[s] } }
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_aref_sym.rb
syms.map!(&:to_sym)
end
syms.each { |s| h[s] = s }
200_000.times { syms.each { |s| h[s] } }
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_aref_sym_long.rb
syms.map!(&:to_sym)
end
syms.each { |s| h[s] = s }
200_000.times { syms.each { |s| h[s] } }
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_flatten.rb
h[i] = nil
end
1000.times do
2000.times do
h.flatten
end
benchmark/bm_hash_ident_flo.rb
h = {}.compare_by_identity
strs = (1..10000).to_a.map!(&:to_f)
strs.each { |s| h[s] = s }
50.times { strs.each { |s| h[s] } }
500.times { strs.each { |s| h[s] } }
benchmark/bm_hash_ident_num.rb
h = {}.compare_by_identity
nums = (1..26).to_a
nums.each { |n| h[n] = n }
200_000.times { nums.each { |n| h[n] } }
500_000.times { nums.each { |n| h[n] } }
benchmark/bm_hash_ident_obj.rb
h = {}.compare_by_identity
objs = 26.times.map { Object.new }
objs.each { |o| h[o] = o }
200_000.times { objs.each { |o| h[o] } }
500_000.times { objs.each { |o| h[o] } }
benchmark/bm_hash_ident_str.rb
h = {}.compare_by_identity
strs = ('a'..'z').to_a
strs.each { |s| h[s] = s }
200_000.times { strs.each { |s| h[s] } }
500_000.times { strs.each { |s| h[s] } }
benchmark/bm_hash_ident_sym.rb
h = {}.compare_by_identity
syms = ('a'..'z').to_a.map(&:to_sym)
syms.each { |s| h[s] = s }
200_000.times { syms.each { |s| h[s] } }
500_000.times { syms.each { |s| h[s] } }
benchmark/bm_hash_keys.rb
h[i] = nil
end
5000.times do
10000.times do
h.keys
end
benchmark/bm_hash_shift.rb
h[i] = nil
end
50000.times do
1000000.times do
k, v = h.shift
h[k] = v
end
benchmark/bm_hash_shift_u16.rb
h[i] = nil
end
300000.times do
1000000.times do
k, v = h.shift
h[k] = v
end
benchmark/bm_hash_shift_u24.rb
h[i] = nil
end
300000.times do
1000000.times do
k, v = h.shift
h[k] = v
end
benchmark/bm_hash_shift_u32.rb
h[i] = nil
end
300000.times do
1000000.times do
k, v = h.shift
h[k] = v
end
benchmark/bm_hash_small2.rb
1000000.times.map{|i| a={}; 2.times{|j| a[j]=j}; a}
benchmark/bm_hash_small4.rb
1000000.times.map{|i| a={}; 4.times{|j| a[j]=j}; a}
benchmark/bm_hash_small8.rb
1000000.times.map{|i| a={}; 8.times{|j| a[j]=j}; a}
benchmark/bm_hash_to_proc.rb
h[i] = nil
end
5000.times do |i|
500000.times do |i|
[i].map(&h)
end
benchmark/bm_hash_values.rb
h[i] = nil
end
5000.times do
10000.times do
h.values
end
ext/-test-/st/foreach/foreach.c
if (c->nr == 0) {
st_data_t i;
if (!c->tbl->entries_packed) rb_bug("should be packed\n");
if (c->tbl->bins == NULL) rb_bug("should be packed\n");
/* force unpacking during iteration: */
for (i = 1; i < expect_size; i++)
st_add_direct(c->tbl, i, i);
if (c->tbl->entries_packed) rb_bug("should be unpacked\n");
if (c->tbl->bins != NULL) rb_bug("should be unpacked\n");
}
if (key != c->nr) {
......
st_add_direct(tbl, 0, 0);
if (!tbl->entries_packed) rb_bug("should still be packed\n");
if (tbl->bins == NULL) rb_bug("should still be packed\n");
st_foreach_check(tbl, unp_fec_i, (st_data_t)&c, -1);
......
(VALUE)c.nr, (VALUE)expect_size);
}
if (tbl->entries_packed) rb_bug("should be unpacked\n");
if (tbl->bins != NULL) rb_bug("should be unpacked\n");
st_free_table(tbl);
......
st_add_direct(tbl, 0, 0);
if (!tbl->entries_packed) rb_bug("should still be packed\n");
if (tbl->bins == NULL) rb_bug("should still be packed\n");
st_foreach(tbl, unp_fe_i, (st_data_t)&c);
......
(VALUE)c.nr, (VALUE)expect_size);
}
if (tbl->entries_packed) rb_bug("should be unpacked\n");
if (tbl->bins != NULL) rb_bug("should be unpacked\n");
st_free_table(tbl);
hash.c
long rb_objid_hash(st_index_t index);
static st_index_t rb_num_hash_start(st_index_t n);
static st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
obj_any_hash(VALUE obj)
{
obj = rb_hash(obj);
return FIX2LONG(obj);
}
/* Prime number (79087987342985798987987) mod 32/64 used for hash
calculations. */
#if SIZEOF_INT == SIZEOF_VOIDP
static const st_index_t jauquet_prime_mod = 2053222611; /* mod 32 */
#else
static const st_index_t jauquet_prime_mod = 6795498992951210195ULL; /* mod 64 */
#endif
static inline st_index_t
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
{
VALUE hval;
st_index_t hnum;
......
hnum = rb_objid_hash((st_index_t)a);
}
else if (BUILTIN_TYPE(a) == T_STRING) {
hnum = rb_str_hash(a);
hnum = (strong_p
? rb_str_hash(a)
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), jauquet_prime_mod));
}
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval;
......
}
static st_index_t
obj_any_hash(VALUE obj)
{
obj = rb_hash(obj);
return FIX2LONG(obj);
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
return any_hash_general(a, FALSE, other_func);
}
static st_index_t
rb_any_hash_weak(VALUE a) {
return any_hash_weak(a, obj_any_hash);
}
static st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
return any_hash_general(a, TRUE, other_func);
}
static st_index_t
......
static st_index_t
rb_num_hash_start(st_index_t n)
{
/*
* This hash function is lightly-tuned for Ruby. Further tuning
* should be possible. Notes:
*
* - (n >> 3) alone is great for heap objects and OK for fixnum,
* however symbols perform poorly.
* - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
* n.b.: +3 to remove most ID scope, +1 worked well initially, too
* n.b.: +1 (instead of 3) worked well initially, too
* - (n << 16) was finally added to avoid losing bits for fixnums
* - avoid expensive modulo instructions, it is currently only
* shifts and bitmask operations.
*/
return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
/* Use a simple multiplicative hashing. It is not high quality
hash function but it is very fast and we can spent the saved
time on collission processing. Mix low and high bits not to
ignore high bits after the multiplication. */
#if SIZEOF_INT == SIZEOF_VOIDP
return ((n >> 16) ^ (n & 0xffff)) * jauquet_prime_mod;
#else
return ((n >> 32) ^ (n & 0xffffffff)) * jauquet_prime_mod;
#endif
}
long
rb_objid_hash(st_index_t index)
{
st_index_t hnum = rb_num_hash_start(index);
hnum = rb_hash_start(hnum);
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
hnum = rb_hash_end(hnum);
return hnum;
return rb_num_hash_start(index);
}
static st_index_t
......
static const struct st_hash_type objhash = {
rb_any_cmp,
rb_any_hash_weak,
rb_any_hash,
};
include/ruby/st.h
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* This is a public domain general purpose hash table package
originally written by Peter Moore @ UCB.
/* @(#) st.h 5.1 89/12/14 */
The hash table data strutures were redesigned and the package was
rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
#ifndef RUBY_ST_H
#define RUBY_ST_H 1
......
typedef struct st_table st_table;
typedef st_data_t st_index_t;
/* Maximal value of unsigned integer type st_index_t. */
#define MAX_ST_INDEX_VAL (~(st_index_t) 0)
typedef int st_compare_func(st_data_t, st_data_t);
typedef st_index_t st_hash_func(st_data_t);
......
struct st_hash_type {
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
/* The following is an optional func for stronger hash. When we
have many different keys with the same hash we can switch to
use it to prevent a denial attack with usage of hash table
collisions. */
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
};
#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
......
# define ST_DATA_COMPATIBLE_P(type) 0
#endif
typedef struct st_table_entry st_table_entry;
struct st_table_entry; /* defined in st.c */
struct st_table {
/* Cached features of the table -- see st.c for more details. */
unsigned char entry_power, bin_power, size_ind;
/* How many times the table was rebuilt. */
unsigned int rebuilds_num;
/* Currently used hash function. */
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
const struct st_hash_type *type;
st_index_t num_bins;
unsigned int entries_packed : 1;
#ifdef __GNUC__
/*
* C spec says,
* A bit-field shall have a type that is a qualified or unqualified
* version of _Bool, signed int, unsigned int, or some other
* implementation-defined type. It is implementation-defined whether
* atomic types are permitted.
* In short, long and long long bit-field are implementation-defined
* feature. Therefore we want to suppress a warning explicitly.
*/
__extension__
#endif
st_index_t num_entries : ST_INDEX_BITS - 1;
union {
struct {
struct st_table_entry **bins;
void *private_list_head[2];
} big;
struct {
struct st_packed_entry *entries;
st_index_t real_entries;
} packed;
} as;
/* Number of entries currently in the table. */
st_index_t num_entries;
/* Array of bins used for access by keys. */
st_index_t *bins;
/* Start and bound index of entries in array entries.
entries_starts and entries_bound are in interval
[0,allocated_entries]. */
st_index_t entries_start, entries_bound;
/* Array of size 2^entry_power. */
st_table_entry *entries;
};
#define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
......
int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg);
int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size);
st_index_t st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never);
st_index_t st_values(st_table *table, st_data_t *values, st_index_t size);
st_index_t st_values_check(st_table *table, st_data_t *values, st_index_t size, st_data_t never);
void st_add_direct(st_table *, st_data_t, st_data_t);
void st_free_table(st_table *);
size_t st_memsize(const st_table *);
void st_cleanup_safe(st_table *, st_data_t);
void st_clear(st_table *);
st_table *st_copy(st_table *);
......
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n);
#define st_strcasecmp st_locale_insensitive_strcasecmp
#define st_strncasecmp st_locale_insensitive_strncasecmp
size_t st_memsize(const st_table *);
st_index_t st_hash(const void *ptr, size_t len, st_index_t h);
st_index_t st_hash_uint32(st_index_t h, uint32_t i);
st_index_t st_hash_uint(st_index_t h, st_index_t i);
......
st_index_t st_hash_start(st_index_t h);
#define st_hash_start(h) ((st_index_t)(h))
st_index_t st_hash_index(st_index_t k);
st_index_t st_hash_double(double d);
RUBY_SYMBOL_EXPORT_END
#if defined(__cplusplus)
st.c
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
/* This is a public domain general purpose hash table package
originally written by Peter Moore @ UCB.
The hash table data structures were redesigned and the package was
rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
/* The original package implemented classic bucket-based hash tables
with entries doubly linked for an access by their insertion order.
To decrease pointer chasing and as a consequence to improve a data
locality the current implementation is based on storing entries in
an array and using hash tables with open addressing. The current
entries are more compact in comparison with the original ones and
this also improves the data locality.
The hash table has two arrays called *bins* and *entries*.
bins:
-------
| | entries array:
|-------| --------------------------------
| index | | | entry: | | |
|-------| | | | | |
| ... | | ... | hash | ... | ... |
|-------| | | key | | |
| empty | | | record | | |
|-------| --------------------------------
| ... | ^ ^
|-------| |_ entries start |_ entries bound
|deleted|
-------
o The entry array contains table entries in the same order as they
were inserted.
When the first entry is deleted, a variable containing index of
the current first entry (*entries start*) is changed. In all
other cases of the deletion, we just mark the entry as deleted by
using a reserved hash value.
Such organization of the entry storage makes operations of the
table shift and the entries traversal very fast.
o The bins provide access to the entries by their keys. The
key hash is mapped to a bin containing *index* of the
corresponding entry in the entry array.
The bin array size is always power of two, it makes mapping very
fast by using the corresponding lower bits of the hash.
Generally it is not a good idea to ignore some part of the hash.
But alternative approach is worse. For example, we could use a
modulo operation for mapping and a prime number for the size of
the bin array. Unfortunately, the modulo operation for big
64-bit numbers are extremely slow (it takes more than 100 cycles
on modern Intel CPUs).
Still other bits of the hash value are used when the mapping
results in a collision. In this case we use a secondary hash
value which is a result of a function of the collision bin
index and the original hash value. The function choice
guarantees that we can traverse all bins and finally find the
corresponding bin as after several iterations the function
becomes a full cycle linear congruential generator because it
satisfies requirements of the Hull-Dobell theorem.
When an entry is removed from the table besides marking the
hash in the corresponding entry described above, we also mark
the bin by a special value in order to find entries which had
a collision with the removed entries.
There are two reserved values for the bins. One denotes an
empty bin, another one denotes a bin for a deleted entry.
o The length of the bin array is at least two times more than the
entry array length. This keeps the table load factor healthy.
The trigger of rebuilding the table is always a case when we can
not insert an entry anymore at the entries bound. We could
change the entries bound too in case of deletion but than we need
a special code to count bins with corresponding deleted entries
and reset the bin values when there are too many bins
corresponding deleted entries
Table rebuilding is done by creation of a new entry array and
bins of an appropriate size. We also try to reuse the arrays
in some cases by compacting the array and removing deleted
entries.
o To save memory very small tables have no allocated arrays
bins. We use a linear search for an access by a key.
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
bins depending on the current hash table size.
This implementation speeds up the Ruby hash table benchmarks in
average by more 40% on Intel Haswell CPU.
*/
#ifdef NOT_RUBY
#include "regint.h"
......
#include <stdlib.h>
#endif
#include <string.h>
#include "ccan/list/list.h"
#include <assert.h>
#ifdef __GNUC__
#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
#define EXPECT(expr, val) __builtin_expect(expr, val)
#define ATTRIBUTE_UNUSED __attribute__((unused))
#else
#define PREFETCH(addr, write_p)
#define EXPECT(expr, val) (expr)
#define ATTRIBUTE_UNUSED
#endif
typedef struct st_table_entry st_table_entry;
#ifdef ST_DEBUG
#define st_assert(cond) assert(cond)
#else
#define st_assert(cond) ((void)(0 && (cond)))
#endif
/* The type of hashes. */
typedef st_index_t st_hash_t;
struct st_table_entry {
st_index_t hash;
st_hash_t hash;
st_data_t key;
st_data_t record;
st_table_entry *next;
struct list_node olist;
};
typedef struct st_packed_entry {
st_index_t hash;
st_data_t key, val;
} st_packed_entry;
#ifndef STATIC_ASSERT
#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
#endif
#define ST_DEFAULT_MAX_DENSITY 5
#define ST_DEFAULT_INIT_TABLE_SIZE 16
#define ST_DEFAULT_PACKED_TABLE_SIZE 18
#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
/*
* DEFAULT_MAX_DENSITY is the default for the largest we allow the
* average number of items per bin before increasing the number of
* bins
*
* DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
* allocated initially
*
*/
#define type_numhash st_hashtype_num
const struct st_hash_type st_hashtype_num = {
st_numcmp,
......
strcasehash,
};
static void rehash(st_table *);
/* Value used to catch uninitialized entries/bins during debugging.
There is a possibility for a false alarm, but its probability is
extremely small. */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xafa
#ifdef RUBY
#undef malloc
#undef realloc
#undef calloc
#undef free
#define malloc xmalloc
#define calloc xcalloc
#define realloc xrealloc
#define free(x) xfree(x)
#define malloc ruby_xmalloc
#define calloc ruby_xcalloc
#define realloc ruby_xrealloc
#define free ruby_xfree
#endif
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
#include <stdlib.h>
#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
#define hash_pos(h,n) ((h) & (n - 1))
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
#define PTR_EQUAL(tab, ptr, hash_val, key) \
((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key))
/* Features of a table. */
struct st_features {
/* Power of 2 used for number of allocated entries. */
unsigned char entry_power;
/* Power of 2 used for number of allocated bins. Depending on the
table size, the number of bins is 2-4 times more than the
number of entries. */
unsigned char bin_power;
/* Enumeration of sizes of bins (8-bit, 16-bit etc). */
unsigned char size_ind;
/* Bins are packed in words of type st_index_t. The following is
a size of bins counted by words. */
st_index_t bins_words;
};
/* preparation for possible allocation improvements */
#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
#define st_free_entry(entry) free(entry)
#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
#define st_dealloc_table(table) free(table)
#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
#define st_free_bins(bins, size) free(bins)
static inline st_table_entry**
st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
{
bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
MEMZERO(bins, st_table_entry*, newsize);
return bins;
}
/* Shortcut */
#define bins as.big.bins
#define real_entries as.packed.real_entries
/* preparation for possible packing improvements */
#define PACKED_BINS(table) ((table)->as.packed.entries)
#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
#define PKEY(table, i) PACKED_ENT((table), (i)).key
#define PVAL(table, i) PACKED_ENT((table), (i)).val
#define PHASH(table, i) PACKED_ENT((table), (i)).hash
#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
/* this function depends much on packed layout, so that it placed here */
static inline void
remove_packed_entry(st_table *table, st_index_t i)
{
table->real_entries--;
table->num_entries--;
if (i < table->real_entries) {
MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
st_packed_entry, table->real_entries - i);
}
}
/* Features of all possible size tables. */
#if SIZEOF_ST_INDEX_T == 8
#define MAX_POWER2 62
struct st_features features[] = {
{0, 2, 0, 0x0},
{1, 3, 0, 0x1},
{2, 4, 0, 0x2},
{3, 5, 0, 0x4},
{4, 6, 0, 0x8},
{5, 7, 0, 0x10},
{6, 8, 0, 0x20},
{7, 9, 0, 0x40},
{8, 10, 1, 0x100},
{9, 11, 1, 0x200},
{10, 12, 1, 0x400},
{11, 13, 1, 0x800},
{12, 14, 1, 0x1000},
{13, 15, 1, 0x2000},
{14, 16, 1, 0x4000},
{15, 17, 1, 0x8000},
{16, 18, 2, 0x20000},
{17, 19, 2, 0x40000},
{18, 20, 2, 0x80000},
{19, 21, 2, 0x100000},
{20, 22, 2, 0x200000},
{21, 23, 2, 0x400000},
{22, 24, 2, 0x800000},
{23, 25, 2, 0x1000000},
{24, 26, 2, 0x2000000},
{25, 27, 2, 0x4000000},
{26, 28, 2, 0x8000000},
{27, 29, 2, 0x10000000},
{28, 30, 2, 0x20000000},
{29, 31, 2, 0x40000000},
{30, 32, 2, 0x80000000},
{31, 33, 2, 0x100000000},
{32, 33, 3, 0x200000000},
{33, 34, 3, 0x400000000},
{34, 35, 3, 0x800000000},
{35, 36, 3, 0x1000000000},
{36, 37, 3, 0x2000000000},
{37, 38, 3, 0x4000000000},
{38, 39, 3, 0x8000000000},
{39, 40, 3, 0x10000000000},
{40, 41, 3, 0x20000000000},
{41, 42, 3, 0x40000000000},
{42, 43, 3, 0x80000000000},
{43, 44, 3, 0x100000000000},
{44, 45, 3, 0x200000000000},
{45, 46, 3, 0x400000000000},
{46, 47, 3, 0x800000000000},
{47, 48, 3, 0x1000000000000},
{48, 49, 3, 0x2000000000000},
{49, 50, 3, 0x4000000000000},
{50, 51, 3, 0x8000000000000},
{51, 52, 3, 0x10000000000000},
{52, 53, 3, 0x20000000000000},
{53, 54, 3, 0x40000000000000},
{54, 55, 3, 0x80000000000000},
{55, 56, 3, 0x100000000000000},
{56, 57, 3, 0x200000000000000},
{57, 58, 3, 0x400000000000000},
{58, 59, 3, 0x800000000000000},
{59, 60, 3, 0x1000000000000000},
{60, 61, 3, 0x2000000000000000},
{61, 62, 3, 0x4000000000000000},
{62, 63, 3, 0x8000000000000000},
};
static inline void
remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
{
table->num_entries--;
PKEY_SET(table, i, never);
PVAL_SET(table, i, never);
PHASH_SET(table, i, 0);
}
#else
#define MAX_POWER2 30
struct st_features features[] = {
{0, 2, 0, 0x1},
{1, 3, 0, 0x2},
{2, 4, 0, 0x4},
{3, 5, 0, 0x8},
{4, 6, 0, 0x10},
{5, 7, 0, 0x20},
{6, 8, 0, 0x40},
{7, 9, 0, 0x80},
{8, 10, 1, 0x200},
{9, 11, 1, 0x400},
{10, 12, 1, 0x800},
{11, 13, 1, 0x1000},
{12, 14, 1, 0x2000},
{13, 15, 1, 0x4000},
{14, 16, 1, 0x8000},
{15, 17, 1, 0x10000},
{16, 17, 2, 0x20000},
{17, 18, 2, 0x40000},
{18, 19, 2, 0x80000},
{19, 20, 2, 0x100000},
{20, 21, 2, 0x200000},
{21, 22, 2, 0x400000},
{22, 23, 2, 0x800000},
{23, 24, 2, 0x1000000},
{24, 25, 2, 0x2000000},
{25, 26, 2, 0x4000000},
{26, 27, 2, 0x8000000},
{27, 28, 2, 0x10000000},
{28, 29, 2, 0x20000000},
{29, 30, 2, 0x40000000},
{30, 31, 2, 0x80000000},
};
static st_index_t
next_pow2(st_index_t x)
{
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
#if SIZEOF_ST_INDEX_T == 8
x |= x >> 32;
#endif
return x + 1;
/* The reserved hash value and its substitution. */
#define RESERVED_HASH_VAL (~(st_hash_t) 0)
#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
/* Return hash value of KEY for table TAB. */
static inline st_hash_t
do_hash(st_data_t key, st_table *tab) {
st_index_t h = (st_index_t)(tab->curr_hash)(key);
#if SIZEOF_INT == SIZEOF_VOIDP
st_hash_t hash = h;
#else
st_hash_t hash = h;
#endif
/* RESERVED_HASH_VAL is used for a deleted entry. Map it into
another value. Such mapping should be extremely rare. */
return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
}
static st_index_t
new_size(st_index_t size)
{
st_index_t n;
/* Power of 2 defining the minimal number of allocated entries. */
#define MINIMAL_POWER2 3
#if MINIMAL_POWER2 < 2
#error "MINIMAL_POWER2 should be >= 2"
#endif
/* If the power2 of the allocated `entries` is less than the following
value, don't allocate bins and use a linear search. */
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 3
if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
return size;
/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
static int
get_power2(st_index_t size) {
unsigned int n;
n = next_pow2(size);
if (n > size)
return n;
for (n = 0; size != 0; n++)
size >>= 1;
if (n <= MAX_POWER2)
return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
#ifndef NOT_RUBY
/* Ran out of the table entries */
rb_raise(rb_eRuntimeError, "st_table too big");
#endif
return -1; /* should raise exception */
/* should raise exception */
return -1;
}
/* Return value of N-th bin in array BINS of table with bins size
index S. */
static inline st_index_t
get_bin(st_index_t *bins, int s, st_index_t n) {
return (s == 0 ? ((unsigned char *) bins)[n]
: s == 1 ? ((unsigned short *) bins)[n]
: s == 2 ? ((unsigned int *) bins)[n]
: ((st_index_t *) bins)[n]);
}
/* Set up N-th bin in array BINS of table with bins size index S to
value V. */
static inline void
set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) {
if (s == 0) ((unsigned char *) bins)[n] = v;
else if (s == 1) ((unsigned short *) bins)[n] = v;
else if (s == 2) ((unsigned int *) bins)[n] = v;
else ((st_index_t *) bins)[n] = v;
}
/* These macros define reserved values for empty table bin and table
bin which contains a deleted entry. We will never use such values
for an entry index in bins. */
#define EMPTY_BIN 0
#define DELETED_BIN 1
/* Base of a real entry index in the bins. */
#define ENTRY_BASE 2
/* Mark I-th bin of table TAB as empty, in other words not
corresponding to any entry. */
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
/* Values used for not found entry and bin with given
characteristics. */
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
/* Mark I-th bin of table TAB as corresponding to a deleted table
entry. Update number of entries in the table and number of bins
corresponding to deleted entries. */
#define MARK_BIN_DELETED(tab, i) \
do { \
st_assert (i != UNDEFINED_BIN_IND); \
st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
} while (0)
/* Macros to check that value B is used empty bins and bins
corresponding deleted entries. */
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
/* Macros to check empty bins and bins corresponding to deleted
entries. Bins are given by their index I in table TAB. */
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
/* Macros for marking and checking deleted entries given by their
pointer E_PTR. */
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
/* Return bin size index of table TAB. */
static inline st_index_t
get_size_ind(const st_table *tab) {
return tab->size_ind;
}
/* Return the number of allocated bins of table TAB. */
static inline st_index_t
get_bins_num(const st_table *tab) {
return 1<<tab->bin_power;
}
/* Return mask for a bin index in table TAB. */
static inline st_index_t
bins_mask(const st_table *tab) {
return get_bins_num(tab) - 1;
}
/* Return the index of table TAB bin corresponding to
HASH_VALUE. */
static inline st_index_t
hash_bin(st_hash_t hash_value, st_table *tab) {
return hash_value & bins_mask(tab);
}
/* Return the number of allocated entries of table TAB. */
static inline st_index_t
get_allocated_entries (const st_table *tab) {
return 1<<tab->entry_power;
}
/* Return size of the allocated bins of table TAB. */
static inline st_index_t
bins_size(const st_table *tab) {
return features[tab->entry_power].bins_words * sizeof (st_index_t);
}
/* Mark all bins of table TAB as empty. */
static void
initialize_bins(st_table *tab) {
memset(tab->bins, 0, bins_size(tab));
}
/* Make table TAB empty. */
static void
make_tab_empty(st_table *tab)
{
tab->curr_hash = tab->type->hash;
tab->num_entries = 0;
tab->rebuilds_num = 0;
tab->entries_start = tab->entries_bound = 0;
if (tab->bins != NULL)
initialize_bins(tab);
}
#ifdef ST_DEBUG
/* Check the table T consistency. It can be extremely slow. So use
it only for debugging. */
static void
st_check(st_table *tab) {
st_index_t d, e, i, n, p;
for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
;
p = i;
assert (p >= MINIMAL_POWER2);
assert (tab->entries_bound <= get_allocated_entries(tab)
&& tab->entries_start <= tab->entries_bound);
n = 0;
return;
if (tab->entries_bound != 0)
for (i = tab->entries_start; i < tab->entries_bound; i++) {
assert (tab->entries[i].hash != (st_hash_t) ST_INIT_VAL
&& tab->entries[i].key != ST_INIT_VAL
&& tab->entries[i].record != ST_INIT_VAL);
if (! DELETED_ENTRY_P(&tab->entries[i]))
n++;
}
assert (n == tab->num_entries);
if (tab->bins == NULL)
assert (p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
else {
assert (p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
for (n = d = i = 0; i < get_bins_num(tab); i++) {
assert (get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
if (IND_DELETED_BIN_P(tab, i)) {
d++;
continue;
}
else if (IND_EMPTY_BIN_P(tab, i))
continue;
n++;
e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
assert (tab->entries_start <= e && e < tab->entries_bound);
assert (! DELETED_ENTRY_P(&tab->entries[e]));
assert (tab->entries[e].hash != (st_hash_tab) ST_INIT_VAL
&& tab->entries[e].key != ST_INIT_VAL
&& tab->entries[e].record != ST_INIT_VAL);
}
assert (n == tab->num_entries);
}
}
#endif
#ifdef HASH_LOG
#ifdef HAVE_UNISTD_H
......
static struct {
int all, total, num, str, strcase;
} collision;
/* Flag switching off output of package statistics at the end of
program. */
static int init_st = 0;
/* Output overall number of table searches and collisions into a
temporary file. */
static void
stat_col(void)
{
char fname[10+sizeof(long)*3];
FILE *f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
((double)collision.all / (collision.total)) * 100);
((double)collision.all / (collision.total)) * 100);
fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
fclose(f);
}
#endif
static struct list_head *
st_head(const st_table *tbl)
{
uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
return (struct list_head *)addr;
}
st_table*
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
{
st_table *tbl;
/* Create and return table with TYPE which can hold at least SIZE
entries. The real number of entries which the table can hold is
the nearest power of two for SIZE. */
st_table *
st_init_table_with_size(const struct st_hash_type *type, st_index_t size) {
st_table *tab;
int n;
#ifdef HASH_LOG
# if HASH_LOG+0 < 0
#if HASH_LOG+0 < 0
{
const char *e = getenv("ST_HASH_LOG");
if (!e || !*e) init_st = 1;
const char *e = getenv("ST_HASH_LOG");
if (!e || !*e) init_st = 1;
}
# endif
#endif
if (init_st == 0) {
init_st = 1;
atexit(stat_col);
init_st = 1;
atexit(stat_col);
}
#endif
tbl = st_alloc_table();
tbl->type = type;
tbl->num_entries = 0;
tbl->entries_packed = size <= MAX_PACKED_HASH;
if (tbl->entries_packed) {
size = ST_DEFAULT_PACKED_TABLE_SIZE;
tbl->real_entries = 0;
}
else {
size = new_size(size); /* round up to power-of-two */
list_head_init(st_head(tbl));
}
tbl->num_bins = size;
tbl->bins = st_alloc_bins(size);
return tbl;
n = get_power2(size);
tab = (st_table *) malloc(sizeof (st_table));
tab->type = type;
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
tab->size_ind = features[n].size_ind;
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
tab->bins = NULL;
else
tab->bins = (st_index_t *) malloc(bins_size(tab));
tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
* sizeof(st_table_entry));
#ifdef ST_DEBUG
memset(tab->entries, ST_INIT_VAL_BYTE,
get_allocated_entries(tab) * sizeof(st_table_entry));
if (tab->bins != NULL)
memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
#endif
make_tab_empty(tab);
#ifdef ST_DEBUG
st_check(tab);
#endif
return tab;
}
st_table*
st_init_table(const struct st_hash_type *type)
{
/* Create and return table with TYPE which can hold a minimal number
of entries (see comments for get_power2). */
st_table *
st_init_table(const struct st_hash_type *type) {
return st_init_table_with_size(type, 0);
}
st_table*
st_init_numtable(void)
{
/* Create and return table which can hold a minimal number of
numbers. */
st_table *
st_init_numtable(void) {
return st_init_table(&type_numhash);
}
st_table*
st_init_numtable_with_size(st_index_t size)
{
/* Create and return table which can hold SIZE numbers. */
st_table *
st_init_numtable_with_size(st_index_t size) {
return st_init_table_with_size(&type_numhash, size);
}
st_table*
st_init_strtable(void)
{
/* Create and return table which can hold a minimal number of
strings. */
st_table *
st_init_strtable(void) {
return st_init_table(&type_strhash);
}
st_table*
st_init_strtable_with_size(st_index_t size)
{
/* Create and return table which can hold SIZE strings. */
st_table *
st_init_strtable_with_size(st_index_t size) {
return st_init_table_with_size(&type_strhash, size);
}
st_table*
st_init_strcasetable(void)
{
/* Create and return table which can hold a minimal number of strings
whose character case is ignored. */
st_table *
st_init_strcasetable(void) {
return st_init_table(&type_strcasehash);
}
st_table*
st_init_strcasetable_with_size(st_index_t size)
{
/* Create and return table which can hold SIZE strings whose character
case is ignored. */
st_table *
st_init_strcasetable_with_size(st_index_t size) {
return st_init_table_with_size(&type_strcasehash, size);
}
/* Make table TAB empty. */
void
st_clear(st_table *table)
{
register st_table_entry *ptr = 0, *next;
if (table->entries_packed) {
table->num_entries = 0;
table->real_entries = 0;
return;
}
list_for_each_safe(st_head(table), ptr, next, olist) {
/* list_del is not needed */
st_free_entry(ptr);
}
table->num_entries = 0;
MEMZERO(table->bins, st_table_entry*, table->num_bins);
list_head_init(st_head(table));
st_clear(st_table *tab) {
make_tab_empty(tab);
#ifdef ST_DEBUG
st_check(tab);
#endif
}
/* Free table TAB space. */
void
st_free_table(st_table *table)
{
st_clear(table);
st_free_bins(table->bins, table->num_bins);
st_dealloc_table(table);
st_free_table(st_table *tab) {
if (tab->bins != NULL)
free(tab->bins);
free(tab->entries);
free(tab);
}
/* Return byte size of memory allocted for table TAB. */
size_t
st_memsize(const st_table *table)
{
if (table->entries_packed) {
return table->num_bins * sizeof (void *) + sizeof(st_table);
}
else {
return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table);
}
st_memsize(const st_table *tab) {
return(sizeof(st_table)
+ (tab->bins == NULL ? 0 : bins_size(tab))
+ get_allocated_entries(tab) * sizeof(st_table_entry));
}
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
static st_index_t
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
static st_index_t
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
static st_index_t
find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
static st_index_t
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_data_t key, st_index_t *bin_ind);
#ifdef HASH_LOG
static void
count_collision(const struct st_hash_type *type)
{
collision.all++;
if (type == &type_numhash) {
collision.num++;
}
count_collision(const struct st_hash_type *type) {
collision.all++;
if (type == &type_numhash) {
collision.num++;
}
else if (type == &type_strhash) {
collision.strcase++;
collision.strcase++;
}
else if (type == &type_strcasehash) {
collision.str++;
}
collision.str++;
}
}
#define COLLISION (collision_check ? count_collision(table->type) : (void)0)
#define FOUND_ENTRY (collision_check ? collision.total++ : (void)0)
#define collision_check 1
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
#else
#define COLLISION
#define FOUND_ENTRY
#define FOUND_BIN
#endif
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = hash_pos(hash_val, (table)->num_bins))))
/* If the number of entries in the table is at least REBUILD_THRESHOLD
times less than the entry array length, decrease the table
size. */
#define REBUILD_THRESHOLD 4
static st_table_entry *
find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
st_index_t bin_pos)
{
register st_table_entry *ptr = table->bins[bin_pos];
FOUND_ENTRY;
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
COLLISION;
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
ptr = ptr->next;
}
ptr = ptr->next;
}
return ptr;
}
static inline st_index_t
find_packed_index_from(const st_table *table, st_index_t hash_val,
st_data_t key, st_index_t i)
{
while (i < table->real_entries &&
(PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
i++;
}
return i;
}
static inline st_index_t
find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
{
return find_packed_index_from(table, hash_val, key, 0);
}
#define collision_check 0
int
st_lookup(st_table *table, register st_data_t key, st_data_t *value)
{
st_index_t hash_val;
register st_table_entry *ptr;
#if REBUILD_THRESHOLD < 2
#error "REBUILD_THRESHOLD should be >= 2"
#endif
hash_val = do_hash(key, table);
static int inside_table_rebuild_p = FALSE;
if (table->entries_packed) {
st_index_t i = find_packed_index(table, hash_val, key);
if (i < table->real_entries) {
if (value != 0) *value = PVAL(table, i);
... This diff was truncated because it exceeds the maximum size that can be displayed.
(1-1/12)