Project

General

Profile

Feature #12142 » strong_hash.patch

vmakarov (Vladimir Makarov), 04/29/2016 09:55 PM

View differences:

hash.c
long rb_objid_hash(st_index_t index);
#if SIZEOF_INT == SIZEOF_VOIDP
static const st_index_t str_seed = 0xfa835867;
#else
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
#endif
static inline st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
{
VALUE hval;
st_index_t hnum;
......
hnum = rb_objid_hash((st_index_t)a);
}
else if (BUILTIN_TYPE(a) == T_STRING) {
hnum = rb_str_hash(a);
hnum = (strong_p
? rb_str_hash(a)
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
}
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval;
......
return FIX2LONG(obj);
}
/* Weak-hash wrapper: dispatch to any_hash_general with strong_p == FALSE
   (e.g. strings are hashed with the seeded st_hash instead of
   rb_str_hash).  OTHER_FUNC handles objects with no built-in hash. */
static inline st_index_t
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
{
    return any_hash_general(a, FALSE, other_func);
}
static st_index_t
rb_any_hash(VALUE a)
{
rb_any_hash_weak(VALUE a) {
return any_hash_weak(a, obj_any_hash);
}
/* Strong-hash wrapper: dispatch to any_hash_general with strong_p == TRUE
   (e.g. strings are hashed with rb_str_hash).  OTHER_FUNC handles
   objects with no built-in hash. */
static inline st_index_t
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{
    return any_hash_general(a, TRUE, other_func);
}
/* st-compatible strong hash of an arbitrary Ruby VALUE; installed as
   the strong_hash callback of objhash. */
static st_index_t
rb_any_hash(VALUE a)
{
    return any_hash(a, obj_any_hash);
}
......
/* Hash-table callbacks for tables keyed by arbitrary Ruby objects.
   Field order follows struct st_hash_type: compare, hash, strong_hash. */
static const struct st_hash_type objhash = {
    rb_any_cmp,        /* compare */
    rb_any_hash_weak,  /* hash: default (weak) hash function */
    rb_any_hash,       /* strong_hash: switched to on excessive collisions */
};
include/ruby/st.h
/* Callback triple describing how a table compares and hashes its keys. */
struct st_hash_type {
    /* Key comparison function. */
    int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
    /* Default hash function applied to keys. */
    st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
    /* Optional stronger hash function; may be NULL.  When many distinct
       keys collide on the same hash value, the table can switch to this
       function to defend against a denial-of-service attack based on
       engineered hash collisions. */
    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
};
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
......
struct st_table {
/* Cached features of the table -- see st.c for more details. */
unsigned char entry_power, bin_power, size_ind;
/* True when we are rebuilding the table. */
unsigned char inside_rebuild_p;
/* How many times the table was rebuilt. */
unsigned int rebuilds_num;
/* Currently used hash function. */
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
const struct st_hash_type *type;
/* Number of entries currently in the table. */
st_index_t num_entries;
st.c
/* Return hash value of KEY for table TAB. */
static inline st_hash_t
do_hash(st_data_t key, st_table *tab) {
st_index_t h = (st_index_t)(tab->type->hash)(key);
st_index_t h = (st_index_t)(tab->curr_hash)(key);
#if SIZEOF_INT == SIZEOF_VOIDP
st_hash_t hash = h;
#else
......
static void
make_tab_empty(st_table *tab)
{
tab->curr_hash = tab->type->hash;
tab->num_entries = 0;
tab->rebuilds_num = 0;
tab->entries_start = tab->entries_bound = 0;
......
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
tab->size_ind = features[n].size_ind;
tab->inside_rebuild_p = FALSE;
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
tab->bins = NULL;
else
......
}
bound = tab->entries_bound;
entries = tab->entries;
tab->inside_rebuild_p = TRUE;
if ((2 * tab->num_entries <= get_allocated_entries(tab)
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|| (tab->entry_power > MINIMAL_POWER2
......
else {
new_tab = st_init_table_with_size(tab->type,
2 * tab->num_entries - 1);
st_assert(new_tab->curr_hash == new_tab->type->hash);
new_tab->curr_hash = tab->curr_hash;
new_entries = new_tab->entries;
}
ni = 0;
......
tab->entries_start = 0;
tab->entries_bound = tab->num_entries;
tab->rebuilds_num++;
tab->inside_rebuild_p = FALSE;
#ifdef ST_DEBUG
st_check(tab);
#endif
......
}
}
/* Recompute the stored hash value of every live (non-deleted) entry of
   table TAB with the table's current hash function.  Called after
   switching curr_hash so that cached entry hashes stay consistent. */
static void
reset_entry_hashes (st_table *tab)
{
    st_table_entry *ep = &tab->entries[tab->entries_start];
    st_table_entry *const end = &tab->entries[tab->entries_bound];

    for (; ep < end; ep++) {
        if (!DELETED_ENTRY_P(ep))
            ep->hash = do_hash(ep->key, tab);
    }
}
/* If, while searching for a bin to include a new entry, we hit this
   many collisions between different keys that nevertheless share the
   same hash value, a hash-flooding denial-of-service attack may be in
   progress: switch the table to the stronger hash function.
   NOTE(review): "THRESHOULD" is a misspelling of THRESHOLD; renaming it
   would require updating every use site, so the token is kept as-is. */
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
/* Return index of table TAB bin for HASH_VALUE and KEY through
BIN_IND and the pointed value as the function result. Reserve the
bin for inclusion of the corresponding entry into the table if it
......
st_index_t entry_index;
st_index_t first_deleted_bin_ind;
st_table_entry *entries;
int hit;
st_assert(tab != NULL && tab->bins != NULL
&& tab->entries_bound <= get_allocated_entries(tab)
&& tab->entries_start <= tab->entries_bound);
repeat:
ind = hash_bin(curr_hash_value, tab);
#ifdef QUADRATIC_PROBE
d = 1;
......
FOUND_BIN;
first_deleted_bin_ind = UNDEFINED_BIN_IND;
entries = tab->entries;
hit = 0;
for (;;) {
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_BIN_P(entry_index)) {
......
} else if (! DELETED_BIN_P(entry_index)) {
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break;
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
hit++;
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
&& tab->curr_hash != tab->type->strong_hash
&& tab->type->strong_hash != NULL
&& ! tab->inside_rebuild_p) {
tab->curr_hash = tab->type->strong_hash;
*hash_value = curr_hash_value = do_hash(key, tab);
reset_entry_hashes(tab);
rebuild_table(tab);
goto repeat;
}
}
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
first_deleted_bin_ind = ind;
#ifdef QUADRATIC_PROBE
(4-4/12)