Project

General

Profile

strong_hash.patch

vmakarov (Vladimir Makarov), 04/29/2016 09:55 PM

Download (8.08 KB)

View differences:

hash.c
145 145

  
146 146
long rb_objid_hash(st_index_t index);
147 147

  
148
#if SIZEOF_INT == SIZEOF_VOIDP
149
static const st_index_t str_seed = 0xfa835867;
150
#else
151
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
152
#endif
153

  
148 154
static inline st_index_t
149
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
155
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
150 156
{
151 157
    VALUE hval;
152 158
    st_index_t hnum;
......
163 169
	hnum = rb_objid_hash((st_index_t)a);
164 170
    }
165 171
    else if (BUILTIN_TYPE(a) == T_STRING) {
166
	hnum = rb_str_hash(a);
172
	hnum = (strong_p
173
		? rb_str_hash(a)
174
		: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
167 175
    }
168 176
    else if (BUILTIN_TYPE(a) == T_SYMBOL) {
169 177
	hnum = RSYMBOL(a)->hashval;
......
200 208
    return FIX2LONG(obj);
201 209
}
202 210

  
211
static inline st_index_t
212
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
213
    return any_hash_general(a, FALSE, other_func);
214
}
215

  
203 216
static st_index_t
204
rb_any_hash(VALUE a)
205
{
217
rb_any_hash_weak(VALUE a) {
218
    return any_hash_weak(a, obj_any_hash);
219
}
220

  
221
static inline st_index_t
222
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
223
    return any_hash_general(a, TRUE, other_func);
224
}
225

  
226
static st_index_t
227
rb_any_hash(VALUE a) {
206 228
    return any_hash(a, obj_any_hash);
207 229
}
208 230

  
......
265 287

  
266 288
static const struct st_hash_type objhash = {
267 289
    rb_any_cmp,
290
    rb_any_hash_weak,
268 291
    rb_any_hash,
269 292
};
270 293

  
include/ruby/st.h
61 61
struct st_hash_type {
62 62
    int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
63 63
    st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
64
    /* The following is an optional function for a stronger hash.  When we
65
       have many different keys with the same hash we can switch to
66
       use it to prevent a denial-of-service attack that exploits hash table
67
       collisions. */
68
    st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
64 69
};
65 70

  
66 71
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
......
77 82
struct st_table {
78 83
    /* Cached features of the table -- see st.c for more details.  */
79 84
    unsigned char entry_power, bin_power, size_ind;
85
    /* True when we are rebuilding the table.  */
86
    unsigned char inside_rebuild_p;
80 87
    /* How many times the table was rebuilt.  */
81 88
    unsigned int rebuilds_num;
89
    /* Currently used hash function.  */
90
    st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
82 91
    const struct st_hash_type *type;
83 92
    /* Number of entries currently in the table.  */
84 93
    st_index_t num_entries;
st.c
304 304
/* Return hash value of KEY for table TAB.  */
305 305
static inline st_hash_t
306 306
do_hash(st_data_t key, st_table *tab) {
307
    st_index_t h = (st_index_t)(tab->type->hash)(key);
307
    st_index_t h = (st_index_t)(tab->curr_hash)(key);
308 308
#if SIZEOF_INT == SIZEOF_VOIDP
309 309
    st_hash_t hash = h;
310 310
#else
......
455 455
static void
456 456
make_tab_empty(st_table *tab)
457 457
{
458
    tab->curr_hash = tab->type->hash;
458 459
    tab->num_entries = 0;
459 460
    tab->rebuilds_num = 0;
460 461
    tab->entries_start = tab->entries_bound = 0;
......
566 567
    tab->entry_power = n;
567 568
    tab->bin_power = features[n].bin_power;
568 569
    tab->size_ind = features[n].size_ind;
570
    tab->inside_rebuild_p = FALSE;
569 571
    if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
570 572
        tab->bins = NULL;
571 573
    else
......
723 725
    }
724 726
    bound = tab->entries_bound;
725 727
    entries = tab->entries;
728
    tab->inside_rebuild_p = TRUE;
726 729
    if ((2 * tab->num_entries <= get_allocated_entries(tab)
727 730
	 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
728 731
	|| (tab->entry_power > MINIMAL_POWER2
......
737 740
    else {
738 741
        new_tab = st_init_table_with_size(tab->type,
739 742
					  2 * tab->num_entries - 1);
743
	st_assert(new_tab->curr_hash == new_tab->type->hash);
744
 	new_tab->curr_hash = tab->curr_hash;
740 745
	new_entries = new_tab->entries;
741 746
    }
742 747
    ni = 0;
......
775 780
    tab->entries_start = 0;
776 781
    tab->entries_bound = tab->num_entries;
777 782
    tab->rebuilds_num++;
783
    tab->inside_rebuild_p = FALSE;
778 784
#ifdef ST_DEBUG
779 785
    st_check(tab);
780 786
#endif
......
937 943
    }
938 944
}
939 945

  
946
/* Recalculate hashes of all non-deleted entries in table TAB using its current hash function.  */
947
static void
948
reset_entry_hashes (st_table *tab)
949
{
950
    st_index_t i, bound;
951
    st_table_entry *entries, *curr_entry_ptr;
952
    
953
    bound = tab->entries_bound;
954
    entries = tab->entries;
955
    for (i = tab->entries_start; i < bound; i++) {
956
	curr_entry_ptr = &entries[i];
957
	if (! DELETED_ENTRY_P(curr_entry_ptr))
958
	    curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
959
    }
960
}
961

  
962
/* If we see more than the following number of collisions with different
963
   keys having the same hash while finding a bin for inclusion of a new
964
   entry, a denial-of-service attack is possibly going on.  Start to
965
   use a stronger hash.  */
966
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
967

  
940 968
/* Return index of table TAB bin for HASH_VALUE and KEY through
941 969
   BIN_IND and the pointed value as the function result.  Reserve the
942 970
   bin for inclusion of the corresponding entry into the table if it
......
958 986
    st_index_t entry_index;
959 987
    st_index_t first_deleted_bin_ind;
960 988
    st_table_entry *entries;
989
    int hit;
961 990
    
962 991
    st_assert(tab != NULL && tab->bins != NULL
963 992
	      && tab->entries_bound <= get_allocated_entries(tab)
964 993
	      && tab->entries_start <= tab->entries_bound);
994
  repeat:
965 995
    ind = hash_bin(curr_hash_value, tab);
966 996
#ifdef QUADRATIC_PROBE
967 997
    d = 1;
......
971 1001
    FOUND_BIN;
972 1002
    first_deleted_bin_ind = UNDEFINED_BIN_IND;
973 1003
    entries = tab->entries;
1004
    hit = 0;
974 1005
    for (;;) {
975 1006
        entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
976 1007
        if (EMPTY_BIN_P(entry_index)) {
......
985 1016
        } else if (! DELETED_BIN_P(entry_index)) {
986 1017
            if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
987 1018
                break;
1019
	    if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
1020
 	        hit++;
1021
 		if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
1022
 		    && tab->curr_hash != tab->type->strong_hash
1023
		    && tab->type->strong_hash != NULL
1024
		    && ! tab->inside_rebuild_p) {
1025
		    tab->curr_hash = tab->type->strong_hash;
1026
 		    *hash_value = curr_hash_value = do_hash(key, tab);
1027
 		    reset_entry_hashes(tab);
1028
 		    rebuild_table(tab);
1029
 		    goto repeat;
1030
		}
1031
	    }
988 1032
        } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
989 1033
            first_deleted_bin_ind = ind;
990 1034
#ifdef QUADRATIC_PROBE