From 715bdc2fe0181361a388acaa4b6d21169ed872ed Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 9 Mar 2014 02:05:09 +0000
Subject: [PATCH 2/4] ihash: initial implementation + method table conversion

This hash implementation embeds the hash chaining pointer inside the
stored struct itself, avoiding a level of indirection and the
additional malloc overhead of storing structs separately.

ihash is based on the new RB_CONTAINER_OF macro, which uses pointer
arithmetic to recover the location of the original struct from the
embedded hash node.  This is a common pattern for data structures in
the Linux and FreeBSD kernels.

ihash should be ideal for hash tables of internal data structures
which already require separate allocations.  This includes method
entries, constants, and globals.  It should also work well with
weakmap and the symbol table, since the same allocated struct can be
shared between two (or more) hash tables.

Before (unpacked entries):

	st_table -> st_table->bins -> st_table_entry -> method_entry_t

After:

	rb_ihash_table -> method_entry_t

Benefits:

+ Reduced memory usage, from lower malloc overhead and the removal of
  the order-preserving linked-list pointers.

+ Reduced indirection; this should speed up successful lookups and
  reduce cache misses.

+ Missed lookups keep the cache warm for future lookups/inserts.

Downsides:

- The cost of lookup misses may be higher than with a packed st_table
  (but not higher than with an unpacked st_table).

- Does not preserve the order of entries.  Ordering may be layered on
  top on a case-by-case basis.  I will import and use the LIST_*
  macros from BSD if required (I will likely do so anyway for
  vm->living_threads).

Other notes:

This is not general-purpose, so it must be customized on a
case-by-case basis.  We should use something else for simple
ID->VALUE or VALUE->VALUE mappings (iv_tbl, iv_index_tbl).  I will
look into funny-falcon's sparse array implementation for ivars.
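To illustrate the intrusive pattern, here is a minimal sketch (struct
my_entry and my_entry_of are hypothetical names for this example only;
the real API is declared in ihash.h below, and method.h uses the same
pattern via rb_method_entry_of):

	struct my_entry {
	    ID key;                     /* lookup key */
	    struct rb_ihash_node node;  /* embedded chaining pointer */
	    VALUE value;                /* user-managed payload */
	};

	/* recover the containing struct from the embedded hash node */
	static inline struct my_entry *
	my_entry_of(const struct rb_ihash_node *node)
	{
	    return RB_CONTAINER_OF(node, struct my_entry, node);
	}

Since the node is embedded, insertion needs no extra allocation: the
user allocates struct my_entry once and passes &entry->node to
rb_ihash_insert or rb_ihash_add_direct; lookups and iterators hand the
node pointer back and the entry is recovered with my_entry_of.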
---
 class.c             |  89 +++++++++++---------
 common.mk           |   5 +-
 gc.c                |  27 ++++---
 hash.c              |   3 +-
 ihash.c             | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 ihash.h             |  94 +++++++++++++++++++++
 include/ruby/ruby.h |   1 +
 internal.h          |  16 ++--
 marshal.c           |   1 +
 method.h            |  13 ++-
 vm.c                |  16 ++--
 vm_insnhelper.c     |   3 +-
 vm_method.c         |  91 +++++++++++++++------
 13 files changed, 490 insertions(+), 98 deletions(-)
 create mode 100644 ihash.c
 create mode 100644 ihash.h

diff --git a/class.c b/class.c
index 9405624..5a4f941 100644
--- a/class.c
+++ b/class.c
@@ -36,6 +36,16 @@ int rb_vm_add_root_module(ID id, VALUE module);
 
 #define id_attached id__attached__
 
+static void
+RCLASS_M_TBL_INIT(VALUE c)
+{
+    struct method_table_wrapper *wrapper;
+    wrapper = ALLOC(struct method_table_wrapper);
+    wrapper->tbl = rb_ihash_new(&rb_ihash_method_entry, 0);
+    wrapper->serial = 0;
+    RCLASS_M_TBL_WRAPPER(c) = wrapper;
+}
+
 void
 rb_class_subclass_add(VALUE super, VALUE klass)
 {
@@ -270,11 +280,12 @@ clone_method(VALUE klass, ID mid, const rb_method_entry_t *me)
     }
 }
 
-static int
-clone_method_i(st_data_t key, st_data_t value, st_data_t data)
+static enum rb_ihash_next
+clone_method_i(struct rb_ihash_node *node, void *arg)
 {
-    clone_method((VALUE)data, (ID)key, (const rb_method_entry_t *)value);
-    return ST_CONTINUE;
+    const rb_method_entry_t *me = rb_method_entry_of(node);
+    clone_method((VALUE)arg, me->called_id, me);
+    return RB_IHASH_CONTINUE;
 }
 
 struct clone_const_arg {
@@ -357,7 +368,7 @@ rb_mod_init_copy(VALUE clone, VALUE orig)
 	    rb_free_m_tbl_wrapper(RCLASS_M_TBL_WRAPPER(clone));
 	}
 	RCLASS_M_TBL_INIT(clone);
-	st_foreach(RCLASS_M_TBL(orig), clone_method_i, (st_data_t)clone);
+	rb_ihash_foreach(RCLASS_M_TBLP(orig), clone_method_i, (void *)clone);
     }
 
     return clone;
@@ -403,7 +414,7 @@ rb_singleton_class_clone_and_attach(VALUE obj, VALUE attach)
 	    rb_singleton_class_attached(clone, attach);
 	}
 	RCLASS_M_TBL_INIT(clone);
-	st_foreach(RCLASS_M_TBL(klass), clone_method_i, (st_data_t)clone);
+	rb_ihash_foreach(RCLASS_M_TBLP(klass), clone_method_i, (void *)clone);
 
 	rb_singleton_class_attached(RBASIC(clone)->klass, clone);
 	FL_SET(clone, FL_SINGLETON);
@@ -837,11 +848,12 @@ rb_include_module(VALUE klass, VALUE module)
 	rb_raise(rb_eArgError, "cyclic include detected");
 }
 
-static int
-add_refined_method_entry_i(st_data_t key, st_data_t value, st_data_t data)
+static enum rb_ihash_next
+add_refined_method_entry_i(struct rb_ihash_node *node, void *arg)
 {
-    rb_add_refined_method_entry((VALUE) data, (ID) key);
-    return ST_CONTINUE;
+    const rb_method_entry_t *me = rb_method_entry_of(node);
+    rb_add_refined_method_entry((VALUE) arg, me->called_id);
+    return RB_IHASH_CONTINUE;
 }
 
 static int
@@ -849,7 +861,7 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module)
 {
     VALUE p, iclass;
     int method_changed = 0, constant_changed = 0;
-    const st_table *const klass_m_tbl = RCLASS_M_TBL(RCLASS_ORIGIN(klass));
+    const struct rb_ihash_tbl *const klass_m_tbl = RCLASS_M_TBL(RCLASS_ORIGIN(klass));
 
     while (module) {
 	int superclass_seen = FALSE;
@@ -888,8 +900,8 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module)
 		    VALUE refined_class =
 			rb_refinement_module_get_refined_class(klass);
 
-		    st_foreach(RMODULE_M_TBL(module), add_refined_method_entry_i,
-			       (st_data_t) refined_class);
+		    rb_ihash_foreach(RMODULE_M_TBLP(module), add_refined_method_entry_i,
+				     (void *)refined_class);
 		    FL_SET(c, RMODULE_INCLUDED_INTO_REFINEMENT);
 		}
 		if (RMODULE_M_TBL(module) && RMODULE_M_TBL(module)->num_entries)
@@ -906,11 +918,11 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module)
     return method_changed;
 }
 
-static int
-move_refined_method(st_data_t key, st_data_t value, st_data_t data)
+static enum rb_ihash_next
+move_refined_method(struct rb_ihash_node *node, void *arg)
 {
-    rb_method_entry_t *me = (rb_method_entry_t *) value;
-    st_table *tbl = (st_table *) data;
+    rb_method_entry_t *me = rb_method_entry_of(node);
+    struct rb_ihash_tbl **tblp = arg;
 
     if (me->def->type == VM_METHOD_TYPE_REFINED) {
 	if (me->def->body.orig_me) {
@@ -918,18 +930,19 @@ move_refined_method(st_data_t key, st_data_t value, st_data_t data)
 	    me->def->body.orig_me = NULL;
 	    new_me = ALLOC(rb_method_entry_t);
 	    *new_me = *me;
-	    st_add_direct(tbl, key, (st_data_t) new_me);
+	    orig_me->mtbl_node = me->mtbl_node; /* careful */
+	    rb_ihash_add_direct(tblp, &new_me->mtbl_node);
 	    *me = *orig_me;
 	    xfree(orig_me);
-	    return ST_CONTINUE;
+	    return RB_IHASH_CONTINUE;
 	}
 	else {
-	    st_add_direct(tbl, key, (st_data_t) me);
-	    return ST_DELETE;
+	    rb_ihash_add_direct(tblp, &me->mtbl_node);
+	    return RB_IHASH_UNLINKED;
 	}
     }
     else {
-	return ST_CONTINUE;
+	return RB_IHASH_CONTINUE;
     }
 }
 
@@ -955,8 +968,8 @@ rb_prepend_module(VALUE klass, VALUE module)
 	RCLASS_ORIGIN(klass) = origin;
 	RCLASS_M_TBL_WRAPPER(origin) = RCLASS_M_TBL_WRAPPER(klass);
 	RCLASS_M_TBL_INIT(klass);
-	st_foreach(RCLASS_M_TBL(origin), move_refined_method,
-		   (st_data_t) RCLASS_M_TBL(klass));
+	rb_ihash_foreach(RCLASS_M_TBLP(origin), move_refined_method,
+			 (void *)RCLASS_M_TBLP(klass));
     }
     changed = include_modules_at(klass, klass, module);
     if (changed < 0)
@@ -1113,27 +1126,27 @@ ins_methods_pub_i(st_data_t name, st_data_t type, st_data_t ary)
     return ins_methods_push((ID)name, (long)type, (VALUE)ary, NOEX_PUBLIC);
 }
 
-static int
-method_entry_i(st_data_t key, st_data_t value, st_data_t data)
+static enum rb_ihash_next
+method_entry_i(struct rb_ihash_node *node, void *arg)
 {
-    const rb_method_entry_t *me = (const rb_method_entry_t *)value;
-    st_table *list = (st_table *)data;
+    const rb_method_entry_t *me = rb_method_entry_of(node);
+    st_table *list = arg;
     long type;
 
     if (me && me->def->type == VM_METHOD_TYPE_REFINED) {
 	me = rb_resolve_refined_method(Qnil, me, NULL);
-	if (!me) return ST_CONTINUE;
+	if (!me) return RB_IHASH_CONTINUE;
     }
-    if (!st_lookup(list, key, 0)) {
+    if (!st_lookup(list, me->called_id, 0)) {
 	if (UNDEFINED_METHOD_ENTRY_P(me)) {
 	    type = -1; /* none */
 	}
 	else {
 	    type = VISI(me->flag);
 	}
-	st_add_direct(list, key, type);
+	st_add_direct(list, me->called_id, type);
     }
-    return ST_CONTINUE;
+    return RB_IHASH_CONTINUE;
 }
 
 static VALUE
@@ -1159,7 +1172,7 @@ class_instance_method_list(int argc, VALUE *argv, VALUE mod, int obj, int (*func
 
     list = st_init_numtable();
     for (; mod; mod = RCLASS_SUPER(mod)) {
-	if (RCLASS_M_TBL(mod)) st_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)list);
+	if (RCLASS_M_TBL(mod)) rb_ihash_foreach(RCLASS_M_TBLP(mod), method_entry_i, list);
 	if (BUILTIN_TYPE(mod) == T_ICLASS && !prepended) continue;
 	if (obj && FL_TEST(mod, FL_SINGLETON)) continue;
 	if (!recur) break;
@@ -1379,7 +1392,7 @@ VALUE
 rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
 {
     VALUE recur, ary, klass, origin;
-    st_table *list, *mtbl;
+    st_table *list;
 
     if (argc == 0) {
 	recur = Qtrue;
@@ -1391,14 +1404,14 @@ rb_obj_singleton_methods(int argc, VALUE *argv, VALUE obj)
     origin = RCLASS_ORIGIN(klass);
     list = st_init_numtable();
     if (klass && FL_TEST(klass, FL_SINGLETON)) {
-	if ((mtbl = RCLASS_M_TBL(origin)) != 0)
-	    st_foreach(mtbl, method_entry_i, (st_data_t)list);
+	if (RCLASS_M_TBL(origin))
+	    rb_ihash_foreach(RCLASS_M_TBLP(origin), method_entry_i, list);
 	klass = RCLASS_SUPER(klass);
     }
     if (RTEST(recur)) {
 	while (klass && (FL_TEST(klass, FL_SINGLETON) || RB_TYPE_P(klass, T_ICLASS))) {
-	    if (klass != origin && (mtbl = RCLASS_M_TBL(klass)) != 0)
-		st_foreach(mtbl, method_entry_i, (st_data_t)list);
+	    if (klass != origin && RCLASS_M_TBL(klass))
+		rb_ihash_foreach(RCLASS_M_TBLP(klass), method_entry_i, list);
 	    klass = RCLASS_SUPER(klass);
 	}
     }
diff --git a/common.mk b/common.mk
index cafc3e5..3590e0e 100644
--- a/common.mk
+++ b/common.mk
@@ -48,6 +48,7 @@ COMMONOBJS    = array.$(OBJEXT) \
 		enumerator.$(OBJEXT) \
 		error.$(OBJEXT) \
 		eval.$(OBJEXT) \
+		ihash.$(OBJEXT) \
 		load.$(OBJEXT) \
 		proc.$(OBJEXT) \
 		file.$(OBJEXT) \
@@ -611,7 +612,8 @@ ENCODING_H_INCLUDES= {$(VPATH)}encoding.h {$(VPATH)}oniguruma.h
 PROBES_H_INCLUDES = {$(VPATH)}probes.h
 VM_CORE_H_INCLUDES = {$(VPATH)}vm_core.h {$(VPATH)}thread_$(THREAD_MODEL).h \
 		     {$(VPATH)}node.h {$(VPATH)}method.h {$(VPATH)}ruby_atomic.h \
-		     {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h
+		     {$(VPATH)}vm_debug.h {$(VPATH)}id.h {$(VPATH)}thread_native.h \
+		     {$(VPATH)}ihash.h
 
 ###
 
@@ -673,6 +675,7 @@ eval.$(OBJEXT): {$(VPATH)}eval.c {$(VPATH)}eval_intern.h {$(VPATH)}vm.h \
   $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) {$(VPATH)}eval_error.c \
   {$(VPATH)}eval_jump.c {$(VPATH)}gc.h {$(VPATH)}iseq.h \
   $(ENCODING_H_INCLUDES) {$(VPATH)}internal.h $(PROBES_H_INCLUDES) {$(VPATH)}vm_opts.h {$(VPATH)}probes_helper.h
+ihash.$(OBJEXT): {$(VPATH)}ihash.c $(RUBY_H_INCLUDES) {$(VPATH)}ihash.h
 load.$(OBJEXT): {$(VPATH)}load.c {$(VPATH)}eval_intern.h \
   {$(VPATH)}util.h $(RUBY_H_INCLUDES) $(VM_CORE_H_INCLUDES) \
   {$(VPATH)}dln.h {$(VPATH)}internal.h $(PROBES_H_INCLUDES) {$(VPATH)}vm_opts.h
diff --git a/gc.c b/gc.c
index 9a6a8c7..8e8605f 100644
--- a/gc.c
+++ b/gc.c
@@ -1433,27 +1433,29 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr)
     return FALSE;
 }
 
-static int
-free_method_entry_i(ID key, rb_method_entry_t *me, st_data_t data)
+static enum rb_ihash_next
+free_method_entry_i(struct rb_ihash_node *node, void *unused)
 {
+    rb_method_entry_t *me = rb_method_entry_of(node);
+
     if (!me->mark) {
 	rb_free_method_entry(me);
     }
-    return ST_CONTINUE;
+    return RB_IHASH_UNLINKED;
 }
 
 void
-rb_free_m_tbl(st_table *tbl)
+rb_free_m_tbl(struct rb_ihash_tbl **tblp)
 {
-    st_foreach(tbl, free_method_entry_i, 0);
-    st_free_table(tbl);
+    rb_ihash_foreach(tblp, free_method_entry_i, 0);
+    rb_ihash_free(*tblp);
 }
 
 void
 rb_free_m_tbl_wrapper(struct method_table_wrapper *wrapper)
 {
     if (wrapper->tbl) {
-	rb_free_m_tbl(wrapper->tbl);
+	rb_free_m_tbl(&wrapper->tbl);
     }
     xfree(wrapper);
 }
@@ -2456,7 +2458,7 @@ obj_memsize_of(VALUE obj, int use_tdata)
 	    size += sizeof(struct method_table_wrapper);
 	}
 	if (RCLASS_M_TBL(obj)) {
-	    size += st_memsize(RCLASS_M_TBL(obj));
+	    size += rb_ihash_memsize(RCLASS_M_TBL(obj));
 	}
 	if (RCLASS_EXT(obj)) {
 	    if (RCLASS_IV_TBL(obj)) {
@@ -3425,10 +3427,11 @@ rb_mark_method_entry(const rb_method_entry_t *me)
     mark_method_entry(&rb_objspace, me);
 }
 
-static int
-mark_method_entry_i(ID key, const rb_method_entry_t *me, st_data_t data)
+static enum rb_ihash_next
+mark_method_entry_i(struct rb_ihash_node *node, void *mta)
 {
-    struct mark_tbl_arg *arg = (void*)data;
+    const rb_method_entry_t *me = rb_method_entry_of(node);
+    struct mark_tbl_arg *arg = mta;
     mark_method_entry(arg->objspace, me);
-    return ST_CONTINUE;
+    return RB_IHASH_CONTINUE;
 }
@@ -3446,7 +3449,7 @@ mark_m_tbl_wrapper(rb_objspace_t *objspace, struct method_table_wrapper *wrapper
 	wrapper->serial = serial;
     }
     arg.objspace = objspace;
-    st_foreach(wrapper->tbl, mark_method_entry_i, (st_data_t)&arg);
+    rb_ihash_foreach(&wrapper->tbl, mark_method_entry_i, &arg);
 }
 
 static int
diff --git a/hash.c b/hash.c
index ccc0d44..76989f8 100644
--- a/hash.c
+++ b/hash.c
@@ -18,6 +18,7 @@
 #include "internal.h"
 #include <errno.h>
 #include "probes.h"
+#include "ihash.h"
 
 #ifdef __APPLE__
 # ifdef HAVE_CRT_EXTERNS_H
@@ -38,7 +39,7 @@ has_extra_methods(VALUE klass)
     const VALUE base = rb_cHash;
     VALUE c = klass;
     while (c != base) {
-	st_table *mtbl = RCLASS_M_TBL(c);
+	struct rb_ihash_tbl *mtbl = RCLASS_M_TBL(c);
 	if (mtbl && mtbl->num_entries) return klass;
 	c = RCLASS_SUPER(c);
     }
diff --git a/ihash.c b/ihash.c
new file mode 100644
index 0000000..ac1da6f
--- /dev/null
+++ b/ihash.c
@@ -0,0 +1,229 @@
+#include "ihash.h"
+#include "internal.h"
+
+/* accounts for the compiler-agnostic flexible array byte */
+#define SIZEOF_IHASH_TBL (sizeof(struct rb_ihash_tbl) - 1)
+
+static inline size_t
+rb_ihash_nbins(const struct rb_ihash_tbl *tbl)
+{
+    return tbl->hash_mask + 1;
+}
+
+static inline size_t
+rb_ihash_binpos(const struct rb_ihash_tbl *tbl, st_index_t hash)
+{
+    return ((size_t)hash & (size_t)tbl->hash_mask);
+}
+
+static inline struct rb_ihash_node **
+rb_ihash_bins(const struct rb_ihash_tbl *tbl)
+{
+    return (struct rb_ihash_node **)tbl->flexible_array;
+}
+
+size_t
+rb_ihash_memsize(const struct rb_ihash_tbl *tbl)
+{
+    size_t nbins = rb_ihash_nbins(tbl);
+    size_t n = SIZEOF_IHASH_TBL + nbins * sizeof(struct rb_ihash_node *);
+
+    return n + sizeof(struct rb_ihash_node) * tbl->num_entries;
+}
+
+static struct rb_ihash_tbl *
+rb_ihash_alloc(const struct rb_ihash_type *type, size_t nbins)
+{
+    size_t nbytes = SIZEOF_IHASH_TBL + nbins * sizeof(struct rb_ihash_node *);
+    struct rb_ihash_tbl *tbl = xcalloc(1, nbytes);
+
+    tbl->type = type;
+    tbl->hash_mask = nbins - 1;
+
+    if ((size_t)tbl->hash_mask != (nbins - 1)) {
+	rb_bug("ihash table too big");
+    }
+
+    return tbl;
+}
+
+struct rb_ihash_tbl *
+rb_ihash_new(const struct rb_ihash_type *type, size_t power)
+{
+    return rb_ihash_alloc(type, 1 << power);
+}
+
+int
+rb_ihash_foreach(struct rb_ihash_tbl **tblp, rb_ihash_iterate fn, void *arg)
+{
+    struct rb_ihash_tbl *tbl = *tblp;
+    size_t nbins = rb_ihash_nbins(tbl);
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+    size_t i;
+
+    for (i = 0; i < nbins; i++) {
+	struct rb_ihash_node *cur = bins[i];
+	struct rb_ihash_node *last = 0;
+
+	while (cur) {
+	    struct rb_ihash_node *next = cur->ihash_next;
+	    struct rb_ihash_node *tmp;
+
+	    switch (fn(cur, arg)) {
+	      case RB_IHASH_UNLINKED:
+		if (last) last->ihash_next = next;
+		else bins[i] = next;
+		cur = next;
+		tbl->num_entries--;
+		break;
+	      case RB_IHASH_CHECK:
+		/* check if hash is modified during iteration */
+		tmp = 0;
+		tbl = *tblp;
+		nbins = rb_ihash_nbins(tbl);
+		bins = rb_ihash_bins(tbl);
+
+		if (i < nbins) {
+		    for (tmp = bins[i]; tmp; tmp = tmp->ihash_next) {
+			if (tmp == cur) break;
+		    }
+		}
+		if (!tmp) return 1;
+		last = cur;
+		cur = cur->ihash_next;
+		break;
+	      case RB_IHASH_CONTINUE:
+		last = cur;
+		cur = next;
+		break;
+	      case RB_IHASH_STOP:
+		return 0;
+	    }
+	}
+    }
+    return 0;
+}
+
+struct rb_ihash_node *
+rb_ihash_lookup(const struct rb_ihash_tbl *tbl, const struct rb_ihash_node *key)
+{
+    st_index_t hval = tbl->type->hash(key);
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+    size_t pos = rb_ihash_binpos(tbl, hval);
+    struct rb_ihash_node *cur = bins[pos];
+
+    for (; cur; cur = cur->ihash_next) {
+	if (tbl->type->cmp(key, cur) == 0) {
+	    return cur;
+	}
+    }
+    return 0;
+}
+
+static inline void
+rb_ihash_add_pos(struct rb_ihash_tbl *tbl, struct rb_ihash_node *ins, size_t pos)
+{
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+
+    ins->ihash_next = bins[pos];
+    bins[pos] = ins;
+}
+
+static enum rb_ihash_next
+relink_i(struct rb_ihash_node *node, void *ptr)
+{
+    struct rb_ihash_tbl *new_tbl = ptr;
+    st_index_t hval = new_tbl->type->hash(node);
+    size_t pos = rb_ihash_binpos(new_tbl, hval);
+
+    rb_ihash_add_pos(new_tbl, node, pos);
+
+    return RB_IHASH_UNLINKED;
+}
+
+static struct rb_ihash_tbl *
+rb_ihash_realloc(struct rb_ihash_tbl **oldp, size_t nbins)
+{
+    struct rb_ihash_tbl *old_tbl = *oldp;
+    struct rb_ihash_tbl *new_tbl = rb_ihash_alloc(old_tbl->type, nbins);
+
+    rb_ihash_foreach(oldp, relink_i, new_tbl);
+    rb_ihash_free(old_tbl);
+    return new_tbl;
+}
+
+static void
+rb_ihash_added(struct rb_ihash_tbl **tblp)
+{
+    struct rb_ihash_tbl *tbl = *tblp;
+    size_t nbins = rb_ihash_nbins(tbl);
+    static const size_t max_density = 6;
+
+    tbl->num_entries++;
+    if (tbl->num_entries > (max_density * nbins)) {
+	*tblp = rb_ihash_realloc(tblp, nbins << 1);
+    }
+}
+
+void
+rb_ihash_add_direct(struct rb_ihash_tbl **tblp, struct rb_ihash_node *ins)
+{
+    struct rb_ihash_tbl *tbl = *tblp;
+    st_index_t hval = tbl->type->hash(ins);
+    size_t pos = rb_ihash_binpos(tbl, hval);
+
+    rb_ihash_add_pos(tbl, ins, pos);
+    rb_ihash_added(tblp);
+}
+
+/* returns pointer to replaced node, 0 if nothing was replaced */
+struct rb_ihash_node *
+rb_ihash_insert(struct rb_ihash_tbl **tblp, struct rb_ihash_node *ins)
+{
+    struct rb_ihash_tbl *tbl = *tblp;
+    st_index_t hval = tbl->type->hash(ins);
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+    size_t i = rb_ihash_binpos(tbl, hval);
+    struct rb_ihash_node *last = NULL;
+    struct rb_ihash_node *cur = bins[i];
+
+    while (cur) {
+	if (tbl->type->cmp(ins, cur) == 0) {
+	    /* replace and return existing entry */
+	    ins->ihash_next = cur->ihash_next;
+	    if (last) last->ihash_next = ins;
+	    else bins[i] = ins;
+	    return cur;
+	}
+	last = cur;
+	cur = cur->ihash_next;
+    }
+    rb_ihash_add_pos(tbl, ins, i);
+    rb_ihash_added(tblp);
+    return 0;
+}
+
+/* returns pointer to unlinked node, 0 if nothing was unlinked */
+struct rb_ihash_node *
+rb_ihash_unlink(struct rb_ihash_tbl *tbl, const struct rb_ihash_node *key)
+{
+    st_index_t hval = tbl->type->hash(key);
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+    size_t i = rb_ihash_binpos(tbl, hval);
+    struct rb_ihash_node *last = 0;
+    struct rb_ihash_node *cur = bins[i];
+
+    while (cur) {
+	if (tbl->type->cmp(key, cur) == 0) {
+	    if (last) last->ihash_next = cur->ihash_next;
+	    else bins[i] = cur->ihash_next;
+	    tbl->num_entries--;
+	    return cur;
+	}
+	last = cur;
+	cur = cur->ihash_next;
+    }
+
+    return 0;
+}
diff --git a/ihash.h b/ihash.h
new file mode 100644
index 0000000..266cb93
--- /dev/null
+++ b/ihash.h
@@ -0,0 +1,94 @@
+/*
+ * Hash table implementation for internal use inside Ruby.
+ *
+ * Notes:
+ *
+ * - Users are expected to embed the rb_ihash_node struct and st_data_t key
+ *   inside the struct they store.  This reduces pointer chasing
+ *   and allows cache-warming even on missed lookups.
+ *   Wrap the RB_CONTAINER_OF macro in a static inline to get the
+ *   stored struct pointer from an st_data_t-compatible key pointer.
+ *
+ * - does no allocation for entries; the user is responsible for that
+ *
+ * - this should be safe to use outside of the GVL.  Of course, users
+ *   must manage their own locking.
+ */
+#ifndef RUBY_IHASH_H
+#define RUBY_IHASH_H 1
+#if defined(__cplusplus)
+extern "C" {
+#if 0
+} /* satisfy cc-mode */
+#endif
+#endif
+
+#include "ruby/ruby.h"
+#include "ruby/st.h" /* we use the st_data_t and st_index_t data types */
+
+/*
+ * each struct must have its own rb_ihash_node field inside it;
+ * use RB_CONTAINER_OF to get the original struct ptr
+ */
+struct rb_ihash_node;
+struct rb_ihash_node {
+    struct rb_ihash_node *ihash_next;
+};
+
+enum rb_ihash_next {
+    RB_IHASH_CONTINUE = 0,
+    RB_IHASH_CHECK,
+    RB_IHASH_STOP,
+    RB_IHASH_UNLINKED /* user must deallocate this manually */
+};
+
+typedef int (*rb_ihash_compare)(const struct rb_ihash_node *,
+				const struct rb_ihash_node *);
+typedef st_index_t (*rb_ihash_compute)(const struct rb_ihash_node *);
+typedef enum rb_ihash_next (*rb_ihash_iterate)(struct rb_ihash_node *, void *);
+
+struct rb_ihash_type {
+    rb_ihash_compare cmp;
+    rb_ihash_compute hash;
+};
+
+struct rb_ihash_tbl {
+    const struct rb_ihash_type *type;
+    uint32_t num_entries;
+    uint32_t hash_mask;
+    char flexible_array[1];
+};
+
+struct rb_ihash_tbl *rb_ihash_new(const struct rb_ihash_type *, size_t);
+
+struct rb_ihash_node *
+rb_ihash_lookup(const struct rb_ihash_tbl *, const struct rb_ihash_node *);
+
+struct rb_ihash_node *
+rb_ihash_insert(struct rb_ihash_tbl **, struct rb_ihash_node *);
+
+void rb_ihash_add_direct(struct rb_ihash_tbl **, struct rb_ihash_node *);
+
+struct rb_ihash_node *
+rb_ihash_unlink(struct rb_ihash_tbl *, const struct rb_ihash_node *);
+
+int rb_ihash_foreach(struct rb_ihash_tbl **, rb_ihash_iterate, void *);
+
+static inline void
+rb_ihash_free(struct rb_ihash_tbl *tbl)
+{
+    /*
+     * the user must call rb_ihash_foreach with an iterator which frees
+     * each entry and returns RB_IHASH_UNLINKED before calling this
+     */
+    xfree(tbl);
+}
+
+size_t rb_ihash_memsize(const struct rb_ihash_tbl *);
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+}  /* extern "C" { */
+#endif
+#endif /* RUBY_IHASH_H */
diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h
index abd4b4b..a8e1756 100644
--- a/include/ruby/ruby.h
+++ b/include/ruby/ruby.h
@@ -798,6 +798,7 @@ struct RClass {
 #define RMODULE_IV_TBL(m) RCLASS_IV_TBL(m)
 #define RMODULE_CONST_TBL(m) RCLASS_CONST_TBL(m)
 #define RMODULE_M_TBL(m) RCLASS_M_TBL(m)
+#define RMODULE_M_TBLP(m) RCLASS_M_TBLP(m)
 #define RMODULE_SUPER(m) RCLASS_SUPER(m)
 #define RMODULE_IS_OVERLAID FL_USER2
 #define RMODULE_IS_REFINEMENT FL_USER3
diff --git a/internal.h b/internal.h
index 3309b65..40b0747 100644
--- a/internal.h
+++ b/internal.h
@@ -55,6 +55,9 @@ extern "C" {
 
 #define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
 
+#define RB_CONTAINER_OF(ptr,type,field) \
+	((type *)((uint8_t *)(ptr) - offsetof(type,field)))
+
 #define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[1 - 2*!(expr)]
 
 #define GCC_VERSION_SINCE(major, minor, patchlevel) \
@@ -301,7 +304,7 @@ struct rb_classext_struct {
 };
 
 struct method_table_wrapper {
-    st_table *tbl;
+    struct rb_ihash_tbl *tbl;
     size_t serial;
 };
 
@@ -413,21 +416,12 @@ void rb_class_remove_from_super_subclasses(VALUE);
 #define RCLASS_CONST_TBL(c) (RCLASS_EXT(c)->const_tbl)
 #define RCLASS_M_TBL_WRAPPER(c) (RCLASS(c)->m_tbl_wrapper)
 #define RCLASS_M_TBL(c) (RCLASS_M_TBL_WRAPPER(c) ? RCLASS_M_TBL_WRAPPER(c)->tbl : 0)
+#define RCLASS_M_TBLP(c) (&RCLASS_M_TBL_WRAPPER(c)->tbl)
 #define RCLASS_IV_INDEX_TBL(c) (RCLASS_EXT(c)->iv_index_tbl)
 #define RCLASS_ORIGIN(c) (RCLASS_EXT(c)->origin)
 #define RCLASS_REFINED_CLASS(c) (RCLASS_EXT(c)->refined_class)
 #define RCLASS_SERIAL(c) (RCLASS_EXT(c)->class_serial)
 
-static inline void
-RCLASS_M_TBL_INIT(VALUE c)
-{
-    struct method_table_wrapper *wrapper;
-    wrapper = ALLOC(struct method_table_wrapper);
-    wrapper->tbl = st_init_numtable();
-    wrapper->serial = 0;
-    RCLASS_M_TBL_WRAPPER(c) = wrapper;
-}
-
 #undef RCLASS_SUPER
 static inline VALUE
 RCLASS_SUPER(VALUE klass)
diff --git a/marshal.c b/marshal.c
index 7772b4c..bc0b7b9 100644
--- a/marshal.c
+++ b/marshal.c
@@ -15,6 +15,7 @@
 #include "ruby/util.h"
 #include "ruby/encoding.h"
 #include "internal.h"
+#include "ihash.h"
 #include <math.h>
 
 #ifdef HAVE_FLOAT_H
diff --git a/method.h b/method.h
index 556d286..15a0d13 100644
--- a/method.h
+++ b/method.h
@@ -12,6 +12,7 @@
 #define METHOD_H
 
 #include "internal.h"
+#include "ihash.h"
 
 #ifndef END_OF_ENUMERATION
 # if defined(__GNUC__) &&! defined(__STRICT_ANSI__)
@@ -95,13 +96,20 @@ typedef struct rb_method_definition_struct {
 } rb_method_definition_t;
 
 typedef struct rb_method_entry_struct {
+    ID called_id; /* key for ihash */
+    struct rb_ihash_node mtbl_node; /* TODO: union for unlinked entries */
     rb_method_flag_t flag;
     char mark;
     rb_method_definition_t *def;
-    ID called_id;
     VALUE klass;			/* should be mark */
 } rb_method_entry_t;
 
+static inline rb_method_entry_t *
+rb_method_entry_of(const struct rb_ihash_node *node)
+{
+    return RB_CONTAINER_OF(node, rb_method_entry_t, mtbl_node);
+}
+
 struct unlinked_method_entry_list_entry {
     struct unlinked_method_entry_list_entry *next;
     rb_method_entry_t *me;
@@ -136,7 +144,8 @@ VALUE rb_obj_method_location(VALUE obj, ID id);
 void rb_mark_method_entry(const rb_method_entry_t *me);
 void rb_free_method_entry(rb_method_entry_t *me);
 void rb_sweep_method_entry(void *vm);
-void rb_free_m_tbl(st_table *tbl);
+void rb_free_m_tbl(struct rb_ihash_tbl **);
 void rb_free_m_tbl_wrapper(struct method_table_wrapper *wrapper);
+extern struct rb_ihash_type rb_ihash_method_entry;
 
 #endif /* METHOD_H */
diff --git a/vm.c b/vm.c
index d9ac028..fc4f93e 100644
--- a/vm.c
+++ b/vm.c
@@ -1123,25 +1123,25 @@ rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
     }
 }
 
-static int
-check_redefined_method(st_data_t key, st_data_t value, st_data_t data)
+static enum rb_ihash_next
+check_redefined_method(struct rb_ihash_node *node, void *arg)
 {
-    ID mid = (ID)key;
-    rb_method_entry_t *me = (rb_method_entry_t *)value;
-    VALUE klass = (VALUE)data;
+    rb_method_entry_t *me = rb_method_entry_of(node);
+    ID mid = me->called_id;
+    VALUE klass = (VALUE)arg;
     rb_method_entry_t *newme = rb_method_entry(klass, mid, NULL);
 
     if (newme != me) rb_vm_check_redefinition_opt_method(me, me->klass);
 
-    return ST_CONTINUE;
+    return RB_IHASH_CONTINUE;
 }
 
 void
 rb_vm_check_redefinition_by_prepend(VALUE klass)
 {
     if (!vm_redefinition_check_flag(klass)) return;
-    st_foreach(RCLASS_M_TBL(RCLASS_ORIGIN(klass)), check_redefined_method,
-	       (st_data_t)klass);
+    rb_ihash_foreach(RCLASS_M_TBLP(RCLASS_ORIGIN(klass)), check_redefined_method,
+		     (void *)klass);
 }
 
 static void
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index bdd0b67..2a2a45e 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1103,6 +1103,7 @@ vm_callee_setup_arg_complex(rb_thread_t *th, rb_call_info_t *ci, const rb_iseq_t
     const int max = (iseq->arg_rest == -1) ? m + opts + iseq->arg_post_len : UNLIMITED_ARGUMENTS;
     const int orig_argc = ci->argc;
     int argc = orig_argc;
+    VALUE *new_argv;
     VALUE *argv = orig_argv;
     VALUE keyword_hash = Qnil;
     rb_num_t opt_pc = 0;
@@ -1125,7 +1126,7 @@ vm_callee_setup_arg_complex(rb_thread_t *th, rb_call_info_t *ci, const rb_iseq_t
     /* post arguments */
     if (iseq->arg_post_len) {
 	if (!(orig_argc < iseq->arg_post_start)) {
-	    VALUE *new_argv = ALLOCA_N(VALUE, argc);
+	    new_argv = ALLOCA_N(VALUE, argc);
 	    MEMCPY(new_argv, argv, VALUE, argc);
 	    argv = new_argv;
 	}
diff --git a/vm_method.c b/vm_method.c
index 203b8b8..ddf7b4f 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -195,17 +195,34 @@ rb_free_method_entry(rb_method_entry_t *me)
 static int rb_method_definition_eq(const rb_method_definition_t *d1,
				    const rb_method_definition_t *d2);
 
+/*
+ * use a small struct here to avoid errors from tiny stacks in fiber tests;
+ * TODO: unify this struct into rb_method_entry_t in the future,
+ * this struct only exists to minimize code churn from introducing ihash
+ * into method tables
+ */
+struct rb_method_entry_finder {
+    ID called_id;
+    union {
+	struct rb_ihash_node mtbl_node; /* lookup does not look into this */
+	struct rb_ihash_node *retval;
+    };
+};
+
+STATIC_ASSERT(method_entry_finder_offsets,
+	      offsetof(struct rb_method_entry_finder, called_id) ==
+	      offsetof(rb_method_entry_t, called_id) &&
+	      offsetof(struct rb_method_entry_finder, mtbl_node) ==
+	      offsetof(rb_method_entry_t, mtbl_node));
+
 static inline rb_method_entry_t *
 lookup_method_table(VALUE klass, ID id)
 {
-    st_data_t body;
-    st_table *m_tbl = RCLASS_M_TBL(klass);
-    if (st_lookup(m_tbl, id, &body)) {
-	return (rb_method_entry_t *) body;
-    }
-    else {
-	return 0;
-    }
+    struct rb_method_entry_finder finder;
+
+    finder.called_id = id;
+    finder.retval = rb_ihash_lookup(RCLASS_M_TBL(klass), &finder.mtbl_node);
+    return finder.retval ? rb_method_entry_of(finder.retval) : 0;
 }
 
 static void
@@ -248,12 +265,11 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 		     rb_method_definition_t *def, rb_method_flag_t noex,
 		     VALUE defined_class)
 {
-    rb_method_entry_t *me;
+    rb_method_entry_t *me, *old_me;
+    struct rb_ihash_node *old;
 #if NOEX_NOREDEF
     VALUE rklass;
 #endif
-    st_table *mtbl;
-    st_data_t data;
     int make_refined = 0;
 
     if (NIL_P(klass)) {
@@ -279,18 +295,16 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 	    rb_add_refined_method_entry(refined_class, mid);
 	}
 	if (type == VM_METHOD_TYPE_REFINED) {
-	    rb_method_entry_t *old_me =
-		lookup_method_table(RCLASS_ORIGIN(klass), mid);
+	    old_me = lookup_method_table(RCLASS_ORIGIN(klass), mid);
 	    if (old_me) rb_vm_check_redefinition_opt_method(old_me, klass);
 	}
 	else {
 	    klass = RCLASS_ORIGIN(klass);
 	}
-	mtbl = RCLASS_M_TBL(klass);
 
     /* check re-definition */
-    if (st_lookup(mtbl, mid, &data)) {
-	rb_method_entry_t *old_me = (rb_method_entry_t *)data;
+    old_me = lookup_method_table(klass, mid);
+    if (old_me) {
 	rb_method_definition_t *old_def = old_me->def;
 
 	if (rb_method_definition_eq(old_def, def)) return old_me;
@@ -376,7 +390,10 @@ rb_method_entry_make(VALUE klass, ID mid, rb_method_type_t type,
 	make_method_entry_refined(me);
     }
 
-    st_insert(mtbl, mid, (st_data_t) me);
+    old = rb_ihash_insert(RCLASS_M_TBLP(klass), &me->mtbl_node);
+    if (old_me && (old != &old_me->mtbl_node)) {
+	rb_bug("rb_method_entry_make: ihash race between lookup and insert");
+    }
 
     return me;
 }
@@ -729,8 +746,8 @@ rb_method_entry_without_refinements(VALUE klass, ID id,
 static void
 remove_method(VALUE klass, ID mid)
 {
-    st_data_t key, data;
-    rb_method_entry_t *me = 0;
+    rb_method_entry_t *me;
+    struct rb_ihash_node *node;
     VALUE self = klass;
 
     klass = RCLASS_ORIGIN(klass);
@@ -739,14 +756,16 @@ remove_method(VALUE klass, ID mid)
 	rb_warn("removing `%s' may cause serious problems", rb_id2name(mid));
     }
 
-    if (!st_lookup(RCLASS_M_TBL(klass), mid, &data) ||
-	!(me = (rb_method_entry_t *)data) ||
-	(!me->def || me->def->type == VM_METHOD_TYPE_UNDEF)) {
+    me = lookup_method_table(klass, mid);
+    if (!me || !me->def || (me->def->type == VM_METHOD_TYPE_UNDEF)) {
 	rb_name_error(mid, "method `%s' not defined in %s",
 		      rb_id2name(mid), rb_class2name(klass));
     }
-    key = (st_data_t)mid;
-    st_delete(RCLASS_M_TBL(klass), &key, &data);
+
+    node = rb_ihash_unlink(RCLASS_M_TBL(klass), &me->mtbl_node);
+    if (node != &me->mtbl_node) {
+	rb_bug("remove_method: ihash race between lookup and unlink");
+    }
 
     rb_vm_check_redefinition_opt_method(me, klass);
     rb_clear_method_cache_by_class(klass);
@@ -1752,3 +1771,27 @@ Init_eval_method(void)
 	REPLICATE_METHOD(rb_eException, idRespond_to_missing, NOEX_PUBLIC);
     }
 }
+
+static int
+rb_ihash_me_cmp(const struct rb_ihash_node *a, const struct rb_ihash_node *b)
+{
+    rb_method_entry_t *me1 = rb_method_entry_of(a);
+    rb_method_entry_t *me2 = rb_method_entry_of(b);
+
+    return me1->called_id != me2->called_id;
+}
+
+static st_index_t
+rb_ihash_me_hash(const struct rb_ihash_node *node)
+{
+    rb_method_entry_t *me = rb_method_entry_of(node);
+    ID id = me->called_id;
+
+    /* TODO: tuning; try generating the id from a hash of the str associated with the sym */
+    return (st_index_t)((id >> 3) ^ (id << 3));
+}
+
+struct rb_ihash_type rb_ihash_method_entry = {
+    rb_ihash_me_cmp,
+    rb_ihash_me_hash
+};
-- 
1.9.0.rc3.13.gda73b5f