From c3b89b1d41e68104aaced7b0974d9f437466a601 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 9 Mar 2014 02:05:10 +0000
Subject: [PATCH 4/4] ihash: implement rb_ihash_update to support rb_fstring

This saves another 100K or so on "ruby -e exit", as the new fstring
struct no longer needs two extra pointers for ordering, nor does it
need a separate key/value field like st_table_entry.
---
 ihash.c  |  60 ++++++++++++++++++++++++++++++
 ihash.h  |  15 ++++++++
 string.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 172 insertions(+), 28 deletions(-)

diff --git a/ihash.c b/ihash.c
index ac1da6f..3f85342 100644
--- a/ihash.c
+++ b/ihash.c
@@ -227,3 +227,63 @@ rb_ihash_unlink(struct rb_ihash_tbl *tbl, const struct rb_ihash_node *key)
 
     return 0;
 }
+
+/*
+ * Find the node equal to +upd+ and let +fn+ update, destroy or create it.
+ *
+ * The bin is picked by tbl->type->hash(upd); a node matches when
+ * tbl->type->cmp(upd, node) == 0.
+ *
+ * On a match, +fn+ is called with *cur set to the matching node:
+ *   RB_IHASH_UPDATED   - *cur (which +fn+ may have replaced) stays linked
+ *   RB_IHASH_DESTROYED - the matching node is dropped from its chain
+ * and 1 is returned.
+ *
+ * Without a match, +fn+ is called with *cur == NULL; on RB_IHASH_UPDATED
+ * the node it stored in *cur is inserted, and 0 is returned.
+ */
+int
+rb_ihash_update(struct rb_ihash_tbl **tblp, struct rb_ihash_node *upd,
+                rb_ihash_updater fn, void *arg)
+{
+    struct rb_ihash_tbl *tbl = *tblp;
+    st_index_t hval = tbl->type->hash(upd);
+    struct rb_ihash_node **bins = rb_ihash_bins(tbl);
+    size_t i = rb_ihash_binpos(tbl, hval);
+    struct rb_ihash_node *last = NULL;
+    struct rb_ihash_node *cur = bins[i];
+
+    while (cur) {
+        if (tbl->type->cmp(upd, cur) == 0) {
+            struct rb_ihash_node *next = cur->ihash_next;
+
+            switch (fn(&cur, upd, arg)) {
+              case RB_IHASH_UPDATED:
+                cur->ihash_next = next; /* preserve linkage */
+                /* fn may have swapped in another node: relink it */
+                if (last) last->ihash_next = cur;
+                else bins[i] = cur;
+                return 1;
+              case RB_IHASH_DESTROYED: /* do we need to support this? */
+                /* dropping the bin head must keep the rest of the chain */
+                /* NOTE(review): no entry count is adjusted here; confirm */
+                if (last) last->ihash_next = next;
+                else bins[i] = next;
+                return 1;
+            }
+        }
+        last = cur;
+        cur = cur->ihash_next;
+    }
+
+    cur = 0;
+    switch (fn(&cur, upd, arg)) {
+      case RB_IHASH_UPDATED:
+        rb_ihash_add_pos(tbl, cur, i);
+        rb_ihash_added(tblp);
+        return 0;
+      case RB_IHASH_DESTROYED:
+        rb_bug("RB_IHASH_DESTROYED unexpected");
+    }
+    return 0;
+}
diff --git a/ihash.h b/ihash.h
index 266cb93..b2c0fa6 100644
--- a/ihash.h
+++ b/ihash.h
@@ -42,11 +42,22 @@ enum rb_ihash_next {
     RB_IHASH_UNLINKED /* user must deallocate this manually */
 };
 
+/* result of an rb_ihash_updater callback */
+enum rb_ihash_updated {
+    RB_IHASH_UPDATED = 0,
+    RB_IHASH_DESTROYED /* user already deallocated */
+};
+
 typedef int (*rb_ihash_compare)(const struct rb_ihash_node *,
                                 const struct rb_ihash_node *);
 typedef st_index_t (*rb_ihash_compute)(const struct rb_ihash_node *);
 typedef enum rb_ihash_next (*rb_ihash_iterate)(struct rb_ihash_node *, void *);
 
+/* rb_ihash_update callback: *cur is the matching node, or NULL if none */
+typedef enum rb_ihash_updated (*rb_ihash_updater)(struct rb_ihash_node **cur,
+                                                  struct rb_ihash_node *upd,
+                                                  void *arg);
+
 struct rb_ihash_type {
     rb_ihash_compare cmp;
     rb_ihash_compute hash;
@@ -85,6 +96,10 @@ rb_ihash_free(struct rb_ihash_tbl *tbl)
 }
 
 size_t rb_ihash_memsize(const struct rb_ihash_tbl *);
+
+int rb_ihash_update(struct rb_ihash_tbl **, struct rb_ihash_node *,
+                    rb_ihash_updater, void *arg);
+
 #if defined(__cplusplus)
 #if 0
 { /* satisfy cc-mode */
diff --git a/string.c b/string.c
index c268cd6..b649c59 100644
--- a/string.c
+++ b/string.c
@@ -15,6 +15,7 @@
 #include "ruby/re.h"
 #include "ruby/encoding.h"
 #include "internal.h"
+#include "ihash.h"
 #include "probes.h"
 #include <assert.h>
 
@@ -157,28 +158,60 @@ get_encoding(VALUE str)
     return get_actual_encoding(ENCODING_GET(str), str);
 }
 
-static int fstring_cmp(VALUE a, VALUE b);
+static int fstring_cmp(const struct rb_ihash_node *,
+                       const struct rb_ihash_node *);
 
-static st_table* frozen_strings;
+static struct rb_ihash_tbl* frozen_strings;
+
+/* frozen_strings entry; hashval caches rb_str_hash() of fstr */
+struct fstring {
+    VALUE fstr;
+    st_index_t hashval;
+    struct rb_ihash_node fstr_node;
+};
+
+/* struct fstring that embeds +node+ as its fstr_node (via RB_CONTAINER_OF) */
+static inline struct fstring *
+fstring_of(const struct rb_ihash_node *node)
+{
+    return RB_CONTAINER_OF(node, struct fstring, fstr_node);
+}
+
+static st_index_t
+fstring_hash(const struct rb_ihash_node *ptr)
+{
+    return fstring_of(ptr)->hashval; /* precomputed */
+}
 
-static const struct st_hash_type fstring_hash_type = {
+static const struct rb_ihash_type fstring_hash_type = {
     fstring_cmp,
-    rb_str_hash,
+    fstring_hash,
 };
 
-static int
-fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
+/*
+ * rb_ihash_update callback for rb_fstring: reuse the existing entry when one
+ * matches, otherwise build the frozen string and a new node to insert.
+ * Either way the resulting string is handed back through upd->fstr.
+ */
+static enum rb_ihash_updated
+fstr_update_callback(struct rb_ihash_node **cur_node,
+                     struct rb_ihash_node *upd_node, void *unused)
 {
-    VALUE *fstr = (VALUE *)arg;
-    VALUE str = (VALUE)*key;
+    struct fstring *upd = fstring_of(upd_node);
+    struct fstring *ins;
+    VALUE str;
 
-    if (existing) {
-        /* because of lazy sweep, str may be unmarked already and swept
+    if (*cur_node) {
+        const struct fstring *cur = fstring_of(*cur_node);
+
+        /* because of lazy sweep, cur->fstr may be unmarked already and swept
          * at next time */
-        rb_gc_resurrect(*fstr = *key);
-        return ST_STOP;
+        rb_gc_resurrect(upd->fstr = cur->fstr);
+        return RB_IHASH_UPDATED;
     }
 
+    /* create new node */
+    str = upd->fstr;
     if (STR_SHARED_P(str)) {
         /* str should not be shared */
         str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), STR_ENC_GET(str));
@@ -188,42 +221,60 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existi
         str = rb_str_new_frozen(str);
     }
     RBASIC(str)->flags |= RSTRING_FSTR;
+    ins = ALLOC(struct fstring);
+    ins->hashval = upd->hashval;
+    upd->fstr = ins->fstr = str;
+    *cur_node = &ins->fstr_node; /* trigger insert */
 
-    *key = *value = *fstr = str;
-    return ST_CONTINUE;
+    return RB_IHASH_UPDATED;
 }
 
+/* return the frozen_strings instance matching str, adding one if absent */
 VALUE
 rb_fstring(VALUE str)
 {
-    VALUE fstr = Qnil;
+    struct fstring fstr;
     Check_Type(str, T_STRING);
 
     if (!frozen_strings)
-        frozen_strings = st_init_table(&fstring_hash_type);
+        frozen_strings = rb_ihash_new(&fstring_hash_type, 0);
 
     if (FL_TEST(str, RSTRING_FSTR))
         return str;
 
-    st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&fstr);
-    return fstr;
+    fstr.fstr = str;
+    fstr.hashval = rb_str_hash(str);
+    rb_ihash_update(&frozen_strings, &fstr.fstr_node, fstr_update_callback, 0);
+    return fstr.fstr;
 }
 
-static int
-fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
+static enum rb_ihash_next
+fstring_set_class_i(struct rb_ihash_node *node, void *arg)
 {
-    RBASIC_SET_CLASS((VALUE)key, (VALUE)arg);
-    return ST_CONTINUE;
+    VALUE klass = (VALUE)arg;
+    struct fstring *fstr = fstring_of(node);
+
+    RBASIC_SET_CLASS((VALUE)fstr->fstr, klass);
+    return RB_IHASH_CONTINUE;
 }
 
+/* 0 if equal; differing precomputed hashes skip the string comparison */
 static int
-fstring_cmp(VALUE a, VALUE b)
+fstring_cmp(const struct rb_ihash_node *na, const struct rb_ihash_node *nb)
 {
-    int cmp = rb_str_hash_cmp(a, b);
+    struct fstring *a = fstring_of(na);
+    struct fstring *b = fstring_of(nb);
+    int cmp;
+
+    if (a->hashval != b->hashval) {
+        return 1;
+    }
+
+    cmp = rb_str_hash_cmp(a->fstr, b->fstr);
     if (cmp != 0) {
         return cmp;
     }
-    return ENCODING_GET(b) - ENCODING_GET(a);
+    return ENCODING_GET(b->fstr) - ENCODING_GET(a->fstr);
 }
 
 static inline int
@@ -907,12 +958,30 @@ rb_free_tmp_buffer(volatile VALUE *store)
     if (s) rb_str_clear(s);
 }
 
+/* unlink str's entry from frozen_strings and free the entry */
+static void
+fstring_delete(VALUE str)
+{
+    struct fstring fstr;
+    struct rb_ihash_node *node;
+
+    fstr.fstr = str;
+    fstr.hashval = rb_str_hash(str);
+    node = rb_ihash_unlink(frozen_strings, &fstr.fstr_node);
+    if (node) {
+        struct fstring *del = fstring_of(node);
+        xfree(del);
+    }
+    else {
+        rb_bug("RSTRING_FSTR consistency error"); /* do we need this? */
+    }
+}
+
 void
 rb_str_free(VALUE str)
 {
     if (FL_TEST(str, RSTRING_FSTR)) {
-        st_data_t fstr = (st_data_t)str;
-        st_delete(frozen_strings, &fstr, NULL);
+        fstring_delete(str);
     }
 
     if (!STR_EMBED_P(str) && !FL_TEST(str, STR_SHARED)) {
@@ -8841,5 +8910,5 @@ Init_String(void)
     rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
 
     if (frozen_strings)
-        st_foreach(frozen_strings, fstring_set_class_i, rb_cString);
+        rb_ihash_foreach(&frozen_strings, fstring_set_class_i, (void *)rb_cString);
 }
-- 
1.9.0.rc3.13.gda73b5f