Project

General

Profile

Feature #9614 » 0003-parse.y-switch-to-ihash-saves-200K-out-of-the-box.patch

normalperson (Eric Wong), 03/09/2014 02:22 AM

View differences:

parse.y
22 22
#include "ruby/st.h"
23 23
#include "ruby/encoding.h"
24 24
#include "internal.h"
25
#include "ihash.h"
25 26
#include "node.h"
26 27
#include "parse.h"
27 28
#include "id.h"
......
10103 10104

  
10104 10105
static struct symbols {
10105 10106
    ID last_id;
10106
    st_table *sym_id;
10107
    st_table *id_str;
10107
    struct rb_ihash_tbl *sym_id;
10108
    struct rb_ihash_tbl *id_str;
10108 10109
#if ENABLE_SELECTOR_NAMESPACE
10109 10110
    st_table *ivar2_id;
10110 10111
    st_table *id_ivar2;
......
10113 10114
    int minor_marked;
10114 10115
} global_symbols = {tLAST_TOKEN};
10115 10116

  
10116
static const struct st_hash_type symhash = {
10117
    rb_str_hash_cmp,
10118
    rb_str_hash,
10117
/*
 * One registered symbol: an ID together with its name string.
 * A single allocation of this struct is linked into both tables at once:
 * into global_symbols.sym_id through sym_id_node and into
 * global_symbols.id_str through id_str_node (see register_symid_str).
 */
10118
struct rb_idsym {
10119
    struct rb_ihash_node id_str_node; /* link used by the id_str table */
10120
    ID id; /* key of the id_str table */
10121
    struct rb_ihash_node sym_id_node; /* link used by the sym_id table */
10122
    VALUE symstr; /* name string; key of the sym_id table */
10123
    st_index_t hashval; /* cached rb_str_hash(symstr), returned by sym_id_hash */
10124
};
10125

  
10126
/*
 * Maps a pointer to the sym_id_node member back to the struct rb_idsym
 * that contains it.  `node` must be a sym_id_node, not an id_str_node.
 */
static inline struct rb_idsym *
10127
rb_idsym_of_s(const struct rb_ihash_node *node)
10128
{
10129
    return RB_CONTAINER_OF(node, struct rb_idsym, sym_id_node);
10130
}
10131

  
10132
/*
 * Comparison callback of the sym_id table (installed via sym_id_hash_type).
 * Entries whose cached hashval differ are reported as different (1) without
 * looking at the strings; otherwise the result of rb_str_hash_cmp() on the
 * two symstr values is returned.
 * NOTE(review): presumably 0 means "equal", as in id_str_cmp -- confirm
 * against the rb_ihash comparison contract.
 */
static int
10133
sym_id_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
10134
{
10135
    struct rb_idsym *is1 = rb_idsym_of_s(n1);
10136
    struct rb_idsym *is2 = rb_idsym_of_s(n2);
10137

  
10138
    if (is1->hashval == is2->hashval) {
10139
	return rb_str_hash_cmp(is1->symstr, is2->symstr);
10140
    }
10141
    return 1;
10142
}
10143

  
10144
/*
 * Hash callback of the sym_id table (installed via sym_id_hash_type).
 * Does not hash the string itself: it returns the hashval field, so the
 * caller must have stored rb_str_hash(symstr) there beforehand, as
 * register_symid_str and sym_id_lookup do.
 */
static st_index_t
10145
sym_id_hash(const struct rb_ihash_node *node)
10146
{
10147
    struct rb_idsym *is = rb_idsym_of_s(node);
10148

  
10149
    return is->hashval; /* needs to be precomputed */
10150
}
10151

  
10152
/*
 * Maps a pointer to the id_str_node member back to the struct rb_idsym
 * that contains it.  `node` must be an id_str_node, not a sym_id_node.
 */
static inline struct rb_idsym *
10153
rb_idsym_of_i(const struct rb_ihash_node *node)
10154
{
10155
    return RB_CONTAINER_OF(node, struct rb_idsym, id_str_node);
10156
}
10157

  
10158
/*
 * Comparison callback of the id_str table (installed via id_str_hash_type).
 * Returns 0 when both entries carry the same ID and 1 otherwise.
 */
static int
10159
id_str_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
10160
{
10161
    return rb_idsym_of_i(n1)->id != rb_idsym_of_i(n2)->id;
10162
}
10163

  
10164
/*
 * Hash callback of the id_str table (installed via id_str_hash_type).
 * The ID value itself is used as the hash value.
 */
static st_index_t
10165
id_str_hash(const struct rb_ihash_node *node)
10166
{
10167
    return rb_idsym_of_i(node)->id;
10168
}
10169

  
10170
/*
 * Looks up the name string registered for `id` in global_symbols.id_str.
 * On a hit the string is stored in *str and 1 is returned; on a miss 0 is
 * returned and *str is left unmodified.
 */
static int
10171
id_str_lookup(ID id, VALUE *str)
10172
{
10173
    struct rb_idsym finder; /* stack-allocated search key, never inserted */
10174
    struct rb_ihash_node *node;
10175

  
10176
    finder.id = id; /* the only field id_str_hash and id_str_cmp read */
10177
    node = rb_ihash_lookup(global_symbols.id_str, &finder.id_str_node);
10178

  
10179
    if (node) {
10180
	*str = rb_idsym_of_i(node)->symstr;
10181
	return 1;
10182
    }
10183
    return 0;
10184
}
10185

  
10186
/*
 * Looks up the ID registered for the name `str` in global_symbols.sym_id.
 * On a hit the ID is stored in *id and 1 is returned; on a miss 0 is
 * returned and *id is left unmodified.
 */
static int
10187
sym_id_lookup(VALUE str, ID *id)
10188
{
10189
    struct rb_idsym finder; /* stack-allocated search key, never inserted */
10190
    struct rb_ihash_node *node;
10191

  
10192
    finder.symstr = str;
10193
    finder.hashval = rb_str_hash(str); /* sym_id_hash only returns this cached value */
10194
    node = rb_ihash_lookup(global_symbols.sym_id, &finder.sym_id_node);
10195

  
10196
    if (node) {
10197
	*id = rb_idsym_of_s(node)->id;
10198
	return 1;
10199
    }
10200
    return 0;
10201
}
10202

  
10203
/*
 * Callback pair (compare, hash) for the name -> ID table; handed to
 * rb_ihash_new() when Init_sym creates global_symbols.sym_id.
 */
static const struct rb_ihash_type sym_id_hash_type = {
10204
    sym_id_cmp,
10205
    sym_id_hash,
10206
};
10207

  
10208
/*
 * Callback pair (compare, hash) for the ID -> name table; handed to
 * rb_ihash_new() when Init_sym creates global_symbols.id_str.
 */
static const struct rb_ihash_type id_str_hash_type = {
10209
    id_str_cmp,
10210
    id_str_hash,
10119 10211
};
10120 10212

  
10121 10213
#if ENABLE_SELECTOR_NAMESPACE
......
10148 10240
void
10149 10241
Init_sym(void)
10150 10242
{
10151
    global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
10152
    global_symbols.id_str = st_init_numtable_with_size(1000);
10243
    global_symbols.sym_id = rb_ihash_new(&sym_id_hash_type, 10);
10244
    global_symbols.id_str = rb_ihash_new(&id_str_hash_type, 10);
10153 10245
#if ENABLE_SELECTOR_NAMESPACE
10154 10246
    global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000);
10155 10247
    global_symbols.id_ivar2 = st_init_numtable_with_size(1000);
......
10164 10256
    Init_id();
10165 10257
}
10166 10258

  
10259
/*
 * rb_ihash_foreach() callback that rb_gc_mark_symbols runs over the id_str
 * table: marks the entry's name string with rb_gc_mark() and asks the
 * iteration to continue.  The second argument is ignored.
 */
static enum rb_ihash_next
10260
mark_symstr(struct rb_ihash_node *id_str_node, void *unused)
10261
{
10262
    struct rb_idsym *is = rb_idsym_of_i(id_str_node);
10263
    rb_gc_mark(is->symstr);
10264
    return RB_IHASH_CONTINUE;
10265
}
10266

  
10167 10267
void
10168 10268
rb_gc_mark_symbols(int full_mark)
10169 10269
{
10170 10270
    if (full_mark || global_symbols.minor_marked == 0) {
10171
	rb_mark_tbl(global_symbols.id_str);
10271
	if (global_symbols.id_str) {
10272
	    rb_ihash_foreach(&global_symbols.id_str, mark_symstr, 0);
10273
	}
10172 10274
	rb_gc_mark_locations(global_symbols.op_sym,
10173 10275
			     global_symbols.op_sym + numberof(global_symbols.op_sym));
10174 10276

  
......
10351 10453
static ID
10352 10454
register_symid_str(ID id, VALUE str)
10353 10455
{
10456
    struct rb_idsym *is = ALLOC(struct rb_idsym);
10457

  
10354 10458
    OBJ_FREEZE(str);
10355 10459
    str = rb_fstring(str);
10356 10460

  
......
10358 10462
	RUBY_DTRACE_SYMBOL_CREATE(RSTRING_PTR(str), rb_sourcefile(), rb_sourceline());
10359 10463
    }
10360 10464

  
10361
    st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
10362
    st_add_direct(global_symbols.id_str, id, (st_data_t)str);
10465
    is->hashval = rb_str_hash(str);
10466
    is->symstr = str;
10467
    is->id = id;
10468
    rb_ihash_add_direct(&global_symbols.sym_id, &is->sym_id_node);
10469
    rb_ihash_add_direct(&global_symbols.id_str, &is->id_str_node);
10363 10470
    global_symbols.minor_marked = 0;
10364 10471
    return id;
10365 10472
}
......
10398 10505
ID
10399 10506
rb_intern3(const char *name, long len, rb_encoding *enc)
10400 10507
{
10401
    st_data_t data;
10508
    ID id;
10402 10509
    struct RString fake_str;
10403 10510
    VALUE str = setup_fake_str(&fake_str, name, len);
10404 10511
    rb_enc_associate(str, enc);
10405 10512
    OBJ_FREEZE(str);
10406 10513

  
10407
    if (st_lookup(global_symbols.sym_id, str, &data))
10408
	return (ID)data;
10514
    if (sym_id_lookup(str, &id))
10515
	return id;
10409 10516

  
10410 10517
    str = rb_enc_str_new(name, len, enc); /* make true string */
10411 10518
    return intern_str(str);
......
10550 10657
/*
 * Returns the ID for the name `str`.  If the name is already present in
 * global_symbols.sym_id, the stored ID is returned; otherwise a duplicate
 * of `str` is passed to intern_str() and its result is returned.
 */
ID
10551 10658
rb_intern_str(VALUE str)
10552 10659
{
10553
    st_data_t id;
10660
    ID id;
10554 10661

  
10555
    if (st_lookup(global_symbols.sym_id, str, &id))
10556
	return (ID)id;
10662
    if (sym_id_lookup(str, &id))
10663
	return id;
10557 10664
    return intern_str(rb_str_dup(str));
10558 10665
}
10559 10666

  
10560 10667
VALUE
10561 10668
rb_id2str(ID id)
10562 10669
{
10563
    st_data_t data;
10670
    VALUE str;
10564 10671

  
10565 10672
    if (id < tLAST_TOKEN) {
10566 10673
	int i = 0;
10567 10674

  
10568 10675
	if (id < INT_MAX && rb_ispunct((int)id)) {
10569
	    VALUE str = global_symbols.op_sym[i = (int)id];
10676
	    str = global_symbols.op_sym[i = (int)id];
10570 10677
	    if (!str) {
10571 10678
		char name[2];
10572 10679
		name[0] = (char)id;
......
10581 10688
	}
10582 10689
	for (i = 0; i < op_tbl_count; i++) {
10583 10690
	    if (op_tbl[i].token == id) {
10584
		VALUE str = global_symbols.op_sym[i];
10691
		str = global_symbols.op_sym[i];
10585 10692
		if (!str) {
10586 10693
		    str = rb_usascii_str_new2(op_tbl[i].name);
10587 10694
		    OBJ_FREEZE(str);
......
10594 10701
	}
10595 10702
    }
10596 10703

  
10597
    if (st_lookup(global_symbols.id_str, id, &data)) {
10598
        VALUE str = (VALUE)data;
10704
    if (id_str_lookup(id, &str)) {
10599 10705
        if (RBASIC(str)->klass == 0)
10600 10706
            RBASIC_SET_CLASS_RAW(str, rb_cString);
10601 10707
	return str;
......
10603 10709

  
10604 10710
    if (is_attrset_id(id)) {
10605 10711
	ID id_stem = (id & ~ID_SCOPE_MASK);
10606
	VALUE str;
10607 10712

  
10608 10713
	do {
10609 10714
	    if (!!(str = rb_id2str(id_stem | ID_LOCAL))) break;
......
10617 10722
	str = rb_str_dup(str);
10618 10723
	rb_str_cat(str, "=", 1);
10619 10724
	register_symid_str(id, str);
10620
	if (st_lookup(global_symbols.id_str, id, &data)) {
10621
            VALUE str = (VALUE)data;
10725
	if (id_str_lookup(id, &str)) {
10622 10726
            if (RBASIC(str)->klass == 0)
10623 10727
                RBASIC_SET_CLASS_RAW(str, rb_cString);
10624 10728
            return str;
......
10642 10746
    return next_id_base() | ID_INTERNAL;
10643 10747
}
10644 10748

  
10645
/*
 * Per-entry iteration callback over global_symbols.sym_id: appends
 * ID2SYM() of the entry's ID to the array supplied as the extra argument
 * and asks the iteration to continue.
 */
static int
10646
symbols_i(VALUE sym, ID value, VALUE ary)
10749
static enum rb_ihash_next
10750
symbols_i(struct rb_ihash_node *sym_id_node, void *arg)
10647 10751
{
10648
    rb_ary_push(ary, ID2SYM(value));
10649
    return ST_CONTINUE;
10752
    VALUE ary = (VALUE)arg;
10753
    struct rb_idsym *is = rb_idsym_of_s(sym_id_node);
10754

  
10755
    rb_ary_push(ary, ID2SYM(is->id));
10756
    return RB_IHASH_CONTINUE;
10650 10757
}
10651 10758

  
10652 10759
/*
......
10670 10777
{
10671 10778
    VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries);
10672 10779

  
10673
    st_foreach(global_symbols.sym_id, symbols_i, ary);
10780
    rb_ihash_foreach(&global_symbols.sym_id, symbols_i, (void *)ary);
10674 10781
    return ary;
10675 10782
}
10676 10783

  
......
10730 10837
ID
10731 10838
rb_check_id(volatile VALUE *namep)
10732 10839
{
10733
    st_data_t id;
10840
    ID id;
10734 10841
    VALUE tmp;
10735 10842
    VALUE name = *namep;
10736 10843

  
......
10750 10857

  
10751 10858
    sym_check_asciionly(name);
10752 10859

  
10753
    if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
10754
	return (ID)id;
10860
    if (sym_id_lookup(name, &id))
10861
	return id;
10755 10862

  
10756 10863
    if (rb_is_attrset_name(name)) {
10757 10864
	struct RString fake_str;
......
10760 10867
	rb_enc_copy(localname, name);
10761 10868
	OBJ_FREEZE(localname);
10762 10869

  
10763
	if (st_lookup(global_symbols.sym_id, (st_data_t)localname, &id)) {
10870
	if (sym_id_lookup(localname, &id)) {
10764 10871
	    return rb_id_attrset((ID)id);
10765 10872
	}
10766 10873
	RB_GC_GUARD(name);
......
10772 10879
ID
10773 10880
rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
10774 10881
{
10775
    st_data_t id;
10882
    ID id;
10776 10883
    struct RString fake_str;
10777 10884
    const VALUE name = setup_fake_str(&fake_str, ptr, len);
10778 10885
    rb_enc_associate(name, enc);
10779 10886

  
10780 10887
    sym_check_asciionly(name);
10781 10888

  
10782
    if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
10783
	return (ID)id;
10889
    if (sym_id_lookup(name, &id))
10890
	return id;
10784 10891

  
10785 10892
    if (rb_is_attrset_name(name)) {
10786 10893
	fake_str.as.heap.len = len - 1;
10787
	if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) {
10788
	    return rb_id_attrset((ID)id);
10894
	if (sym_id_lookup(name, &id)) {
10895
	    return rb_id_attrset(id);
10789 10896
	}
10790 10897
    }
10791 10898

  
10792
-