Project

General

Profile

Feature #2447 » symbol_table_without_string.patch

mame (Yusuke Endoh), 12/06/2009 11:24 PM

View differences:

parse.y
#define ENABLE_SELECTOR_NAMESPACE 0
#endif
/*
 * One symbol name kept as raw bytes plus its encoding.
 *
 * The length and the "only 7-bit bytes" flag share a single word:
 * bit 0 is the flag, the remaining bits are the byte length.  The word is
 * a long because the length packed into it arrives as a long
 * (setup_sym_entry computes (len << 1) | flag); an int field would
 * silently narrow that value.
 */
struct sym_entry {
    rb_encoding *enc;          /* encoding the name bytes are in */
    long only_7bit_and_len;    /* (byte length << 1) | only-7-bit flag */
    const char *name;          /* name bytes; length is SYM_INFO_LEN() */
};
/* Byte length stored in the packed word. */
#define SYM_INFO_LEN(obj) ((obj)->only_7bit_and_len >> 1)
/* Non-zero when the packed word carries the only-7-bit flag. */
#define SYM_INFO_ONLY_7BIT(obj) ((obj)->only_7bit_and_len & 1)
static st_index_t
sym_hash(struct sym_entry *sym)
{
return rb_memhash(sym->name, SYM_INFO_LEN(sym)) ^ (SYM_INFO_ONLY_7BIT(sym) ? 0 : (VALUE)sym->enc);
}
/*
 * Decide whether the bytes of two entries may be compared at all.
 *
 * Returns TRUE when either name is empty, when both entries share the same
 * encoding, when both are flagged 7-bit only, or when one is flagged 7-bit
 * only and the other's encoding satisfies rb_enc_asciicompat().
 * Returns FALSE otherwise.
 */
static inline int
sym_comparable(struct sym_entry *sym1, struct sym_entry *sym2)
{
    const int ascii1 = SYM_INFO_ONLY_7BIT(sym1) != 0;
    const int ascii2 = SYM_INFO_ONLY_7BIT(sym2) != 0;

    if (SYM_INFO_LEN(sym1) == 0 || SYM_INFO_LEN(sym2) == 0) return TRUE;
    if (sym1->enc == sym2->enc) return TRUE;
    if (ascii1 && ascii2) return TRUE;
    if (ascii1 && rb_enc_asciicompat(sym2->enc)) return TRUE;
    if (ascii2 && rb_enc_asciicompat(sym1->enc)) return TRUE;
    return FALSE;
}
/*
 * Equality callback for sym_entry keys.
 *
 * Returns 0 when the two entries are comparable (see sym_comparable), have
 * the same byte length and identical bytes; returns 1 in every other case.
 */
static int
sym_hash_cmp(struct sym_entry *sym1, struct sym_entry *sym2)
{
    if (!sym_comparable(sym1, sym2)) return 1;
    if (SYM_INFO_LEN(sym1) != SYM_INFO_LEN(sym2)) return 1;
    return memcmp(sym1->name, sym2->name, SYM_INFO_LEN(sym1)) != 0;
}
static struct symbols {
ID last_id;
st_table *sym_id;
......
st_table *ivar2_id;
st_table *id_ivar2;
#endif
VALUE op_sym[tLAST_TOKEN];
struct sym_entry op_sym[tLAST_TOKEN];
} global_symbols = {tLAST_ID};
/*
 * Comparison and hash callbacks bundled as an st_hash_type.
 *
 * NOTE(review): this listing is a diff shown without +/- markers, which is
 * why four initializers appear.  Presumably rb_str_hash_cmp/rb_str_hash are
 * the removed pair and sym_hash_cmp/sym_hash the added pair -- confirm
 * against the raw patch.
 */
static const struct st_hash_type symhash = {
rb_str_hash_cmp,
rb_str_hash,
sym_hash_cmp,
sym_hash,
};
#if ENABLE_SELECTOR_NAMESPACE
......
/*
 * Passes global_symbols.id_str to rb_mark_tbl() and the whole op_sym array
 * (tLAST_TOKEN elements) to rb_gc_mark_locations().
 *
 * NOTE(review): this listing is a diff shown without +/- markers, and op_sym
 * is declared in it both as VALUE[] and as struct sym_entry[].  Which of the
 * calls below survive the patch cannot be told from here -- confirm against
 * the raw patch.
 */
void
rb_gc_mark_symbols(void)
{
rb_mark_tbl(global_symbols.id_str);
rb_gc_mark_locations(global_symbols.op_sym,
global_symbols.op_sym + tLAST_TOKEN);
}
#endif /* !RIPPER */
......
return m == e;
}
/*
 * Fill in *sym so that it describes the len bytes at name in encoding enc.
 *
 * The entry stores the name pointer itself; no copy is made here.  The
 * length and the result of the 7-bit scan are packed into one word:
 * length in the upper bits, flag in bit 0.
 */
static inline void
setup_sym_entry(struct sym_entry *sym, const char *name, long len, rb_encoding *enc)
{
    const int is_7bit = (rb_coderange_scan(name, len, enc) == ENC_CODERANGE_7BIT);

    sym->name = name;
    sym->enc = enc;
    sym->only_7bit_and_len = (len << 1) | is_7bit;
}
/*
 * Record id under the len bytes at name (encoding enc) in both
 * global_symbols.sym_id (keyed by the name) and global_symbols.id_str
 * (keyed by the id).  Returns id unchanged.
 *
 * NOTE(review): this listing is a diff shown without +/- markers.  The
 * rb_enc_str_new() based inserts and the sym_entry based inserts below look
 * like the removed and the added version of the same step -- confirm against
 * the raw patch before reading this as a single function body.
 */
static ID
register_symid(ID id, const char *name, long len, rb_encoding *enc)
{
VALUE str = rb_enc_str_new(name, len, enc);
OBJ_FREEZE(str);
st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
st_add_direct(global_symbols.id_str, id, (st_data_t)str);
struct sym_entry *sym = ALLOC(struct sym_entry);
/* Private NUL-terminated copy of the name; the entry points at this copy,
 * not at the caller's buffer. */
char *buf = ALLOC_N(char, len + 1);
memcpy(buf, name, len);
buf[len] = 0;
setup_sym_entry(sym, buf, len, enc);
/* setup_sym_entry() has already stored buf in sym->name; this repeats it. */
sym->name = buf;
st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
st_add_direct(global_symbols.id_str, id, (st_data_t)sym);
return id;
}
......
const char *m = name;
const char *e = m + len;
unsigned char c;
VALUE str;
ID id;
long last;
int mb;
struct RString fake_str;
fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
fake_str.basic.klass = rb_cString;
fake_str.as.heap.len = len;
fake_str.as.heap.ptr = (char *)name;
fake_str.as.heap.aux.capa = len;
str = (VALUE)&fake_str;
rb_enc_associate(str, enc);
if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
struct sym_entry sym;
setup_sym_entry(&sym, name, len, enc);
if (st_lookup(global_symbols.sym_id, (st_data_t)&sym, (st_data_t *)&id))
return id;
if (rb_cString && !rb_enc_asciicompat(enc)) {
......
return id;
}
VALUE
rb_id2str(ID id)
static struct sym_entry *
find_sym_entry(ID id)
{
st_data_t data;
......
int i = 0;
if (id < INT_MAX && rb_ispunct((int)id)) {
VALUE str = global_symbols.op_sym[i = (int)id];
if (!str) {
struct sym_entry *sym = &global_symbols.op_sym[i = (int)id];
if (!sym->name) {
char name[2];
name[0] = (char)id;
name[1] = 0;
str = rb_usascii_str_new(name, 1);
OBJ_FREEZE(str);
global_symbols.op_sym[i] = str;
setup_sym_entry(sym, name, 1, rb_usascii_encoding());
sym->name = strdup(name);
}
return str;
return sym;
}
for (i = 0; i < op_tbl_count; i++) {
if (op_tbl[i].token == id) {
VALUE str = global_symbols.op_sym[i];
if (!str) {
str = rb_usascii_str_new2(op_tbl[i].name);
OBJ_FREEZE(str);
global_symbols.op_sym[i] = str;
struct sym_entry *sym = &global_symbols.op_sym[i];
if (!sym->name) {
const char *name = op_tbl[i].name;
setup_sym_entry(sym, name, strlen(name), rb_usascii_encoding());
}
return str;
return sym;
}
}
}
if (st_lookup(global_symbols.id_str, id, &data)) {
VALUE str = (VALUE)data;
if (RBASIC(str)->klass == 0)
RBASIC(str)->klass = rb_cString;
return str;
return (struct sym_entry *)data;
}
if (is_attrset_id(id)) {
......
rb_str_cat(str, "=", 1);
rb_intern_str(str);
if (st_lookup(global_symbols.id_str, id, &data)) {
VALUE str = (VALUE)data;
if (RBASIC(str)->klass == 0)
RBASIC(str)->klass = rb_cString;
return str;
return (struct sym_entry *)data;
}
}
return 0;
}
/*
 * Build a string for id from its symbol-table entry.
 *
 * Looks the entry up with find_sym_entry(); returns 0 when there is none,
 * otherwise the result of rb_enc_str_new() on the entry's name bytes,
 * length and encoding.
 */
VALUE
rb_id2str(ID id)
{
    struct sym_entry *entry = find_sym_entry(id);

    if (entry) {
        return rb_enc_str_new(entry->name, SYM_INFO_LEN(entry), entry->enc);
    }
    return 0;
}
/*
 * Return the name bytes recorded for id, or 0 when the lookup finds nothing.
 *
 * NOTE(review): this listing is a diff shown without +/- markers.  The
 * rb_id2str()/RSTRING_PTR() lines and the find_sym_entry()/sym->name lines
 * look like the removed and the added body respectively -- confirm against
 * the raw patch.
 */
const char *
rb_id2name(ID id)
{
VALUE str = rb_id2str(id);
struct sym_entry *sym = find_sym_entry(id);
if (!str) return 0;
return RSTRING_PTR(str);
if (!sym) return 0;
return sym->name;
}
static int
string.c
return NULL;
}
static int
coderange_scan(const char *p, long len, rb_encoding *enc)
int
rb_coderange_scan(const char *p, long len, rb_encoding *enc)
{
const char *e = p + len;
......
if (cr == ENC_CODERANGE_UNKNOWN) {
rb_encoding *enc = STR_ENC_GET(str);
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
cr = rb_coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
ENC_CODERANGE_SET(str, cr);
}
return cr;
......
ptr_cr = ENC_CODERANGE_UNKNOWN;
}
else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
ptr_cr = rb_coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
}
}
else {
......
goto incompatible;
}
if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
ptr_cr = coderange_scan(ptr, len, ptr_enc);
ptr_cr = rb_coderange_scan(ptr, len, ptr_enc);
}
if (str_cr == ENC_CODERANGE_UNKNOWN) {
if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
......
if (!enc) {
rb_encoding *str_enc = STR_ENC_GET(str);
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
if (rb_coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
rb_coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
rb_enc_name(str_enc),
rb_enc_name(STR_ENC_GET(repl)));
    (1-1/1)