Feature #2447 » symbol_table_without_string.patch
parse.y | ||
---|---|---|
#define ENABLE_SELECTOR_NAMESPACE 0
|
||
#endif
|
||
/*
 * One symbol-table entry: the raw bytes of a symbol name together with
 * its encoding and a packed length/7-bit flag (see SYM_INFO_LEN and
 * SYM_INFO_ONLY_7BIT).
 */
struct sym_entry {
|
||
/* Encoding of the name, as passed to setup_sym_entry(). */
rb_encoding *enc;
|
||
/*
 * Packed as (length << 1) | only_7bit by setup_sym_entry().
 * NOTE(review): the field is `int` while setup_sym_entry() takes a `long`
 * length, so very long names would be narrowed -- confirm this is acceptable.
 */
int only_7bit_and_len;
|
||
/* Pointer to the name bytes; this struct does not record who owns them. */
const char *name;
|
||
};
|
||
/* Name length of a sym_entry: the packed field with its low flag bit shifted out. */
#define SYM_INFO_LEN(obj) ((obj)->only_7bit_and_len >> 1)
|
||
/* Non-zero when the low bit of the packed field (the "only 7-bit" flag) is set. */
#define SYM_INFO_ONLY_7BIT(obj) ((obj)->only_7bit_and_len & 1)
|
||
/*
 * Hash callback for sym_entry keys.
 *
 * Hashes the name bytes with rb_memhash() over SYM_INFO_LEN(sym) bytes and
 * XORs in the encoding pointer, except when the entry is flagged as
 * 7-bit only, in which case the encoding does not contribute to the hash.
 */
static st_index_t
|
||
sym_hash(struct sym_entry *sym)
|
||
{
|
||
/* 7-bit-only names XOR with 0 so their hash is independent of sym->enc. */
return rb_memhash(sym->name, SYM_INFO_LEN(sym)) ^ (SYM_INFO_ONLY_7BIT(sym) ? 0 : (VALUE)sym->enc);
|
||
}
|
||
/*
 * Decide whether two entries may be compared byte-wise.
 *
 * Returns TRUE when either name is empty, when both entries share the same
 * encoding pointer, when both are flagged 7-bit only, or when exactly one is
 * 7-bit only and the other's encoding satisfies rb_enc_asciicompat().
 * Returns FALSE otherwise.
 */
static inline int
|
||
sym_comparable(struct sym_entry *sym1, struct sym_entry *sym2)
|
||
{
|
||
/* An empty name on either side is always treated as comparable. */
if (SYM_INFO_LEN(sym1) == 0) return TRUE;
|
||
if (SYM_INFO_LEN(sym2) == 0) return TRUE;
|
||
/* Same encoding pointer: no compatibility check needed. */
if (sym1->enc == sym2->enc) return TRUE;
|
||
/* sym1 is 7-bit only: fine if sym2 is too, or if sym2's encoding is ASCII compatible. */
if (SYM_INFO_ONLY_7BIT(sym1)) {
|
||
if (SYM_INFO_ONLY_7BIT(sym2)) return TRUE;
|
||
if (rb_enc_asciicompat(sym2->enc))
|
||
return TRUE;
|
||
}
|
||
/* Mirror case: sym2 is 7-bit only and sym1's encoding is ASCII compatible. */
if (SYM_INFO_ONLY_7BIT(sym2)) {
|
||
if (rb_enc_asciicompat(sym1->enc))
|
||
return TRUE;
|
||
}
|
||
return FALSE;
|
||
}
|
||
/*
 * Comparison callback for sym_entry keys.
 *
 * Returns 0 when the entries pass sym_comparable(), have the same
 * SYM_INFO_LEN, and their name bytes are identical under memcmp();
 * returns 1 in every other case.
 */
static int
|
||
sym_hash_cmp(struct sym_entry *sym1, struct sym_entry *sym2)
|
||
{
|
||
/* NOTE(review): `len` is declared but never used in this function. */
long len;
|
||
if (!sym_comparable(sym1, sym2)) return 1;
|
||
if (SYM_INFO_LEN(sym1) == SYM_INFO_LEN(sym2) &&
|
||
memcmp(sym1->name, sym2->name, SYM_INFO_LEN(sym1)) == 0) {
|
||
return 0;
|
||
}
|
||
return 1;
|
||
}
|
||
static struct symbols {
|
||
ID last_id;
|
||
st_table *sym_id;
|
||
... | ... | |
st_table *ivar2_id;
|
||
st_table *id_ivar2;
|
||
#endif
|
||
VALUE op_sym[tLAST_TOKEN];
|
||
struct sym_entry op_sym[tLAST_TOKEN];
|
||
} global_symbols = {tLAST_ID};
|
||
/*
 * Hash-type descriptor (compare and hash callbacks) named symhash.
 *
 * NOTE(review): four callbacks are listed here. This looks like the
 * rendered diff interleaving the pre-patch pair (rb_str_hash_cmp,
 * rb_str_hash) with the post-patch pair (sym_hash_cmp, sym_hash) --
 * confirm against the raw patch before relying on this text.
 */
static const struct st_hash_type symhash = {
|
||
rb_str_hash_cmp,
|
||
rb_str_hash,
|
||
sym_hash_cmp,
|
||
sym_hash,
|
||
};
|
||
#if ENABLE_SELECTOR_NAMESPACE
|
||
... | ... | |
/*
 * Marks the symbol tables: calls rb_mark_tbl() on global_symbols.id_str and
 * rb_gc_mark_locations() over the op_sym array [op_sym, op_sym + tLAST_TOKEN).
 *
 * NOTE(review): elsewhere in this patch op_sym is also declared as an array
 * of struct sym_entry, and id_str values are stored as sym_entry pointers.
 * Presumably this marking needs to change accordingly -- verify whether
 * these lines are kept or removed in the raw patch.
 */
void
|
||
rb_gc_mark_symbols(void)
|
||
{
|
||
rb_mark_tbl(global_symbols.id_str);
|
||
rb_gc_mark_locations(global_symbols.op_sym,
|
||
global_symbols.op_sym + tLAST_TOKEN);
|
||
}
|
||
#endif /* !RIPPER */
|
||
... | ... | |
return m == e;
|
||
}
|
||
/*
 * Fill in *sym from a name buffer.
 *
 * sym  - entry to initialize
 * name - pointer to the name bytes; stored as-is, not copied
 * len  - number of bytes in name
 * enc  - encoding to record for the name
 *
 * The 7-bit flag is set when rb_coderange_scan() reports
 * ENC_CODERANGE_7BIT for the name.
 */
static inline void
|
||
setup_sym_entry(struct sym_entry *sym, const char *name, long len, rb_encoding *enc)
|
||
{
|
||
int ascii = rb_coderange_scan(name, len, enc) == ENC_CODERANGE_7BIT;
|
||
sym->enc = enc;
|
||
sym->name = name;
|
||
/*
 * Pack the length and flag into one field.
 * NOTE(review): `len` is long but the field is int, so the value is
 * narrowed on assignment -- confirm lengths are bounded by the callers.
 */
sym->only_7bit_and_len = (len << 1) | ascii;
|
||
}
|
||
/*
 * Register `id` under the given name in global_symbols.sym_id and
 * global_symbols.id_str, and return `id`.
 *
 * NOTE(review): this span appears to interleave pre-patch lines (building a
 * frozen String and adding it to both tables) with post-patch lines
 * (allocating a sym_entry plus a NUL-terminated copy of the name and adding
 * that instead). Confirm against the raw patch which lines survive.
 */
static ID
|
||
register_symid(ID id, const char *name, long len, rb_encoding *enc)
|
||
{
|
||
VALUE str = rb_enc_str_new(name, len, enc);
|
||
OBJ_FREEZE(str);
|
||
st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
|
||
st_add_direct(global_symbols.id_str, id, (st_data_t)str);
|
||
struct sym_entry *sym = ALLOC(struct sym_entry);
|
||
/* Private copy of the name with one extra byte for a terminating NUL. */
char *buf = ALLOC_N(char, len + 1);
|
||
memcpy(buf, name, len);
|
||
buf[len] = 0;
|
||
setup_sym_entry(sym, buf, len, enc);
|
||
/* NOTE(review): redundant -- setup_sym_entry() was just given buf and already stored it in sym->name. */
sym->name = buf;
|
||
st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
|
||
st_add_direct(global_symbols.id_str, id, (st_data_t)sym);
|
||
return id;
|
||
}
|
||
... | ... | |
const char *m = name;
|
||
const char *e = m + len;
|
||
unsigned char c;
|
||
VALUE str;
|
||
ID id;
|
||
long last;
|
||
int mb;
|
||
struct RString fake_str;
|
||
fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
|
||
fake_str.basic.klass = rb_cString;
|
||
fake_str.as.heap.len = len;
|
||
fake_str.as.heap.ptr = (char *)name;
|
||
fake_str.as.heap.aux.capa = len;
|
||
str = (VALUE)&fake_str;
|
||
rb_enc_associate(str, enc);
|
||
if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
|
||
struct sym_entry sym;
|
||
setup_sym_entry(&sym, name, len, enc);
|
||
if (st_lookup(global_symbols.sym_id, (st_data_t)&sym, (st_data_t *)&id))
|
||
return id;
|
||
if (rb_cString && !rb_enc_asciicompat(enc)) {
|
||
... | ... | |
return id;
|
||
}
|
||
VALUE
|
||
rb_id2str(ID id)
|
||
static struct sym_entry *
|
||
find_sym_entry(ID id)
|
||
{
|
||
st_data_t data;
|
||
... | ... | |
int i = 0;
|
||
if (id < INT_MAX && rb_ispunct((int)id)) {
|
||
VALUE str = global_symbols.op_sym[i = (int)id];
|
||
if (!str) {
|
||
struct sym_entry *sym = &global_symbols.op_sym[i = (int)id];
|
||
if (!sym->name) {
|
||
char name[2];
|
||
name[0] = (char)id;
|
||
name[1] = 0;
|
||
str = rb_usascii_str_new(name, 1);
|
||
OBJ_FREEZE(str);
|
||
global_symbols.op_sym[i] = str;
|
||
setup_sym_entry(sym, name, 1, rb_usascii_encoding());
|
||
sym->name = strdup(name);
|
||
}
|
||
return str;
|
||
return sym;
|
||
}
|
||
for (i = 0; i < op_tbl_count; i++) {
|
||
if (op_tbl[i].token == id) {
|
||
VALUE str = global_symbols.op_sym[i];
|
||
if (!str) {
|
||
str = rb_usascii_str_new2(op_tbl[i].name);
|
||
OBJ_FREEZE(str);
|
||
global_symbols.op_sym[i] = str;
|
||
struct sym_entry *sym = &global_symbols.op_sym[i];
|
||
if (!sym->name) {
|
||
const char *name = op_tbl[i].name;
|
||
setup_sym_entry(sym, name, strlen(name), rb_usascii_encoding());
|
||
}
|
||
return str;
|
||
return sym;
|
||
}
|
||
}
|
||
}
|
||
if (st_lookup(global_symbols.id_str, id, &data)) {
|
||
VALUE str = (VALUE)data;
|
||
if (RBASIC(str)->klass == 0)
|
||
RBASIC(str)->klass = rb_cString;
|
||
return str;
|
||
return (struct sym_entry *)data;
|
||
}
|
||
if (is_attrset_id(id)) {
|
||
... | ... | |
rb_str_cat(str, "=", 1);
|
||
rb_intern_str(str);
|
||
if (st_lookup(global_symbols.id_str, id, &data)) {
|
||
VALUE str = (VALUE)data;
|
||
if (RBASIC(str)->klass == 0)
|
||
RBASIC(str)->klass = rb_cString;
|
||
return str;
|
||
return (struct sym_entry *)data;
|
||
}
|
||
}
|
||
return 0;
|
||
}
|
||
/*
 * Return the name of `id` as a string value, or 0 when find_sym_entry()
 * has no entry for it.
 *
 * The result is produced by calling rb_enc_str_new() on the entry's bytes,
 * length and encoding each time this function is called.
 * NOTE(review): presumably that yields a fresh, unfrozen String per call,
 * unlike a stored frozen String -- confirm callers do not rely on identity.
 */
VALUE
|
||
rb_id2str(ID id)
|
||
{
|
||
struct sym_entry *sym = find_sym_entry(id);
|
||
if (!sym) return 0;
|
||
return rb_enc_str_new(sym->name, SYM_INFO_LEN(sym), sym->enc);
|
||
}
|
||
/*
 * Return a C string pointer for the name of `id`, or 0 when the id is
 * unknown.
 *
 * NOTE(review): this span appears to interleave the pre-patch body
 * (rb_id2str() followed by RSTRING_PTR()) with the post-patch body
 * (find_sym_entry() followed by sym->name). As rendered, the second
 * check/return pair sits after an unconditional return. Confirm against
 * the raw patch which lines survive.
 */
const char *
|
||
rb_id2name(ID id)
|
||
{
|
||
VALUE str = rb_id2str(id);
|
||
struct sym_entry *sym = find_sym_entry(id);
|
||
if (!str) return 0;
|
||
return RSTRING_PTR(str);
|
||
if (!sym) return 0;
|
||
return sym->name;
|
||
}
|
||
static int
|
string.c | ||
---|---|---|
return NULL;
|
||
}
|
||
static int
|
||
coderange_scan(const char *p, long len, rb_encoding *enc)
|
||
int
|
||
rb_coderange_scan(const char *p, long len, rb_encoding *enc)
|
||
{
|
||
const char *e = p + len;
|
||
... | ... | |
if (cr == ENC_CODERANGE_UNKNOWN) {
|
||
rb_encoding *enc = STR_ENC_GET(str);
|
||
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
|
||
cr = rb_coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
|
||
ENC_CODERANGE_SET(str, cr);
|
||
}
|
||
return cr;
|
||
... | ... | |
ptr_cr = ENC_CODERANGE_UNKNOWN;
|
||
}
|
||
else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
|
||
ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
|
||
ptr_cr = rb_coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
|
||
}
|
||
}
|
||
else {
|
||
... | ... | |
goto incompatible;
|
||
}
|
||
if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
|
||
ptr_cr = coderange_scan(ptr, len, ptr_enc);
|
||
ptr_cr = rb_coderange_scan(ptr, len, ptr_enc);
|
||
}
|
||
if (str_cr == ENC_CODERANGE_UNKNOWN) {
|
||
if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
|
||
... | ... | |
if (!enc) {
|
||
rb_encoding *str_enc = STR_ENC_GET(str);
|
||
p = RSTRING_PTR(str); len = RSTRING_LEN(str);
|
||
if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
|
||
coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
|
||
if (rb_coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
|
||
rb_coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
|
||
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
|
||
rb_enc_name(str_enc),
|
||
rb_enc_name(STR_ENC_GET(repl)));
|