Bug #9171

[patch] use fstrings for symbol table

Added by Aman Gupta 5 months ago. Updated 5 months ago.

[ruby-core:58656]
Status:Closed
Priority:Normal
Assignee:Nobuyoshi Nakada
Category:core
Target version:2.1.0
ruby -v:ruby 2.1.0dev (2013-11-27 trunk 43880) Backport:1.9.3: UNKNOWN, 2.0.0: UNKNOWN

Description

Here is a simple patch to use fstrings for the table backing symbols.

Unfortunately it causes a segfault in test/rdoc/testrdocparser_ruby.rb. Maybe someone wants to investigate.

diff --git a/parse.y b/parse.y
index 8207ad7..b1f3112 100644
--- a/parse.y
+++ b/parse.y
@@ -10333,7 +10333,7 @@ registersymid(ID id, const char *name, long len, rbencoding *enc)
static ID
registersymidstr(ID id, VALUE str)
{
- OBJFREEZE(str);
+ str = rb
fstring(str);

 if (RUBY_DTRACE_SYMBOL_CREATE_ENABLED()) {
RUBY_DTRACE_SYMBOL_CREATE(RSTRING_PTR(str), rb_sourcefile(), rb_sourceline());

diff --git a/string.c b/string.c
index 231bb2f..6ae33e3 100644
--- a/string.c
+++ b/string.c
@@ -138,6 +138,9 @@ rbfstring(VALUE str)
st
datat fstr;
Check
Type(str, T_STRING);

  • if (!frozen_strings)
  • frozenstrings = stinittable(&fstringhashtype); + if (FLTEST(str, RSTRING_FSTR)) return str;

@@ -8707,8 +8710,6 @@ InitString(void)
#undef rb
intern
#define rbintern(str) rbintern_const(str)

- frozenstrings = stinittable(&fstringhash_type);

 rb_cString  = rb_define_class("String", rb_cObject);
 rb_include_module(rb_cString, rb_mComparable);
 rb_define_alloc_func(rb_cString, empty_str_alloc);

Associated revisions

Revision 44057
Added by Aman Gupta 5 months ago

parse.y: use rb_fstring() for strings stored in the symbol table

  • parse.y (registersymidstr): use fstrings in symbol table [Bug #9171]
  • parse.y (rb_id2str): ditto
  • string.c (rbfstring): create frozenstrings on first usage. this allows rb_fstring() calls from the parser (before cString is created)
  • string.c (fstringsetclass_i): set klass on fstrings generated before cString was defined
  • string.c (InitString): convert frozenstrings table to String objects after boot
  • ext/-test-/symbol/type.c (bugsymid2str): expose rb_id2str()
  • test/-ext-/symbol/testtype.rb (module TestSymbol): verify symbol table entries are fstrings

History

#1 Updated by Hiroshi SHIBATA 5 months ago

  • Assignee set to Koichi Sasada

ko1: Could you review this?

#2 Updated by Koichi Sasada 5 months ago

  • Category set to core
  • Assignee changed from Koichi Sasada to Nobuyoshi Nakada

Nobu, could you check it?

#3 Updated by Andrew Vit 5 months ago

Would this allow the symbol table to be GC'd or is that a separate issue?

#4 Updated by Nobuyoshi Nakada 5 months ago

different.

#5 Updated by Nobuyoshi Nakada 5 months ago

=begin
Seems my reply hasn't caught by the ITS...

(13/11/28 16:55), tmm1 (Aman Gupta) wrote:
diff --git a/parse.y b/parse.y
index 8207ad7..b1f3112 100644
--- a/parse.y
+++ b/parse.y
@@ -10333,7 +10333,7 @@ registersymid(ID id, const char *name, long len, rbencoding *enc)
static ID
registersymidstr(ID id, VALUE str)
{
- OBJFREEZE(str);
+ str = rb
fstring(str);

At this point, (({rbcString})) is not created yet.
And unfrozen string will be duplicated in (({rb
fstring()})).

diff --git i/parse.y w/parse.y
index 8207ad7..9f580e6 100644
--- i/parse.y
+++ w/parse.y
@@ -10334,6 +10334,7 @@ static ID
registersymidstr(ID id, VALUE str)
{
OBJFREEZE(str);
+ str = rb
fstring(str);

  if (RUBY_DTRACE_SYMBOL_CREATE_ENABLED()) {
RUBY_DTRACE_SYMBOL_CREATE(RSTRING_PTR(str), rb_sourcefile(), rb_sourceline());

diff --git i/string.c w/string.c
index 151170c..ec43af7 100644
--- i/string.c
+++ w/string.c
@@ -165,6 +165,9 @@ rbfstring(VALUE str)
VALUE fstr = Qnil;
Check
Type(str, T_STRING);

  • if (!frozen_strings)
  • frozenstrings = stinittable(&fstringhashtype);
    +
    if (FL
    TEST(str, RSTRING_FSTR))
    return str;

    @@ -173,6 +176,13 @@ rb_fstring(VALUE str)
    }

    static int
    +fstringsetclassi(stdatat key, stdatat val, stdata_t arg)
    +{

  • RBASICSETCLASS((VALUE)key, (VALUE)arg);

  • return STCONTINUE;
    +}
    +
    +static int
    fstring
    cmp(VALUE a, VALUE b)
    {
    int cmp = rbstrhashcmp(a, b);
    @@ -8718,8 +8728,6 @@ Init
    String(void)
    #undef rbintern
    #define rb
    intern(str) rbinternconst(str)

  • frozenstrings = stinittable(&fstringhash_type);

    rbcString = rbdefineclass("String", rbcObject);
    rbincludemodule(rbcString, rbmComparable);
    rbdefineallocfunc(rbcString, emptystralloc);
    @@ -8891,4 +8899,6 @@ InitString(void)
    rb
    definemethod(rbcSymbol, "swapcase", sym_swapcase, 0);

    rbdefinemethod(rbcSymbol, "encoding", symencoding, 0);
    +

  • stforeach(frozenstrings, fstringsetclassi, rbcString);
    }

=end

#6 Updated by Aman Gupta 5 months ago

Ah great! Thanks for finding my bug.

With your patch, long-lived strings on our rails app heap are reduced by 6000. I think we should merge it.

#7 Updated by Aman Gupta 5 months ago

  • Status changed from Open to Closed
  • % Done changed from 0 to 100

This issue was solved with changeset r44057.
Aman, thank you for reporting this issue.
Your contribution to Ruby is greatly appreciated.
May Ruby be with you.


parse.y: use rb_fstring() for strings stored in the symbol table

  • parse.y (registersymidstr): use fstrings in symbol table [Bug #9171]
  • parse.y (rb_id2str): ditto
  • string.c (rbfstring): create frozenstrings on first usage. this allows rb_fstring() calls from the parser (before cString is created)
  • string.c (fstringsetclass_i): set klass on fstrings generated before cString was defined
  • string.c (InitString): convert frozenstrings table to String objects after boot
  • ext/-test-/symbol/type.c (bugsymid2str): expose rb_id2str()
  • test/-ext-/symbol/testtype.rb (module TestSymbol): verify symbol table entries are fstrings

Also available in: Atom PDF