Bug #8148

[patch] reduce allocations due to __FILE__ and {class,module}_eval

Added by Aman Gupta about 1 year ago. Updated about 1 year ago.

[ruby-core:53635]
Status:Rejected
Priority:Normal
Assignee:Aman Gupta
Category:-
Target version:-
ruby -v:ruby 2.1.0dev (2013-03-22 trunk 39875) [x86_64-darwin12.2.1] Backport:

Description

In my application, I noticed many long lived "(eval)" strings on the heap.

I also noticed many copies of filename strings when using moduleeval(.., _FILE_, ..). Attached patch adds a new NODEFILE to re-use the original path string.

./ruby -Ilib -rpp -rforwardable -e '

module Test

extend Forwardable
def_delegators :@test, *("a".."z")
end

GC.start
strings = ObjectSpace.each_object(String).select{ |s| s.frozen? && s =~ /forwardable/ }
p strings.size
pp strings.inject(Hash.new 0){ |h,s| h[s]+=1; h }
'

before:

33
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>31,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

after:

4
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>2,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

diff --git a/compile.c b/compile.c
index 9360f5b..54733d9 100644
--- a/compile.c
+++ b/compile.c
@@ -5149,6 +5149,12 @@ iseqcompileeach(rbiseqt *iseq, LINKANCHOR *ret, NODE * node, int poped)
}
break;
}
+ case NODE
FILE:{
+ if (!poped) {
+ ADDINSN1(ret, line, putobject, iseq->location.path);
+ }
+ break;
+ }
case NODE
ERRINFO:{
if (!poped) {
if (iseq->type == ISEQTYPERESCUE) {
diff --git a/ext/objspace/objspace.c b/ext/objspace/objspace.c
index 720c918..2f32208 100644
--- a/ext/objspace/objspace.c
+++ b/ext/objspace/objspace.c
@@ -530,6 +530,7 @@ countnodes(int argc, VALUE *argv, VALUE os)
COUNT
NODE(NODENIL);
COUNT
NODE(NODETRUE);
COUNT
NODE(NODEFALSE);
+ COUNT
NODE(NODEFILE);
COUNT
NODE(NODEERRINFO);
COUNT
NODE(NODEDEFINED);
COUNT
NODE(NODEPOSTEXE);
diff --git a/node.h b/node.h
index 0fa7254..3f4345e 100644
--- a/node.h
+++ b/node.h
@@ -232,6 +232,8 @@ enum node
type {
#define NODEPRELUDE NODEPRELUDE
NODELAMBDA,
#define NODE
LAMBDA NODELAMBDA
+ NODE
FILE,
+#define NODEFILE NODEFILE
NODELAST
#define NODE
LAST NODELAST
};
@@ -450,6 +452,7 @@ typedef struct RNode {
#define NEW
NIL() NEWNODE(NODENIL,0,0,0)
#define NEWTRUE() NEWNODE(NODETRUE,0,0,0)
#define NEW
FALSE() NEWNODE(NODEFALSE,0,0,0)
+#define NEWFILE() NEWNODE(NODEFILE,0,0,0)
#define NEW
ERRINFO() NEWNODE(NODEERRINFO,0,0,0)
#define NEWDEFINED(e) NEWNODE(NODEDEFINED,e,0,0)
#define NEW
PREEXE(b) NEWSCOPE(b)
diff --git a/parse.y b/parse.y
index 0f0fbe8..a177ca0 100644
--- a/parse.y
+++ b/parse.y
@@ -8429,8 +8429,7 @@ gettable
gen(struct parserparams *parser, ID id)
case keyword
false:
return NEWFALSE();
case keyword
FILE:
- return NEW
STR(rbexternalstrnewwithenc(rubysourcefile, strlen(rubysourcefile),
- rb
filesystemencoding()));
+ return NEW
FILE();
case keywordLINE:
return NEWLIT(INT2FIX(tokline));
case keyword
ENCODING:
diff --git a/test/ruby/test
literal.rb b/test/ruby/testliteral.rb
index e4c35e0..1dbcf8e 100644
--- a/test/ruby/test
literal.rb
+++ b/test/ruby/testliteral.rb
@@ -362,6 +362,7 @@ class TestRubyLiteral < Test::Unit::TestCase
def test
FILE_
assertinstanceof String, FILE
assertequal _FILE, __FILE
+ assertequal _FILE.objectid, _FILE.objectid
assert
equal 'testliteral.rb', File.basename(FILE_)
end

diff --git a/test/rubygems/testgem.rb b/test/rubygems/testgem.rb
index b6c7465..1b2ee37 100644
--- a/test/rubygems/testgem.rb
+++ b/test/rubygems/test
gem.rb
@@ -1489,7 +1489,7 @@ class TestGem < Gem::TestCase
assertequal [a,b,c], Gem.detectgemdeps
end

  • LIBPATH = File.expandpath "../../../lib".untaint, FILE.untaint
  • LIBPATH = File.expandpath "../../../lib".untaint, FILE.dup.untaint

    def testlooksforgemdepsfilesautomaticallyonstart
    util
    cleargems
    diff --git a/vm
    eval.c b/vmeval.c
    index 7c18f70..bc54646 100644
    --- a/vm
    eval.c
    +++ b/vmeval.c
    @@ -19,6 +19,8 @@ static VALUE vm
    exec(rbthreadt *th);
    static void vmsetevalstack(rbthreadt * th, VALUE iseqval, const NODE *cref, rbblockt *baseblock);
    static int vmcollectlocalvariablesinheap(rbthread_t *th, VALUE *dfp, VALUE ary);

+static VALUE evalfile;
+
/* vm
backtrace.c */
VALUE rbvmbacktracestrary(rbthreadt *th, int lev, int n);

@@ -1156,7 +1158,7 @@ rb_each(VALUE obj)
}

static VALUE
-evalstringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval
stringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)
{
int state;
VALUE result = Qundef;
@@ -1168,8 +1170,8 @@ evalstringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
volatile int parse
ineval;
volatile int mild
compile_error;

  • if (file == 0) {
  • file = rb_sourcefile();
  • if (!RTEST(file)) {
  • file = rbsourcefilename(); line = rbsourceline(); }

@@ -1184,8 +1186,8 @@ evalstringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
if (rb
objiskindof(scope, rbcBinding)) {
GetBindingPtr(scope, bind);
envval = bind->env;
- if (strcmp(file, "(eval)") == 0 && bind->path != Qnil) {
- file = RSTRINGPTR(bind->path);
+ if (rb
strcmp(file, evalfile) == 0 && bind->path != Qnil) {
+ file = bind->path;
line = bind->firstlineno;
}
}
@@ -1214,7 +1216,7 @@ eval
stringwithcref(VALUE self, VALUE src, VALUE scope, NODE cref, const char
/
make eval iseq */
th->parseineval++;
th->mildcompileerror++;
- iseqval = rbiseqcompileonbase(src, rbstrnew2(file), INT2FIX(line), baseblock);
+ iseqval = rb
iseqcompileonbase(src, file, INT2FIX(line), baseblock);
th->mildcompileerror--;
th->parseineval--;

@@ -1242,7 +1244,7 @@ evalstringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
if (state) {
if (state == TAG
RAISE) {
VALUE errinfo = th->errinfo;
- if (strcmp(file, "(eval)") == 0) {
+ if (rbstrcmp(file, evalfile) == 0) {
VALUE mesg, errat, bt2;
ID id
mesg;

@@ -1272,7 +1274,7 @@ evalstringwith_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
}

static VALUE
-evalstring(VALUE self, VALUE src, VALUE scope, const char *file, int line)
+eval
string(VALUE self, VALUE src, VALUE scope, VALUE file, int line)
{
return evalstringwithcref(self, src, scope, 0, file, line);
}
@@ -1299,7 +1301,7 @@ VALUE
rb
feval(int argc, VALUE *argv, VALUE self)
{
VALUE src, scope, vfile, vline;
- const char *file = "(eval)";
+ VALUE file = eval
file;
int line = 1;

 rb_scan_args(argc, argv, "13", &src, &scope, &vfile, &vline);

@@ -1321,7 +1323,7 @@ rbfeval(int argc, VALUE *argv, VALUE self)
}

 if (!NIL_P(vfile))
  • file = RSTRING_PTR(vfile);
  • file = vfile; return eval_string(self, src, scope, file, line); }

@@ -1329,7 +1331,7 @@ rbfeval(int argc, VALUE *argv, VALUE self)
VALUE
rubyevalstringfromfile(const char *str, const char *filename)
{
- return evalstring(rbvmtopself(), rbstrnew2(str), Qnil, filename, 1);
+ return evalstring(rbvmtopself(), rbstrnew2(str), Qnil, rbstrnew2(filename), 1);
}

struct evalstringfromfilearg {
@@ -1341,7 +1343,7 @@ static VALUE
evalstringfromfilehelper(void data)
{
const struct evalstringfromfilearg *const arg = (struct evalstringfromfilearg
)data;
- return evalstring(rbvmtopself(), rbstrnew2(arg->str), Qnil, arg->filename, 1);
+ return evalstring(rbvmtopself(), rbstrnew2(arg->str), Qnil, rbstrnew2(arg->filename), 1);
}

VALUE
@@ -1368,7 +1370,7 @@ rubyevalstringfromfileprotect(const char *str, const char *filename, int *s
VALUE
rb
evalstring(const char *str)
{
- return ruby
evalstringfromfile(str, "eval");
+ return eval
string(rbvmtopself(), rbstrnew2(str), Qnil, evalfile, 1);
}

/**
@@ -1509,7 +1511,7 @@ rbyieldrefine_block(VALUE refinement, VALUE refinements)

/* string eval under the class/module context */
static VALUE
-evalunder(VALUE under, VALUE self, VALUE src, const char *file, int line)
+eval
under(VALUE under, VALUE self, VALUE src, VALUE file, int line)
{
NODE *cref = vmcrefpush(GETTHREAD(), under, NOEXPUBLIC, NULL);

@@ -1534,7 +1536,7 @@ specificeval(int argc, VALUE *argv, VALUE klass, VALUE self)
return yield
under(klass, self, Qundef);
}
else {
- const char *file = "(eval)";
+ VALUE file = eval_file;
int line = 1;

rb_check_arity(argc, 1, 3);

@@ -1547,7 +1549,7 @@ specificeval(int argc, VALUE *argv, VALUE klass, VALUE self)
if (argc > 2)
line = NUM2INT(argv[2]);
if (argc > 1) {
- file = StringValuePtr(argv[1]);
+ file = StringValue(argv[1]);
}
return eval
under(klass, self, argv[0], file, line);
}
@@ -1928,6 +1930,9 @@ rbcurrentrealfilepath(void)
void
Initvmeval(void)
{
+ evalfile = rbstrnew2("(eval)");
+ rb
gcregisteraddress(&evalfile);
+
rb
defineglobalfunction("eval", rbfeval, -1);
rbdefineglobalfunction("localvariables", rbflocalvariables, 0);
rb
defineglobalfunction("iterator?", rbfblockgivenp, 0);


Related issues

Related to ruby-trunk - Bug #9159: [patch] use rb_fstring for internal strings Closed 11/26/2013

History

#1 Updated by Aman Gupta about 1 year ago

  • Assignee set to Aman Gupta

ko1-san, do you have any opinion on this patch? Is there a simpler solution instead of adding NODE_FILE?

I converted all eval functions inside the VM to use VALUE filename instead of char *filename, so it is easier to re-use existing RString.

  • LIBPATH = File.expandpath "../../../lib".untaint, FILE.untaint
  • LIBPATH = File.expandpath "../../../lib".untaint, FILE.dup.untaint

I had to add dup because FILE is frozen now.

-evalstringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval
stringwithcref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)

I'm not sure why it was using const char* volatile before. Is volatile VALUE necessary here?

   case keyword__FILE__:
  • return NEWSTR(rbexternalstrnewwithenc(rubysourcefile, strlen(rubysourcefile),
  • rbfilesystemencoding()));
  • return NEW_FILE();

  •  case NODE_FILE:{
    
  • if (!poped) {

  • ADD_INSN1(ret, line, putobject, iseq->location.path);
    

Will iseq->location.path during evaluation always be equal to ruby_sourcefile from parse phase?

#2 Updated by Charles Nutter about 1 year ago

I'd like to work with you to find more places we could be sharing frozen strings. A change over the summer made defined? results always be shared frozen strings, and apparently that was a source of lots and lots of extra string creation in Rails (according to wycats). There's many other such opportunities.

If course if FILE were always returning shared, frozen Strings, it would be far cheaper; one String ever for a given filename.

#3 Updated by Eric Wong about 1 year ago

"headius (Charles Nutter)" headius@headius.com wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rbstrdup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

#4 Updated by Aman Gupta about 1 year ago

On Wed, Apr 17, 2013 at 11:49 AM, Eric Wong normalperson@yhbt.net wrote:

"headius (Charles Nutter)" headius@headius.com wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rbstrdup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

I actually started on this a few weeks ago. I added a frozen string
cache and was able to remove a large number of duplicated strings from
the Ruby heap.

I'll clean up the patch and open a redmine issue for review.

Aman

#5 Updated by Aman Gupta about 1 year ago

  • Status changed from Open to Rejected

Is there a simpler solution instead of adding NODE_FILE?

To fix this simple case, NODE_FILE is unnecessary:

def testFILEshared
assertequal _FILE.objectid, _FILE.object_id
end

Here we can just ensure parse.y generates a NODELIT re-using an existing string (instead of a NODESTR with a new string every time). All instances of FILE encountered in one parse invocation would re-use same string.

But things get tricky when you want to share path name strings across parse invocations, i.e. for dynamically generated code. You need to emit a NODE_FILE to make this work:

def testFILEshared
file = "filename"
obj = Object.new
class << obj
classeval <<-RUBY, file, 0
def filename
_
FILE__
end
RUBY
end
assertequal file.objectid, obj.filename.object_id
end

Since all the parse.y APIs use char* we have to use RSTRINGPTR, and then end up creating a new RString inside parse.y. The NODEFILE avoids this by acting as a temporary placeholder that can be swapped out for the original RString in compile.c.

Also available in: Atom PDF