Bug #8148

[patch] reduce allocations due to __FILE__ and {class,module}_eval

Added by Aman Gupta over 2 years ago. Updated about 2 years ago.

[ruby-core:53635]
Status:Rejected
Priority:Normal
Assignee:Aman Gupta
ruby -v:ruby 2.1.0dev (2013-03-22 trunk 39875) [x86_64-darwin12.2.1] Backport:

Description

In my application, I noticed many long lived "(eval)" strings on the heap.

I also noticed many copies of filename strings when using module_eval(.., FILE, ..). Attached patch adds a new NODE_FILE to re-use the original path string.

./ruby -Ilib -rpp -rforwardable -e '

module Test

extend Forwardable
def_delegators :@test, *("a".."z")
end

GC.start
strings = ObjectSpace.each_object(String).select{ |s| s.frozen? && s =~ /forwardable/ }
p strings.size
pp strings.inject(Hash.new 0){ |h,s| h[s]+=1; h }
'

before:

33
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>31,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

after:

4
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>2,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

diff --git a/compile.c b/compile.c
index 9360f5b..54733d9 100644
--- a/compile.c
+++ b/compile.c
@@ -5149,6 +5149,12 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
}
break;
}
+ case NODE_FILE:{
+ if (!poped) {
+ ADD_INSN1(ret, line, putobject, iseq->location.path);
+ }
+ break;
+ }
case NODE_ERRINFO:{
if (!poped) {
if (iseq->type == ISEQ_TYPE_RESCUE) {
diff --git a/ext/objspace/objspace.c b/ext/objspace/objspace.c
index 720c918..2f32208 100644
--- a/ext/objspace/objspace.c
+++ b/ext/objspace/objspace.c
@@ -530,6 +530,7 @@ count_nodes(int argc, VALUE *argv, VALUE os)
COUNT_NODE(NODE_NIL);
COUNT_NODE(NODE_TRUE);
COUNT_NODE(NODE_FALSE);
+ COUNT_NODE(NODE_FILE);
COUNT_NODE(NODE_ERRINFO);
COUNT_NODE(NODE_DEFINED);
COUNT_NODE(NODE_POSTEXE);
diff --git a/node.h b/node.h
index 0fa7254..3f4345e 100644
--- a/node.h
+++ b/node.h
@@ -232,6 +232,8 @@ enum node_type {
#define NODE_PRELUDE NODE_PRELUDE
NODE_LAMBDA,
#define NODE_LAMBDA NODE_LAMBDA
+ NODE_FILE,
+#define NODE_FILE NODE_FILE
NODE_LAST
#define NODE_LAST NODE_LAST
};
@@ -450,6 +452,7 @@ typedef struct RNode {
#define NEW_NIL() NEW_NODE(NODE_NIL,0,0,0)
#define NEW_TRUE() NEW_NODE(NODE_TRUE,0,0,0)
#define NEW_FALSE() NEW_NODE(NODE_FALSE,0,0,0)
+#define NEW_FILE() NEW_NODE(NODE_FILE,0,0,0)
#define NEW_ERRINFO() NEW_NODE(NODE_ERRINFO,0,0,0)
#define NEW_DEFINED(e) NEW_NODE(NODE_DEFINED,e,0,0)
#define NEW_PREEXE(b) NEW_SCOPE(b)
diff --git a/parse.y b/parse.y
index 0f0fbe8..a177ca0 100644
--- a/parse.y
+++ b/parse.y
@@ -8429,8 +8429,7 @@ gettable_gen(struct parser_params *parser, ID id)
case keyword_false:
return NEW_FALSE();
case keyword_FILE:
- return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile),
- rb_filesystem_encoding()));
+ return NEW_FILE();
case keyword
LINE:
return NEW_LIT(INT2FIX(tokline));
case keyword
ENCODING:
diff --git a/test/ruby/test_literal.rb b/test/ruby/test_literal.rb
index e4c35e0..1dbcf8e 100644
--- a/test/ruby/test_literal.rb
+++ b/test/ruby/test_literal.rb
@@ -362,6 +362,7 @@ class TestRubyLiteral < Test::Unit::TestCase
def test
FILE_
assert_instance_of String, FILE
assert_equal FILE, FILE
+ assert_equal FILE.object_id, FILE.object_id
assert_equal 'test_literal.rb', File.basename(FILE)
end

diff --git a/test/rubygems/test_gem.rb b/test/rubygems/test_gem.rb
index b6c7465..1b2ee37 100644
--- a/test/rubygems/test_gem.rb
+++ b/test/rubygems/test_gem.rb
@@ -1489,7 +1489,7 @@ class TestGem < Gem::TestCase
assert_equal [a,b,c], Gem.detect_gemdeps
end

  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.untaint
  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.dup.untaint

def test_looks_for_gemdeps_files_automatically_on_start
util_clear_gems
diff --git a/vm_eval.c b/vm_eval.c
index 7c18f70..bc54646 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -19,6 +19,8 @@ static VALUE vm_exec(rb_thread_t *th);
static void vm_set_eval_stack(rb_thread_t * th, VALUE iseqval, const NODE *cref, rb_block_t *base_block);
static int vm_collect_local_variables_in_heap(rb_thread_t *th, VALUE *dfp, VALUE ary);

+static VALUE eval_file;
+
/* vm_backtrace.c */
VALUE rb_vm_backtrace_str_ary(rb_thread_t *th, int lev, int n);

@@ -1156,7 +1158,7 @@ rb_each(VALUE obj)
}

static VALUE
-eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)
{
int state;
VALUE result = Qundef;
@@ -1168,8 +1170,8 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
volatile int parse_in_eval;
volatile int mild_compile_error;

  • if (file == 0) {
  • file = rb_sourcefile();
  • if (!RTEST(file)) {
  • file = rb_sourcefilename(); line = rb_sourceline(); }

@@ -1184,8 +1186,8 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE cref, const char
if (rb_obj_is_kind_of(scope, rb_cBinding)) {
GetBindingPtr(scope, bind);
envval = bind->env;
- if (strcmp(file, "(eval)") == 0 && bind->path != Qnil) {
- file = RSTRING_PTR(bind->path);
+ if (rb_str_cmp(file, eval_file) == 0 && bind->path != Qnil) {
+ file = bind->path;
line = bind->first_lineno;
}
}
@@ -1214,7 +1216,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
/
make eval iseq */
th->parse_in_eval++;
th->mild_compile_error++;
- iseqval = rb_iseq_compile_on_base(src, rb_str_new2(file), INT2FIX(line), base_block);
+ iseqval = rb_iseq_compile_on_base(src, file, INT2FIX(line), base_block);
th->mild_compile_error--;
th->parse_in_eval--;

@@ -1242,7 +1244,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
if (state) {
if (state == TAG_RAISE) {
VALUE errinfo = th->errinfo;
- if (strcmp(file, "(eval)") == 0) {
+ if (rb_str_cmp(file, eval_file) == 0) {
VALUE mesg, errat, bt2;
ID id_mesg;

@@ -1272,7 +1274,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
}

static VALUE
-eval_string(VALUE self, VALUE src, VALUE scope, const char *file, int line)
+eval_string(VALUE self, VALUE src, VALUE scope, VALUE file, int line)
{
return eval_string_with_cref(self, src, scope, 0, file, line);
}
@@ -1299,7 +1301,7 @@ VALUE
rb_f_eval(int argc, VALUE *argv, VALUE self)
{
VALUE src, scope, vfile, vline;
- const char *file = "(eval)";
+ VALUE file = eval_file;
int line = 1;

 rb_scan_args(argc, argv, "13", &src, &scope, &vfile, &vline);

@@ -1321,7 +1323,7 @@ rb_f_eval(int argc, VALUE *argv, VALUE self)
}

 if (!NIL_P(vfile))
  • file = RSTRING_PTR(vfile);
  • file = vfile; return eval_string(self, src, scope, file, line); }

@@ -1329,7 +1331,7 @@ rb_f_eval(int argc, VALUE *argv, VALUE self)
VALUE
ruby_eval_string_from_file(const char *str, const char *filename)
{
- return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, filename, 1);
+ return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, rb_str_new2(filename), 1);
}

struct eval_string_from_file_arg {
@@ -1341,7 +1343,7 @@ static VALUE
eval_string_from_file_helper(void data)
{
const struct eval_string_from_file_arg *const arg = (struct eval_string_from_file_arg
)data;
- return eval_string(rb_vm_top_self(), rb_str_new2(arg->str), Qnil, arg->filename, 1);
+ return eval_string(rb_vm_top_self(), rb_str_new2(arg->str), Qnil, rb_str_new2(arg->filename), 1);
}

VALUE
@@ -1368,7 +1370,7 @@ ruby_eval_string_from_file_protect(const char *str, const char *filename, int *s
VALUE
rb_eval_string(const char *str)
{
- return ruby_eval_string_from_file(str, "eval");
+ return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, eval_file, 1);
}

/**
@@ -1509,7 +1511,7 @@ rb_yield_refine_block(VALUE refinement, VALUE refinements)

/* string eval under the class/module context */
static VALUE
-eval_under(VALUE under, VALUE self, VALUE src, const char *file, int line)
+eval_under(VALUE under, VALUE self, VALUE src, VALUE file, int line)
{
NODE *cref = vm_cref_push(GET_THREAD(), under, NOEX_PUBLIC, NULL);

@@ -1534,7 +1536,7 @@ specific_eval(int argc, VALUE *argv, VALUE klass, VALUE self)
return yield_under(klass, self, Qundef);
}
else {
- const char *file = "(eval)";
+ VALUE file = eval_file;
int line = 1;

rb_check_arity(argc, 1, 3);

@@ -1547,7 +1549,7 @@ specific_eval(int argc, VALUE *argv, VALUE klass, VALUE self)
if (argc > 2)
line = NUM2INT(argv[2]);
if (argc > 1) {
- file = StringValuePtr(argv[1]);
+ file = StringValue(argv[1]);
}
return eval_under(klass, self, argv[0], file, line);
}
@@ -1928,6 +1930,9 @@ rb_current_realfilepath(void)
void
Init_vm_eval(void)
{
+ eval_file = rb_str_new2("(eval)");
+ rb_gc_register_address(&eval_file);
+
rb_define_global_function("eval", rb_f_eval, -1);
rb_define_global_function("local_variables", rb_f_local_variables, 0);
rb_define_global_function("iterator?", rb_f_block_given_p, 0);


Related issues

Related to Ruby trunk - Bug #9159: [patch] use rb_fstring for internal strings Closed 11/26/2013

History

#1 Updated by Aman Gupta about 2 years ago

  • Assignee set to Aman Gupta

ko1-san, do you have any opinion on this patch? Is there a simpler solution instead of adding NODE_FILE?

I converted all eval functions inside the VM to use VALUE filename instead of char *filename, so it is easier to re-use existing RString.

  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.untaint
  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.dup.untaint

I had to add dup because FILE is frozen now.

-eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)

I'm not sure why it was using const char* volatile before. Is volatile VALUE necessary here?

   case keyword__FILE__:
  • return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile),
  • rb_filesystem_encoding()));
  • return NEW_FILE();

  •  case NODE_FILE:{
    
  • if (!poped) {

  • ADD_INSN1(ret, line, putobject, iseq->location.path);
    

Will iseq->location.path during evaluation always be equal to ruby_sourcefile from parse phase?

#2 Updated by Charles Nutter about 2 years ago

I'd like to work with you to find more places we could be sharing frozen strings. A change over the summer made defined? results always be shared frozen strings, and apparently that was a source of lots and lots of extra string creation in Rails (according to wycats). There's many other such opportunities.

If course if FILE were always returning shared, frozen Strings, it would be far cheaper; one String ever for a given filename.

#3 Updated by Eric Wong about 2 years ago

"headius (Charles Nutter)" headius@headius.com wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rb_str_dup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

#4 Updated by Aman Gupta about 2 years ago

On Wed, Apr 17, 2013 at 11:49 AM, Eric Wong normalperson@yhbt.net wrote:

"headius (Charles Nutter)" headius@headius.com wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rb_str_dup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

I actually started on this a few weeks ago. I added a frozen string
cache and was able to remove a large number of duplicated strings from
the Ruby heap.

I'll clean up the patch and open a redmine issue for review.

Aman

#5 Updated by Aman Gupta about 2 years ago

  • Status changed from Open to Rejected

Is there a simpler solution instead of adding NODE_FILE?

To fix this simple case, NODE_FILE is unnecessary:

def test_FILEshared
assert_equal __FILE
.object_id, __FILE_.object_id
end

Here we can just ensure parse.y generates a NODE_LIT re-using an existing string (instead of a NODE_STR with a new string every time). All instances of FILE encountered in one parse invocation would re-use same string.

But things get tricky when you want to share path name strings across parse invocations, i.e. for dynamically generated code. You need to emit a NODE_FILE to make this work:

def test_FILEshared
file = "filename"
obj = Object.new
class << obj
class_eval <<-RUBY, file, 0
def filename
_
FILE__
end
RUBY
end
assert_equal file.object_id, obj.filename.object_id
end

Since all the parse.y APIs use char* we have to use RSTRING_PTR, and then end up creating a new RString inside parse.y. The NODE_FILE avoids this by acting as a temporary placeholder that can be swapped out for the original RString in compile.c.

Also available in: Atom PDF