Return-Path: <e@80x24.org>
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN: AS43350 77.247.176.0/21
X-Spam-Status: No, score=-2.1 required=3.0 tests=AWL,BAYES_00,
 RCVD_IN_DNSWL_BLOCKED,RCVD_IN_XBL shortcircuit=no autolearn=no version=3.3.2
X-Original-To: spew@80x24.org
Received: from 80x24.org (chomsky.torservers.net [77.247.181.162]) by
 dcvr.yhbt.net (Postfix) with ESMTP id 78E691F7B4 for <spew@80x24.org>; Sat, 
 6 Dec 2014 00:23:30 +0000 (UTC)
From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [RFC] struct: avoid all O(n) behavior on access
Date: Sat,  6 Dec 2014 00:23:24 +0000
Message-Id: <struct_iseq-v1@r48725>
X-Mailer: git-send-email 2.2.0
List-Id: <spew@80x24.org>

This avoids O(n) on lookups with structs over 10 members.
This also avoids O(n) behavior on all assignments on Struct members.
Members 0..9 still use existing C methods to read in O(1) time

Benchmark results:

vm2_struct_big_aref_hi*	1.305
vm2_struct_big_aref_lo*	1.157
vm2_struct_big_aset*	3.306
vm2_struct_small_aref*	1.015
vm2_struct_small_aset*	3.273

Note: I chose use loading instructions from an array instead of writing
directly to linked-lists in compile.c for ease-of-maintainability.  We
may move the method definitions to prelude.rb-like files in the future.

I have also tested this patch with the following patch to disable
the C ref_func methods and ensured the test suite and rubyspec works

	--- a/struct.c
	+++ b/struct.c
	@@ -209,7 +209,7 @@ setup_struct(VALUE nstr, VALUE members)
		ID id = SYM2ID(ptr_members[i]);
		VALUE off = LONG2NUM(i);
	 
	-	if (i < N_REF_FUNC) {
	+	if (0 && i < N_REF_FUNC) {
		    rb_define_method_id(nstr, id, ref_func[i], 0);
		}
		else {
---
 benchmark/bm_vm2_struct_big_aref_hi.rb |   7 +++
 benchmark/bm_vm2_struct_big_aref_lo.rb |   7 +++
 benchmark/bm_vm2_struct_big_aset.rb    |   7 +++
 benchmark/bm_vm2_struct_small_aref.rb  |   7 +++
 benchmark/bm_vm2_struct_small_aset.rb  |   7 +++
 iseq.c                                 | 101 +++++++++++++++++++++++++++++++++
 struct.c                               |  60 +++++++++-----------
 test/ruby/test_struct.rb               |   9 +++
 8 files changed, 172 insertions(+), 33 deletions(-)
 create mode 100644 benchmark/bm_vm2_struct_big_aref_hi.rb
 create mode 100644 benchmark/bm_vm2_struct_big_aref_lo.rb
 create mode 100644 benchmark/bm_vm2_struct_big_aset.rb
 create mode 100644 benchmark/bm_vm2_struct_small_aref.rb
 create mode 100644 benchmark/bm_vm2_struct_small_aset.rb

diff --git a/benchmark/bm_vm2_struct_big_aref_hi.rb b/benchmark/bm_vm2_struct_big_aref_hi.rb
new file mode 100644
index 0000000..22cb26b
--- /dev/null
+++ b/benchmark/bm_vm2_struct_big_aref_hi.rb
@@ -0,0 +1,7 @@
+s = Struct.new(*('a'..'z').map { |x| x.to_sym })
+x = s.new
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  x.z # x[25]
+end
diff --git a/benchmark/bm_vm2_struct_big_aref_lo.rb b/benchmark/bm_vm2_struct_big_aref_lo.rb
new file mode 100644
index 0000000..5e61a70
--- /dev/null
+++ b/benchmark/bm_vm2_struct_big_aref_lo.rb
@@ -0,0 +1,7 @@
+s = Struct.new(*('a'..'z').map { |x| x.to_sym })
+x = s.new
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  x.k # x[10]
+end
diff --git a/benchmark/bm_vm2_struct_big_aset.rb b/benchmark/bm_vm2_struct_big_aset.rb
new file mode 100644
index 0000000..5a1c3d1
--- /dev/null
+++ b/benchmark/bm_vm2_struct_big_aset.rb
@@ -0,0 +1,7 @@
+s = Struct.new(*('a'..'z').map { |x| x.to_sym })
+x = s.new
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  x.k = i # x[10] = i
+end
diff --git a/benchmark/bm_vm2_struct_small_aref.rb b/benchmark/bm_vm2_struct_small_aref.rb
new file mode 100644
index 0000000..8eaa555
--- /dev/null
+++ b/benchmark/bm_vm2_struct_small_aref.rb
@@ -0,0 +1,7 @@
+s = Struct.new(:a, :b, :c)
+x = s.new
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  x.a
+end
diff --git a/benchmark/bm_vm2_struct_small_aset.rb b/benchmark/bm_vm2_struct_small_aset.rb
new file mode 100644
index 0000000..ecd0f95
--- /dev/null
+++ b/benchmark/bm_vm2_struct_small_aset.rb
@@ -0,0 +1,7 @@
+s = Struct.new(:a, :b, :c)
+x = s.new
+i = 0
+while i<6_000_000 # benchmark loop 2
+  i += 1
+  x.a = i
+end
diff --git a/iseq.c b/iseq.c
index 8f84f88..15d1de6 100644
--- a/iseq.c
+++ b/iseq.c
@@ -548,6 +548,107 @@ iseq_load(VALUE self, VALUE data, VALUE parent, VALUE opt)
     return iseqval;
 }
 
+rb_iseq_t *
+rb_method_for_self_aref(VALUE name, VALUE arg)
+{
+    VALUE iseqval = iseq_alloc(rb_cISeq);
+    rb_iseq_t *iseq;
+    VALUE path = rb_str_new2("<compiled>");
+    VALUE lineno = INT2FIX(1);
+    VALUE parent = 0;
+    VALUE misc, locals, params, exception, body, send_arg;
+    int flag = VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE;
+
+    GetISeqPtr(iseqval, iseq);
+    iseq->self = iseqval;
+    iseq->local_iseq = iseq;
+
+    prepare_iseq_build(iseq, rb_sym2str(name), path, path, lineno, parent,
+		       ISEQ_TYPE_METHOD, &COMPILE_OPTION_DEFAULT);
+
+    misc = params = rb_hash_new(); /* empty */
+    locals = exception = rb_ary_new(); /* empty */
+    body = rb_ary_new();
+
+#define S(s) ID2SYM(rb_intern(#s))
+    /* def name; self[arg]; end */
+    rb_ary_push(body, lineno);
+    rb_ary_push(body, rb_ary_new3(1, S(putself)));
+    rb_ary_push(body, rb_ary_new3(2, S(putobject), arg));
+
+    /* {:mid=>:[], :flag=>264, :blockptr=>nil, :orig_argc=>1} */
+    send_arg = rb_hash_new();
+    rb_hash_aset(send_arg, S(mid), ID2SYM(idAREF));
+    rb_hash_aset(send_arg, S(flag), INT2FIX(flag));
+    rb_hash_aset(send_arg, S(blockptr), Qnil);
+    rb_hash_aset(send_arg, S(orig_argc), INT2FIX(1));
+
+    /* we do not want opt_aref for struct */
+    rb_ary_push(body, rb_ary_new3(2, S(opt_send_without_block), send_arg));
+    rb_ary_push(body, rb_ary_new3(1, S(leave)));
+#undef S
+
+    rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
+    cleanup_iseq_build(iseq);
+
+    return iseq;
+}
+
+rb_iseq_t *
+rb_method_for_self_aset(VALUE name, VALUE arg)
+{
+    VALUE iseqval = iseq_alloc(rb_cISeq);
+    rb_iseq_t *iseq;
+    VALUE path = rb_str_new2("<compiled>");
+    VALUE lineno = INT2FIX(1);
+    VALUE parent = 0;
+    VALUE misc, locals, params, exception, body, send_arg;
+    int flag = VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE;
+
+    GetISeqPtr(iseqval, iseq);
+    iseq->self = iseqval;
+    iseq->local_iseq = iseq;
+
+    prepare_iseq_build(iseq, rb_sym2str(name), path, path, lineno, parent,
+		       ISEQ_TYPE_METHOD, &COMPILE_OPTION_DEFAULT);
+
+    /* def name=(val); self[arg] = val; end */
+#define S(s) ID2SYM(rb_intern(#s))
+    misc = rb_hash_new(); /* empty */
+    locals = rb_ary_new3(1, S(val));
+    params = rb_hash_new();
+    exception = rb_ary_new(); /* empty */
+    body = rb_ary_new();
+
+    rb_hash_aset(params, S(lead_num), INT2FIX(1));
+
+    rb_ary_push(body, lineno);
+    rb_ary_push(body, rb_ary_new3(1, S(putnil)));
+    rb_ary_push(body, rb_ary_new3(1, S(putself)));
+    rb_ary_push(body, rb_ary_new3(2, S(putobject), arg));
+    rb_ary_push(body, rb_ary_new3(3, S(getlocal), INT2FIX(2), INT2FIX(0)));
+    rb_ary_push(body, rb_ary_new3(2, S(setn), INT2FIX(3)));
+
+    /* {:mid=>:[]=, :flag=>264, :blockptr=>nil, :orig_argc=>2} */
+    send_arg = rb_hash_new();
+    rb_hash_aset(send_arg, S(mid), ID2SYM(idASET));
+    rb_hash_aset(send_arg, S(flag), INT2FIX(flag));
+    rb_hash_aset(send_arg, S(blockptr), Qnil);
+    rb_hash_aset(send_arg, S(orig_argc), INT2FIX(2));
+
+    /* we do not want opt_aset for struct */
+    rb_ary_push(body, rb_ary_new3(2, S(opt_send_without_block), send_arg));
+
+    rb_ary_push(body, rb_ary_new3(1, S(pop)));
+    rb_ary_push(body, rb_ary_new3(1, S(leave)));
+#undef S
+
+    rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
+    cleanup_iseq_build(iseq);
+
+    return iseq;
+}
+
 /*
  * :nodoc:
  */
diff --git a/struct.c b/struct.c
index cbc4028..3a2ea47 100644
--- a/struct.c
+++ b/struct.c
@@ -10,6 +10,11 @@
 **********************************************************************/
 
 #include "internal.h"
+#include "vm_core.h"
+#include "method.h"
+
+rb_iseq_t *rb_method_for_self_aref(VALUE name, VALUE arg);
+rb_iseq_t *rb_method_for_self_aset(VALUE name, VALUE arg);
 
 VALUE rb_cStruct;
 static ID id_members;
@@ -105,12 +110,6 @@ rb_struct_getmember(VALUE obj, ID id)
     UNREACHABLE;
 }
 
-static VALUE
-rb_struct_ref(VALUE obj)
-{
-    return rb_struct_getmember(obj, rb_frame_this_func());
-}
-
 static VALUE rb_struct_ref0(VALUE obj) {return RSTRUCT_GET(obj, 0);}
 static VALUE rb_struct_ref1(VALUE obj) {return RSTRUCT_GET(obj, 1);}
 static VALUE rb_struct_ref2(VALUE obj) {return RSTRUCT_GET(obj, 2);}
@@ -145,31 +144,6 @@ rb_struct_modify(VALUE s)
 }
 
 static VALUE
-rb_struct_set(VALUE obj, VALUE val)
-{
-    VALUE members, slot, fsym;
-    long i, len;
-    ID fid = rb_frame_this_func();
-
-    members = rb_struct_members(obj);
-    len = RARRAY_LEN(members);
-    rb_struct_modify(obj);
-    fid = rb_id_attrget(fid);
-    if (!fid) not_a_member(rb_frame_this_func());
-    fsym = ID2SYM(fid);
-    for (i=0; i<len; i++) {
-	slot = RARRAY_AREF(members, i);
-	if (slot == fsym) {
-	    RSTRUCT_SET(obj, i, val);
-	    return val;
-	}
-    }
-    not_a_member(fid);
-
-    UNREACHABLE;
-}
-
-static VALUE
 anonymous_struct(VALUE klass)
 {
     VALUE nstr;
@@ -198,6 +172,24 @@ new_struct(VALUE name, VALUE super)
     return rb_define_class_id_under(super, id, super);
 }
 
+static void
+define_aref_method(VALUE nstr, VALUE name, VALUE off)
+{
+    rb_iseq_t *iseq = rb_method_for_self_aref(name, off);
+
+    rb_add_method(nstr, SYM2ID(name), VM_METHOD_TYPE_ISEQ, iseq, NOEX_PUBLIC);
+    RB_GC_GUARD(iseq->self);
+}
+
+static void
+define_aset_method(VALUE nstr, VALUE name, VALUE off)
+{
+    rb_iseq_t *iseq = rb_method_for_self_aset(name, off);
+
+    rb_add_method(nstr, SYM2ID(name), VM_METHOD_TYPE_ISEQ, iseq, NOEX_PUBLIC);
+    RB_GC_GUARD(iseq->self);
+}
+
 static VALUE
 setup_struct(VALUE nstr, VALUE members)
 {
@@ -215,13 +207,15 @@ setup_struct(VALUE nstr, VALUE members)
     len = RARRAY_LEN(members);
     for (i=0; i< len; i++) {
 	ID id = SYM2ID(ptr_members[i]);
+	VALUE off = LONG2NUM(i);
+
 	if (i < N_REF_FUNC) {
 	    rb_define_method_id(nstr, id, ref_func[i], 0);
 	}
 	else {
-	    rb_define_method_id(nstr, id, rb_struct_ref, 0);
+	    define_aref_method(nstr, ptr_members[i], off);
 	}
-	rb_define_method_id(nstr, rb_id_attrset(id), rb_struct_set, 1);
+	define_aset_method(nstr, ID2SYM(rb_id_attrset(id)), off);
     }
 
     return nstr;
diff --git a/test/ruby/test_struct.rb b/test/ruby/test_struct.rb
index 71d93fb..8307d79 100644
--- a/test/ruby/test_struct.rb
+++ b/test/ruby/test_struct.rb
@@ -186,6 +186,15 @@ module TestStruct
     assert_raise(ArgumentError) { o.select(1) }
   end
 
+  def test_big_struct
+    klass1 = @Struct.new(*('a'..'z').map(&:to_sym))
+    o = klass1.new
+    assert_nil o.z
+    assert_equal(:foo, o.z = :foo)
+    assert_equal(:foo, o.z)
+    assert_equal(:foo, o[25])
+  end
+
   def test_equal
     klass1 = @Struct.new(:a)
     klass2 = @Struct.new(:a, :b)
-- 
EW

