Project

General

Profile

Actions

Feature #12463

closed

ruby lacks plus-plus

Added by shyouhei (Shyouhei Urabe) over 5 years ago. Updated over 4 years ago.

Status:
Rejected
Priority:
Normal
Target version:
-
[ruby-core:75859]

Description

From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001
From: "Urabe, Shyouhei" shyouhei@ruby-lang.org
Date: Mon, 6 Jun 2016 17:07:11 +0900
Subject: [PATCH 1/1] ruby lacks plus-plus operation.

This commit adds opt_plusplus VM instruction. Before this, a ruby
snippet foo += 1 was converted to up to 4 VM instructions. By
unifying these instructions into one, we can avoid unnecessary stack
manipulations and can issue dedicated machine instruction, if any.

This speeds up things. A set of microbenchmark results shows that it
gains up to 180% boost on a very tight loop.


raw data:

[["loop_whileloop",
  [[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657],
   [0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]],
 ["loop_whileloop2",
  [[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851],
   [0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]]

Elapsed time: 7.775304 (sec)


benchmark results:
minimum results in each 7 measurements.
Execution time (sec)

name trunk ours
loop_whileloop 0.562 0.311
loop_whileloop2 0.121 0.072

Speedup ratio: compare with the result of `trunk' (greater is better)

name ours
loop_whileloop 1.808
loop_whileloop2 1.678

Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to
31.98 fps (102.6%). This is not that huge gain (disappointed), but it
does speeds up more or less.


raw data:

[["optcarrot",
  [[30.403074843558894, 30.310258944580042, 32.71944350007326],
   [31.959367571755536, 31.721515210358586, 32.26915908959967]]]]

    * configure.in (__builtin_add_overflow): check existence of this
      compiler intrinsic which is available in recent gcc and clang,
      with expectedly-optimal implementation of overflow detection.

    * insns.def (opt_plusplus): new instruction.  This unified
      instruction is expected to appear when some local variable is
      `+=1`ed.  That was converted to 4 separate instructions before
      (load variable, load immediate one, call plus, store variable).

    * insns.def (opt_plus): refactor move implementation to share with
      opt_plusplus.

    * vm_insnhelper.c (opt_plus_func): moved to here.

    * compile.c (iseq_peephole_optimize): generate opt_plusplus when
      possible.

Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org>
---
 compile.c       |  46 ++++++++++++++++++++++++
 configure.in    |   1 +
 insns.def       | 109 +++++++++++++++++++++++++++++++++++---------------------
 vm_insnhelper.c |  62 ++++++++++++++++++++++++++++++++
 4 files changed, 177 insertions(+), 41 deletions(-)

diff --git a/compile.c b/compile.c
index cc496cb..6d99d0b 100644
--- a/compile.c
+++ b/compile.c
@@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
    }
     }

+    if (IS_INSN_ID(iobj, getlocal)) {
+   /*
+    *  getlocal_OP__WC__0
+    *  putobject_OP_INT2FIX_O_1_C_
+    *  opt_plus
+    *  setlocal_OP__WC__0
+    * =>
+    *  opt_plusplus
+    */
+   INSN *nobj = (INSN *)iobj->link.next;
+   VALUE idx = OPERAND_AT(iobj, 0);
+   VALUE lev = OPERAND_AT(iobj, 1);
+   if (IS_INSN((LINK_ELEMENT *)nobj) &&
+       IS_INSN_ID(nobj, putobject)) {
+       if(OPERAND_AT(nobj, 0) == INT2FIX(1)) {
+       nobj = (INSN * )nobj->link.next;
+       if (IS_INSN((LINK_ELEMENT *)nobj) &&
+           IS_INSN_ID(nobj, send)) {
+           struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0);
+           struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1);
+           if ((ci->flag & VM_CALL_ARGS_SIMPLE) &&
+           (ci->orig_argc == 1) &&
+           (ci->mid == idPLUS)) {
+           nobj = (INSN *)nobj->link.next;
+           if (IS_INSN((LINK_ELEMENT *)nobj) &&
+               IS_INSN_ID(nobj, setlocal)) {
+               if ((OPERAND_AT(nobj, 0) == idx) &&
+               (OPERAND_AT(nobj, 1) == lev)) {
+               INSN *opt_plusplus = new_insn_body(
+                   iseq, iobj->line_no, BIN(opt_plusplus),
+                   4, idx, lev, (VALUE)ci, (VALUE)cc);
+               nobj = (INSN * )nobj->link.next;
+               REMOVE_ELEM(nobj->link.prev->prev->prev->prev);
+               REMOVE_ELEM(nobj->link.prev->prev->prev);
+               REMOVE_ELEM(nobj->link.prev->prev);
+               REMOVE_ELEM(nobj->link.prev);
+               INSERT_ELEM_PREV(
+                   &nobj->link, (LINK_ELEMENT *)opt_plusplus);
+               }
+           }
+           }
+       }
+       }
+   }
+    }
+
     if (do_tailcallopt &&
    (IS_INSN_ID(iobj, send) ||
     IS_INSN_ID(iobj, opt_aref_with) ||
diff --git a/configure.in b/configure.in
index 063d839..e0b7a91 100644
--- a/configure.in
+++ b/configure.in
@@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then
     ])
 fi
 RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)])

 if test "$ac_cv_func_qsort_r" != no; then
   AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r,
diff --git a/insns.def b/insns.def
index cdc8287..1438e29 100644
--- a/insns.def
+++ b/insns.def
@@ -1377,47 +1377,7 @@ opt_plus
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-   BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
-   /* fixnum + fixnum */
-#ifndef LONG_LONG_VALUE
-   val = (recv + (obj & (~1)));
-   if ((~(recv ^ obj) & (recv ^ val)) &
-       ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
-       val = rb_big_plus(rb_int2big(FIX2LONG(recv)),
-                 rb_int2big(FIX2LONG(obj)));
-   }
-#else
-   long a, b, c;
-   a = FIX2LONG(recv);
-   b = FIX2LONG(obj);
-   c = a + b;
-   val = LONG2NUM(c);
-#endif
-    }
-    else if (FLONUM_2_P(recv, obj) &&
-        BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
-   val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
-    }
-    else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
-   if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-       BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
-       val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
-   }
-   else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
-        BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
-       val = rb_str_plus(recv, obj);
-   }
-   else if (RBASIC_CLASS(recv) == rb_cArray &&
-        BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
-       val = rb_ary_plus(recv, obj);
-   }
-   else {
-       goto INSN_LABEL(normal_dispatch);
-   }
-    }
-    else {
-   INSN_LABEL(normal_dispatch):
+    if ((val = opt_plus_func(recv, obj)) == Qundef) {
    PUSH(recv);
    PUSH(obj);
    CALL_SIMPLE_METHOD(recv);
@@ -2163,6 +2123,73 @@ opt_call_c_function
 }

 /**
+  @c optimize
+  @e unified instruction that does getlocal + putobject + opt_plus + setlocal.
+ */
+DEFINE_INSN
+opt_plusplus
+(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc)
+()
+()
+{
+    int const lv = level;
+    VALUE *ep = GET_EP();
+    VALUE *ptr;
+    VALUE tmp;
+    int i;
+
+    for (i = 0; i < lv; i++) {
+   ep = GET_PREV_EP(ep);
+    }
+    ptr = ep - idx;
+
+    if (FIXNUM_P(*ptr) &&
+        BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+#if defined LONG_LONG_VALUE
+        long a, b;
+        a = FIX2LONG(*ptr);
+        b = a + 1;
+        *ptr = LONG2NUM(b);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+        const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+        signed long ret;
+        if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) {
+            *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)),
+                               rb_int2big(FIX2LONG(1)));
+        }
+        else {
+            *ptr = (VALUE)ret;
+        }
+#else
+        const VALUE a, b, c;
+        a = *ptr;
+        b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+        c = a + b;
+        if ((~(a ^ b) & (a ^ c)) &
+            ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+            *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)),
+                              rb_int2big(FIX2LONG(b)));
+        }
+        else {
+            *ptr = c;
+        }
+#endif
+    }
+    else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) {
+        *ptr = tmp;
+    }
+    else {
+        VALUE val;
+        VALUE recv = *ptr;
+
+   PUSH(recv);
+   PUSH(INT2FIX(1));
+   CALL_SIMPLE_METHOD(recv);
+        *ptr = val;
+    }
+}
+
+/**
   @c joke
   @e BLT
   @j BLT
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 691c37a..4ff21c5 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2)
     return opt_eq_func(obj1, obj2, &ci, &cc);
 }

+static inline VALUE
+fixnum_plus_fixnum(VALUE i, VALUE j)
+{
+#if defined LONG_LONG_VALUE
+    long a, b, c;
+    a = FIX2LONG(i);
+    b = FIX2LONG(j);
+    c = a + b;
+    return LONG2NUM(c);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+    signed long ret;
+    if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) {
+   return rb_big_plus(rb_int2big(FIX2LONG(i)),
+              rb_int2big(FIX2LONG(j)));
+    }
+    else {
+   return (VALUE)ret;
+    }
+#else
+    VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG));
+    if ((~(i ^ j) & (i ^ val)) &
+   ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+   val = rb_big_plus(rb_int2big(FIX2LONG(i)),
+             rb_int2big(FIX2LONG(j)));
+    }
+    return val;
+#endif
+}
+
+static inline VALUE
+opt_plus_func(VALUE recv, VALUE obj)
+{
+    if (FIXNUM_2_P(recv, obj) &&
+   BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+   return fixnum_plus_fixnum(recv, obj);
+    }
+    else if (FLONUM_2_P(recv, obj) &&
+        BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+   return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+    }
+    else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
+   if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
+       BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+       return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+   }
+   else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
+        BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+       return rb_str_plus(recv, obj);
+   }
+   else if (RBASIC_CLASS(recv) == rb_cArray &&
+        BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+       return rb_ary_plus(recv, obj);
+   }
+   else {
+       return Qundef;
+   }
+    }
+    else {
+   return Qundef;
+    }
+}
+
 static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *);

 static VALUE
-- 
2.8.3


Related issues

Related to Ruby master - Feature #12607: Ruby needs an atomic integerFeedbackko1 (Koichi Sasada)Actions
Actions

Also available in: Atom PDF