Project

General

Profile

Actions

Feature #12463

closed

ruby lacks plus-plus

Added by shyouhei (Shyouhei Urabe) almost 8 years ago. Updated almost 7 years ago.

Status:
Rejected
Target version:
-
[ruby-core:75859]

Description

From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001
From: "Urabe, Shyouhei"
Date: Mon, 6 Jun 2016 17:07:11 +0900
Subject: [PATCH 1/1] ruby lacks plus-plus operation.

This commit adds opt_plusplus VM instruction. Before this, a ruby
snippet foo += 1 was converted to up to 4 VM instructions. By
unifying these instructions into one, we can avoid unnecessary stack
manipulations and can issue dedicated machine instruction, if any.

This speeds up things. A set of microbenchmark results shows that it
gains up to 180% boost on a very tight loop.


raw data:

[["loop_whileloop",
  [[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657],
   [0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]],
 ["loop_whileloop2",
  [[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851],
   [0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]]

Elapsed time: 7.775304 (sec)


benchmark results:
minimum results in each 7 measurements.
Execution time (sec)

name | trunk| ours
---------------+------+--------
loop_whileloop | 0.562| 0.311
loop_whileloop2| 0.121| 0.072

Speedup ratio: compare with the result of `trunk' (greater is better)

name | ours
---------------+------
loop_whileloop | 1.808
loop_whileloop2| 1.678


Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to
31.98 fps (102.6%). This is not that huge gain (disappointed), but it
does speeds up more or less.


raw data:

[["optcarrot",
  [[30.403074843558894, 30.310258944580042, 32.71944350007326],
   [31.959367571755536, 31.721515210358586, 32.26915908959967]]]]

	* configure.in (__builtin_add_overflow): check existence of this
	  compiler intrinsic which is available in recent gcc and clang,
	  with expectedly-optimal implementation of overflow detection.

	* insns.def (opt_plusplus): new instruction.  This unified
	  instruction is expected to appear when some local variable is
	  `+=1`ed.  That was converted to 4 separate instructions before
	  (load variable, load immediate one, call plus, store variable).

	* insns.def (opt_plus): refactor move implementation to share with
	  opt_plusplus.

	* vm_insnhelper.c (opt_plus_func): moved to here.

	* compile.c (iseq_peephole_optimize): generate opt_plusplus when
	  possible.

Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org>
---
 compile.c       |  46 ++++++++++++++++++++++++
 configure.in    |   1 +
 insns.def       | 109 +++++++++++++++++++++++++++++++++++---------------------
 vm_insnhelper.c |  62 ++++++++++++++++++++++++++++++++
 4 files changed, 177 insertions(+), 41 deletions(-)

diff --git a/compile.c b/compile.c
index cc496cb..6d99d0b 100644
--- a/compile.c
+++ b/compile.c
@@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
 	}
     }
 
+    if (IS_INSN_ID(iobj, getlocal)) {
+	/*
+	 *  getlocal_OP__WC__0
+	 *  putobject_OP_INT2FIX_O_1_C_
+	 *  opt_plus
+	 *  setlocal_OP__WC__0
+	 * =>
+	 *  opt_plusplus
+	 */
+	INSN *nobj = (INSN *)iobj->link.next;
+	VALUE idx = OPERAND_AT(iobj, 0);
+	VALUE lev = OPERAND_AT(iobj, 1);
+	if (IS_INSN((LINK_ELEMENT *)nobj) &&
+	    IS_INSN_ID(nobj, putobject)) {
+	    if(OPERAND_AT(nobj, 0) == INT2FIX(1)) {
+		nobj = (INSN * )nobj->link.next;
+		if (IS_INSN((LINK_ELEMENT *)nobj) &&
+		    IS_INSN_ID(nobj, send)) {
+		    struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0);
+		    struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1);
+		    if ((ci->flag & VM_CALL_ARGS_SIMPLE) &&
+			(ci->orig_argc == 1) &&
+			(ci->mid == idPLUS)) {
+			nobj = (INSN *)nobj->link.next;
+			if (IS_INSN((LINK_ELEMENT *)nobj) &&
+			    IS_INSN_ID(nobj, setlocal)) {
+			    if ((OPERAND_AT(nobj, 0) == idx) &&
+				(OPERAND_AT(nobj, 1) == lev)) {
+				INSN *opt_plusplus = new_insn_body(
+				    iseq, iobj->line_no, BIN(opt_plusplus),
+				    4, idx, lev, (VALUE)ci, (VALUE)cc);
+				nobj = (INSN * )nobj->link.next;
+				REMOVE_ELEM(nobj->link.prev->prev->prev->prev);
+				REMOVE_ELEM(nobj->link.prev->prev->prev);
+				REMOVE_ELEM(nobj->link.prev->prev);
+				REMOVE_ELEM(nobj->link.prev);
+				INSERT_ELEM_PREV(
+				    &nobj->link, (LINK_ELEMENT *)opt_plusplus);
+			    }
+			}
+		    }
+		}
+	    }
+	}
+    }
+
     if (do_tailcallopt &&
 	(IS_INSN_ID(iobj, send) ||
 	 IS_INSN_ID(iobj, opt_aref_with) ||
diff --git a/configure.in b/configure.in
index 063d839..e0b7a91 100644
--- a/configure.in
+++ b/configure.in
@@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then
     ])
 fi
 RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)])
 
 if test "$ac_cv_func_qsort_r" != no; then
   AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r,
diff --git a/insns.def b/insns.def
index cdc8287..1438e29 100644
--- a/insns.def
+++ b/insns.def
@@ -1377,47 +1377,7 @@ opt_plus
 (VALUE recv, VALUE obj)
 (VALUE val)
 {
-    if (FIXNUM_2_P(recv, obj) &&
-	BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
-	/* fixnum + fixnum */
-#ifndef LONG_LONG_VALUE
-	val = (recv + (obj & (~1)));
-	if ((~(recv ^ obj) & (recv ^ val)) &
-	    ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
-	    val = rb_big_plus(rb_int2big(FIX2LONG(recv)),
-			      rb_int2big(FIX2LONG(obj)));
-	}
-#else
-	long a, b, c;
-	a = FIX2LONG(recv);
-	b = FIX2LONG(obj);
-	c = a + b;
-	val = LONG2NUM(c);
-#endif
-    }
-    else if (FLONUM_2_P(recv, obj) &&
-	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
-	val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
-    }
-    else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
-	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
-	    BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
-	    val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
-	}
-	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
-	    val = rb_str_plus(recv, obj);
-	}
-	else if (RBASIC_CLASS(recv) == rb_cArray &&
-		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
-	    val = rb_ary_plus(recv, obj);
-	}
-	else {
-	    goto INSN_LABEL(normal_dispatch);
-	}
-    }
-    else {
-	INSN_LABEL(normal_dispatch):
+    if ((val = opt_plus_func(recv, obj)) == Qundef) {
 	PUSH(recv);
 	PUSH(obj);
 	CALL_SIMPLE_METHOD(recv);
@@ -2163,6 +2123,73 @@ opt_call_c_function
 }
 
 /**
+  @c optimize
+  @e unified instruction that does getlocal + putobject + opt_plus + setlocal.
+ */
+DEFINE_INSN
+opt_plusplus
+(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc)
+()
+()
+{
+    int const lv = level;
+    VALUE *ep = GET_EP();
+    VALUE *ptr;
+    VALUE tmp;
+    int i;
+
+    for (i = 0; i < lv; i++) {
+	ep = GET_PREV_EP(ep);
+    }
+    ptr = ep - idx;
+
+    if (FIXNUM_P(*ptr) &&
+        BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+#if defined LONG_LONG_VALUE
+        long a, b;
+        a = FIX2LONG(*ptr);
+        b = a + 1;
+        *ptr = LONG2NUM(b);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+        const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+        signed long ret;
+        if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) {
+            *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)),
+                               rb_int2big(FIX2LONG(1)));
+        }
+        else {
+            *ptr = (VALUE)ret;
+        }
+#else
+        const VALUE a, b, c;
+        a = *ptr;
+        b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+        c = a + b;
+        if ((~(a ^ b) & (a ^ c)) &
+            ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+            *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)),
+                              rb_int2big(FIX2LONG(b)));
+        }
+        else {
+            *ptr = c;
+        }
+#endif
+    }
+    else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) {
+        *ptr = tmp;
+    }
+    else {
+        VALUE val;
+        VALUE recv = *ptr;
+
+	PUSH(recv);
+	PUSH(INT2FIX(1));
+	CALL_SIMPLE_METHOD(recv);
+        *ptr = val;
+    }
+}
+
+/**
   @c joke
   @e BLT
   @j BLT
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 691c37a..4ff21c5 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2)
     return opt_eq_func(obj1, obj2, &ci, &cc);
 }
 
+static inline VALUE
+fixnum_plus_fixnum(VALUE i, VALUE j)
+{
+#if defined LONG_LONG_VALUE
+    long a, b, c;
+    a = FIX2LONG(i);
+    b = FIX2LONG(j);
+    c = a + b;
+    return LONG2NUM(c);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+    signed long ret;
+    if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) {
+	return rb_big_plus(rb_int2big(FIX2LONG(i)),
+			   rb_int2big(FIX2LONG(j)));
+    }
+    else {
+	return (VALUE)ret;
+    }
+#else
+    VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG));
+    if ((~(i ^ j) & (i ^ val)) &
+	((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+	val = rb_big_plus(rb_int2big(FIX2LONG(i)),
+			  rb_int2big(FIX2LONG(j)));
+    }
+    return val;
+#endif
+}
+
+static inline VALUE
+opt_plus_func(VALUE recv, VALUE obj)
+{
+    if (FIXNUM_2_P(recv, obj) &&
+	BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+	return fixnum_plus_fixnum(recv, obj);
+    }
+    else if (FLONUM_2_P(recv, obj) &&
+	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+    }
+    else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
+	if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
+	    BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+	    return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+	}
+	else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
+		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+	    return rb_str_plus(recv, obj);
+	}
+	else if (RBASIC_CLASS(recv) == rb_cArray &&
+		 BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+	    return rb_ary_plus(recv, obj);
+	}
+	else {
+	    return Qundef;
+	}
+    }
+    else {
+	return Qundef;
+    }
+}
+
 static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *);
 
 static VALUE
-- 
2.8.3


Related issues 1 (0 open1 closed)

Related to Ruby master - Feature #12607: Ruby needs an atomic integerFeedbackko1 (Koichi Sasada)Actions
Actions

Also available in: Atom PDF

Like0
Like0Like0Like0Like0Like0Like0Like0Like0Like0Like0Like0Like0Like0Like0