Feature #12463
closedruby lacks plus-plus
Description
From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001
From: "Urabe, Shyouhei" shyouhei@ruby-lang.org
Date: Mon, 6 Jun 2016 17:07:11 +0900
Subject: [PATCH 1/1] ruby lacks plus-plus operation.
This commit adds opt_plusplus VM instruction. Before this, a ruby
snippet foo += 1
was converted to up to 4 VM instructions. By
unifying these instructions into one, we can avoid unnecessary stack
manipulations and can issue dedicated machine instruction, if any.
This speeds up things. A set of microbenchmark results shows that it
gains up to 180% boost on a very tight loop.
raw data:
[["loop_whileloop",
[[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657],
[0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]],
["loop_whileloop2",
[[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851],
[0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]]
Elapsed time: 7.775304 (sec)
benchmark results:
minimum results in each 7 measurements.
Execution time (sec)
name | trunk| ours
---------------+------+--------
loop_whileloop | 0.562| 0.311
loop_whileloop2| 0.121| 0.072
Speedup ratio: compare with the result of `trunk' (greater is better)
name | ours
---------------+------
loop_whileloop | 1.808
loop_whileloop2| 1.678
Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to
31.98 fps (102.6%). This is not that huge gain (disappointed), but it
does speeds up more or less.
raw data:
[["optcarrot",
[[30.403074843558894, 30.310258944580042, 32.71944350007326],
[31.959367571755536, 31.721515210358586, 32.26915908959967]]]]
* configure.in (__builtin_add_overflow): check existence of this
compiler intrinsic which is available in recent gcc and clang,
with expectedly-optimal implementation of overflow detection.
* insns.def (opt_plusplus): new instruction. This unified
instruction is expected to appear when some local variable is
`+=1`ed. That was converted to 4 separate instructions before
(load variable, load immediate one, call plus, store variable).
* insns.def (opt_plus): refactor move implementation to share with
opt_plusplus.
* vm_insnhelper.c (opt_plus_func): moved to here.
* compile.c (iseq_peephole_optimize): generate opt_plusplus when
possible.
Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org>
---
compile.c | 46 ++++++++++++++++++++++++
configure.in | 1 +
insns.def | 109 +++++++++++++++++++++++++++++++++++---------------------
vm_insnhelper.c | 62 ++++++++++++++++++++++++++++++++
4 files changed, 177 insertions(+), 41 deletions(-)
diff --git a/compile.c b/compile.c
index cc496cb..6d99d0b 100644
--- a/compile.c
+++ b/compile.c
@@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
}
}
+ if (IS_INSN_ID(iobj, getlocal)) {
+ /*
+ * getlocal_OP__WC__0
+ * putobject_OP_INT2FIX_O_1_C_
+ * opt_plus
+ * setlocal_OP__WC__0
+ * =>
+ * opt_plusplus
+ */
+ INSN *nobj = (INSN *)iobj->link.next;
+ VALUE idx = OPERAND_AT(iobj, 0);
+ VALUE lev = OPERAND_AT(iobj, 1);
+ if (IS_INSN((LINK_ELEMENT *)nobj) &&
+ IS_INSN_ID(nobj, putobject)) {
+ if(OPERAND_AT(nobj, 0) == INT2FIX(1)) {
+ nobj = (INSN * )nobj->link.next;
+ if (IS_INSN((LINK_ELEMENT *)nobj) &&
+ IS_INSN_ID(nobj, send)) {
+ struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0);
+ struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1);
+ if ((ci->flag & VM_CALL_ARGS_SIMPLE) &&
+ (ci->orig_argc == 1) &&
+ (ci->mid == idPLUS)) {
+ nobj = (INSN *)nobj->link.next;
+ if (IS_INSN((LINK_ELEMENT *)nobj) &&
+ IS_INSN_ID(nobj, setlocal)) {
+ if ((OPERAND_AT(nobj, 0) == idx) &&
+ (OPERAND_AT(nobj, 1) == lev)) {
+ INSN *opt_plusplus = new_insn_body(
+ iseq, iobj->line_no, BIN(opt_plusplus),
+ 4, idx, lev, (VALUE)ci, (VALUE)cc);
+ nobj = (INSN * )nobj->link.next;
+ REMOVE_ELEM(nobj->link.prev->prev->prev->prev);
+ REMOVE_ELEM(nobj->link.prev->prev->prev);
+ REMOVE_ELEM(nobj->link.prev->prev);
+ REMOVE_ELEM(nobj->link.prev);
+ INSERT_ELEM_PREV(
+ &nobj->link, (LINK_ELEMENT *)opt_plusplus);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
if (do_tailcallopt &&
(IS_INSN_ID(iobj, send) ||
IS_INSN_ID(iobj, opt_aref_with) ||
diff --git a/configure.in b/configure.in
index 063d839..e0b7a91 100644
--- a/configure.in
+++ b/configure.in
@@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then
])
fi
RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)])
+RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)])
if test "$ac_cv_func_qsort_r" != no; then
AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r,
diff --git a/insns.def b/insns.def
index cdc8287..1438e29 100644
--- a/insns.def
+++ b/insns.def
@@ -1377,47 +1377,7 @@ opt_plus
(VALUE recv, VALUE obj)
(VALUE val)
{
- if (FIXNUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
- /* fixnum + fixnum */
-#ifndef LONG_LONG_VALUE
- val = (recv + (obj & (~1)));
- if ((~(recv ^ obj) & (recv ^ val)) &
- ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
- val = rb_big_plus(rb_int2big(FIX2LONG(recv)),
- rb_int2big(FIX2LONG(obj)));
- }
-#else
- long a, b, c;
- a = FIX2LONG(recv);
- b = FIX2LONG(obj);
- c = a + b;
- val = LONG2NUM(c);
-#endif
- }
- else if (FLONUM_2_P(recv, obj) &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
- val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
- }
- else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
- if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
- val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
- }
- else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
- val = rb_str_plus(recv, obj);
- }
- else if (RBASIC_CLASS(recv) == rb_cArray &&
- BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
- val = rb_ary_plus(recv, obj);
- }
- else {
- goto INSN_LABEL(normal_dispatch);
- }
- }
- else {
- INSN_LABEL(normal_dispatch):
+ if ((val = opt_plus_func(recv, obj)) == Qundef) {
PUSH(recv);
PUSH(obj);
CALL_SIMPLE_METHOD(recv);
@@ -2163,6 +2123,73 @@ opt_call_c_function
}
/**
+ @c optimize
+ @e unified instruction that does getlocal + putobject + opt_plus + setlocal.
+ */
+DEFINE_INSN
+opt_plusplus
+(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc)
+()
+()
+{
+ int const lv = level;
+ VALUE *ep = GET_EP();
+ VALUE *ptr;
+ VALUE tmp;
+ int i;
+
+ for (i = 0; i < lv; i++) {
+ ep = GET_PREV_EP(ep);
+ }
+ ptr = ep - idx;
+
+ if (FIXNUM_P(*ptr) &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+#if defined LONG_LONG_VALUE
+ long a, b;
+ a = FIX2LONG(*ptr);
+ b = a + 1;
+ *ptr = LONG2NUM(b);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+ const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+ signed long ret;
+ if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) {
+ *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)),
+ rb_int2big(FIX2LONG(1)));
+ }
+ else {
+ *ptr = (VALUE)ret;
+ }
+#else
+ const VALUE a, b, c;
+ a = *ptr;
+ b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG;
+ c = a + b;
+ if ((~(a ^ b) & (a ^ c)) &
+ ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+ *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)),
+ rb_int2big(FIX2LONG(b)));
+ }
+ else {
+ *ptr = c;
+ }
+#endif
+ }
+ else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) {
+ *ptr = tmp;
+ }
+ else {
+ VALUE val;
+ VALUE recv = *ptr;
+
+ PUSH(recv);
+ PUSH(INT2FIX(1));
+ CALL_SIMPLE_METHOD(recv);
+ *ptr = val;
+ }
+}
+
+/**
@c joke
@e BLT
@j BLT
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 691c37a..4ff21c5 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2)
return opt_eq_func(obj1, obj2, &ci, &cc);
}
+static inline VALUE
+fixnum_plus_fixnum(VALUE i, VALUE j)
+{
+#if defined LONG_LONG_VALUE
+ long a, b, c;
+ a = FIX2LONG(i);
+ b = FIX2LONG(j);
+ c = a + b;
+ return LONG2NUM(c);
+#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW
+ signed long ret;
+ if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) {
+ return rb_big_plus(rb_int2big(FIX2LONG(i)),
+ rb_int2big(FIX2LONG(j)));
+ }
+ else {
+ return (VALUE)ret;
+ }
+#else
+ VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG));
+ if ((~(i ^ j) & (i ^ val)) &
+ ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) {
+ val = rb_big_plus(rb_int2big(FIX2LONG(i)),
+ rb_int2big(FIX2LONG(j)));
+ }
+ return val;
+#endif
+}
+
+static inline VALUE
+opt_plus_func(VALUE recv, VALUE obj)
+{
+ if (FIXNUM_2_P(recv, obj) &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) {
+ return fixnum_plus_fixnum(recv, obj);
+ }
+ else if (FLONUM_2_P(recv, obj) &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+ return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+ }
+ else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) {
+ if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) {
+ return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj));
+ }
+ else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) {
+ return rb_str_plus(recv, obj);
+ }
+ else if (RBASIC_CLASS(recv) == rb_cArray &&
+ BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) {
+ return rb_ary_plus(recv, obj);
+ }
+ else {
+ return Qundef;
+ }
+ }
+ else {
+ return Qundef;
+ }
+}
+
static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *);
static VALUE
--
2.8.3