Feature #12463
Updated by nobu (Nobuyoshi Nakada) over 8 years ago
``` From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001 From: "Urabe, Shyouhei" <shyouhei@ruby-lang.org> Date: Mon, 6 Jun 2016 17:07:11 +0900 Subject: [PATCH 1/1] ruby lacks plus-plus operation. This commit adds opt_plusplus VM instruction. Before this, a ruby snippet `foo += 1` was converted to up to 4 VM instructions. By unifying these instructions into one, we can avoid unnecessary stack manipulations and can issue dedicated machine instruction, if any. This speeds up things. A set of microbenchmark results shows that it gains up to 180% boost on a very tight loop. ----------------------------------------------------------- raw data: ``` [["loop_whileloop", [[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657], [0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]], ["loop_whileloop2", [[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851], [0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]] ``` Elapsed time: 7.775304 (sec) ----------------------------------------------------------- benchmark results: minimum results in each 7 measurements. Execution time (sec) name | trunk| trunk ours ---------------+------+-------- loop_whileloop | 0.562| 0.562 0.311 loop_whileloop2| 0.121| loop_whileloop2 0.121 0.072 Speedup ratio: compare with the result of `trunk' (greater is better) name | ours ---------------+------ loop_whileloop | 1.808 loop_whileloop2| loop_whileloop2 1.678 ----------------------------------------------------------- Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to 31.98 fps (102.6%). This is not that huge gain (disappointed), but it does speeds up more or less. ----------------------------------------------------------- raw data: ``` [["optcarrot", [[30.403074843558894, 30.310258944580042, 32.71944350007326], [31.959367571755536, 31.721515210358586, 32.26915908959967]]]] ``` ----------------------------------------------------------- ```diff * configure.in (__builtin_add_overflow): check existence of this compiler intrinsic which is available in recent gcc and clang, with expectedly-optimal implementation of overflow detection. * insns.def (opt_plusplus): new instruction. This unified instruction is expected to appear when some local variable is `+=1`ed. That was converted to 4 separate instructions before (load variable, load immediate one, call plus, store variable). * insns.def (opt_plus): refactor move implementation to share with opt_plusplus. * vm_insnhelper.c (opt_plus_func): moved to here. * compile.c (iseq_peephole_optimize): generate opt_plusplus when possible. Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org> --- compile.c | 46 ++++++++++++++++++++++++ configure.in | 1 + insns.def | 109 +++++++++++++++++++++++++++++++++++--------------------- vm_insnhelper.c | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 41 deletions(-) diff --git a/compile.c b/compile.c index cc496cb..6d99d0b 100644 --- a/compile.c +++ b/compile.c @@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal } } + if (IS_INSN_ID(iobj, getlocal)) { + /* + * getlocal_OP__WC__0 + * putobject_OP_INT2FIX_O_1_C_ + * opt_plus + * setlocal_OP__WC__0 + * => + * opt_plusplus + */ + INSN *nobj = (INSN *)iobj->link.next; + VALUE idx = OPERAND_AT(iobj, 0); + VALUE lev = OPERAND_AT(iobj, 1); + if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, putobject)) { + if(OPERAND_AT(nobj, 0) == INT2FIX(1)) { + nobj = (INSN * )nobj->link.next; + if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, send)) { + struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0); + struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1); + if ((ci->flag & VM_CALL_ARGS_SIMPLE) && + (ci->orig_argc == 1) && + (ci->mid == idPLUS)) { + nobj = (INSN *)nobj->link.next; + if (IS_INSN((LINK_ELEMENT *)nobj) && + IS_INSN_ID(nobj, setlocal)) { + if ((OPERAND_AT(nobj, 0) == idx) && + (OPERAND_AT(nobj, 1) == lev)) { + INSN *opt_plusplus = new_insn_body( + iseq, iobj->line_no, BIN(opt_plusplus), + 4, idx, lev, (VALUE)ci, (VALUE)cc); + nobj = (INSN * )nobj->link.next; + REMOVE_ELEM(nobj->link.prev->prev->prev->prev); + REMOVE_ELEM(nobj->link.prev->prev->prev); + REMOVE_ELEM(nobj->link.prev->prev); + REMOVE_ELEM(nobj->link.prev); + INSERT_ELEM_PREV( + &nobj->link, (LINK_ELEMENT *)opt_plusplus); + } + } + } + } + } + } + } + if (do_tailcallopt && (IS_INSN_ID(iobj, send) || IS_INSN_ID(iobj, opt_aref_with) || diff --git a/configure.in b/configure.in index 063d839..e0b7a91 100644 --- a/configure.in +++ b/configure.in @@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then ]) fi RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)]) +RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)]) if test "$ac_cv_func_qsort_r" != no; then AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r, diff --git a/insns.def b/insns.def index cdc8287..1438e29 100644 --- a/insns.def +++ b/insns.def @@ -1377,47 +1377,7 @@ opt_plus (VALUE recv, VALUE obj) (VALUE val) { - if (FIXNUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { - /* fixnum + fixnum */ -#ifndef LONG_LONG_VALUE - val = (recv + (obj & (~1))); - if ((~(recv ^ obj) & (recv ^ val)) & - ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { - val = rb_big_plus(rb_int2big(FIX2LONG(recv)), - rb_int2big(FIX2LONG(obj))); - } -#else - long a, b, c; - a = FIX2LONG(recv); - b = FIX2LONG(obj); - c = a + b; - val = LONG2NUM(c); -#endif - } - else if (FLONUM_2_P(recv, obj) && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { - val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); - } - else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { - if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { - val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); - } - else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { - val = rb_str_plus(recv, obj); - } - else if (RBASIC_CLASS(recv) == rb_cArray && - BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { - val = rb_ary_plus(recv, obj); - } - else { - goto INSN_LABEL(normal_dispatch); - } - } - else { - INSN_LABEL(normal_dispatch): + if ((val = opt_plus_func(recv, obj)) == Qundef) { PUSH(recv); PUSH(obj); CALL_SIMPLE_METHOD(recv); @@ -2163,6 +2123,73 @@ opt_call_c_function } /** + @c optimize + @e unified instruction that does getlocal + putobject + opt_plus + setlocal. + */ +DEFINE_INSN +opt_plusplus +(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc) +() +() +{ + int const lv = level; + VALUE *ep = GET_EP(); + VALUE *ptr; + VALUE tmp; + int i; + + for (i = 0; i < lv; i++) { + ep = GET_PREV_EP(ep); + } + ptr = ep - idx; + + if (FIXNUM_P(*ptr) && + BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { +#if defined LONG_LONG_VALUE + long a, b; + a = FIX2LONG(*ptr); + b = a + 1; + *ptr = LONG2NUM(b); +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW + const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; + signed long ret; + if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) { + *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)), + rb_int2big(FIX2LONG(1))); + } + else { + *ptr = (VALUE)ret; + } +#else + const VALUE a, b, c; + a = *ptr; + b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; + c = a + b; + if ((~(a ^ b) & (a ^ c)) & + ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { + *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)), + rb_int2big(FIX2LONG(b))); + } + else { + *ptr = c; + } +#endif + } + else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) { + *ptr = tmp; + } + else { + VALUE val; + VALUE recv = *ptr; + + PUSH(recv); + PUSH(INT2FIX(1)); + CALL_SIMPLE_METHOD(recv); + *ptr = val; + } +} + +/** @c joke @e BLT @j BLT diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 691c37a..4ff21c5 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2) return opt_eq_func(obj1, obj2, &ci, &cc); } +static inline VALUE +fixnum_plus_fixnum(VALUE i, VALUE j) +{ +#if defined LONG_LONG_VALUE + long a, b, c; + a = FIX2LONG(i); + b = FIX2LONG(j); + c = a + b; + return LONG2NUM(c); +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW + signed long ret; + if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) { + return rb_big_plus(rb_int2big(FIX2LONG(i)), + rb_int2big(FIX2LONG(j))); + } + else { + return (VALUE)ret; + } +#else + VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG)); + if ((~(i ^ j) & (i ^ val)) & + ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { + val = rb_big_plus(rb_int2big(FIX2LONG(i)), + rb_int2big(FIX2LONG(j))); + } + return val; +#endif +} + +static inline VALUE +opt_plus_func(VALUE recv, VALUE obj) +{ + if (FIXNUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { + return fixnum_plus_fixnum(recv, obj); + } + else if (FLONUM_2_P(recv, obj) && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { + return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); + } + else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { + if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { + return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); + } + else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { + return rb_str_plus(recv, obj); + } + else if (RBASIC_CLASS(recv) == rb_cArray && + BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { + return rb_ary_plus(recv, obj); + } + else { + return Qundef; + } + } + else { + return Qundef; + } +} + static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *); static VALUE -- 2.8.3 ```