Project

General

Profile

Feature #12463

Updated by nobu (Nobuyoshi Nakada) almost 8 years ago

``` 
 From c0f0d0c5f5c58d56fec95ca4303a0f5db5b54d56 Mon Sep 17 00:00:00 2001 
 From: "Urabe, Shyouhei" <shyouhei@ruby-lang.org> 
 Date: Mon, 6 Jun 2016 17:07:11 +0900 
 Subject: [PATCH 1/1] ruby lacks plus-plus operation. 

 This commit adds opt_plusplus VM instruction.    Before this, a ruby 
 snippet `foo += 1` was converted to up to 4 VM instructions.    By 
 unifying these instructions into one, we can avoid unnecessary stack 
 manipulations and can issue dedicated machine instruction, if any. 

 This speeds up things.    A set of microbenchmark results shows that it 
 gains up to 180% boost on a very tight loop. 

 ----------------------------------------------------------- 
 raw data: 

 ``` 
 [["loop_whileloop", 
   [[0.624962, 0.579907, 0.562363, 0.579649, 0.589442, 0.595911, 0.583657], 
    [0.317089, 0.337159, 0.311376, 0.311721, 0.317585, 0.310999, 0.313107]]], 
  ["loop_whileloop2", 
   [[0.129038, 0.137306, 0.13944, 0.121268, 0.120862, 0.121065, 0.135851], 
    [0.072271, 0.074998, 0.080031, 0.078602, 0.07527, 0.07627, 0.072029]]]] 
 ``` 

 Elapsed time: 7.775304 (sec) 

 
 ----------------------------------------------------------- 
 benchmark results: 
 minimum results in each 7 measurements. 
 Execution time (sec) 

 
 name             | trunk|      trunk     ours 
 ---------------+------+-------- 
 loop_whileloop | 0.562|    0.562     0.311 
 loop_whileloop2| 0.121| loop_whileloop2 0.121     0.072 

 Speedup ratio: compare with the result of `trunk' (greater is better) 

 
 name             |      ours 
 ---------------+------ 
 loop_whileloop |    1.808 
 loop_whileloop2| loop_whileloop2 1.678 

 ----------------------------------------------------------- 

 Also for a nontrivial benchmark, optcarrot speeds up from 31.14 fps to 
 31.98 fps (102.6%).    This is not that huge gain (disappointed), but it 
 does speeds up more or less. 

 ----------------------------------------------------------- 
 raw data: 

 ``` 
 [["optcarrot", 
   [[30.403074843558894, 30.310258944580042, 32.71944350007326], 
    [31.959367571755536, 31.721515210358586, 32.26915908959967]]]] 
 ``` 

 ----------------------------------------------------------- 

 ```diff 
	 

	 * configure.in (__builtin_add_overflow): check existence of this 
	   compiler intrinsic which is available in recent gcc and clang, 
	   with expectedly-optimal implementation of overflow detection. 

	 * insns.def (opt_plusplus): new instruction.    This unified 
	   instruction is expected to appear when some local variable is 
	   `+=1`ed.    That was converted to 4 separate instructions before 
	   (load variable, load immediate one, call plus, store variable). 

	 * insns.def (opt_plus): refactor move implementation to share with 
	   opt_plusplus. 

	 * vm_insnhelper.c (opt_plus_func): moved to here. 

	 * compile.c (iseq_peephole_optimize): generate opt_plusplus when 
	   possible. 

 Signed-off-by: Urabe, Shyouhei <shyouhei@ruby-lang.org> 
 --- 
  compile.c         |    46 ++++++++++++++++++++++++ 
  configure.in      |     1 + 
  insns.def         | 109 +++++++++++++++++++++++++++++++++++--------------------- 
  vm_insnhelper.c |    62 ++++++++++++++++++++++++++++++++ 
  4 files changed, 177 insertions(+), 41 deletions(-) 

 diff --git a/compile.c b/compile.c 
 index cc496cb..6d99d0b 100644 
 --- a/compile.c 
 +++ b/compile.c 
 @@ -2254,6 +2254,52 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal 
 	 } 
      } 
 
 +      if (IS_INSN_ID(iobj, getlocal)) { 
 + 	 /* 
 + 	  *    getlocal_OP__WC__0 
 + 	  *    putobject_OP_INT2FIX_O_1_C_ 
 + 	  *    opt_plus 
 + 	  *    setlocal_OP__WC__0 
 + 	  * => 
 + 	  *    opt_plusplus 
 + 	  */ 
 + 	 INSN *nobj = (INSN *)iobj->link.next; 
 + 	 VALUE idx = OPERAND_AT(iobj, 0); 
 + 	 VALUE lev = OPERAND_AT(iobj, 1); 
 + 	 if (IS_INSN((LINK_ELEMENT *)nobj) && 
 + 	     IS_INSN_ID(nobj, putobject)) { 
 + 	     if(OPERAND_AT(nobj, 0) == INT2FIX(1)) { 
 + 		 nobj = (INSN * )nobj->link.next; 
 + 		 if (IS_INSN((LINK_ELEMENT *)nobj) && 
 + 		     IS_INSN_ID(nobj, send)) { 
 + 		     struct rb_call_info *ci = (void *)OPERAND_AT(nobj, 0); 
 + 		     struct rb_call_cache *cc = (void *)OPERAND_AT(nobj, 1); 
 + 		     if ((ci->flag & VM_CALL_ARGS_SIMPLE) && 
 + 			 (ci->orig_argc == 1) && 
 + 			 (ci->mid == idPLUS)) { 
 + 			 nobj = (INSN *)nobj->link.next; 
 + 			 if (IS_INSN((LINK_ELEMENT *)nobj) && 
 + 			     IS_INSN_ID(nobj, setlocal)) { 
 + 			     if ((OPERAND_AT(nobj, 0) == idx) && 
 + 				 (OPERAND_AT(nobj, 1) == lev)) { 
 + 				 INSN *opt_plusplus = new_insn_body( 
 + 				     iseq, iobj->line_no, BIN(opt_plusplus), 
 + 				     4, idx, lev, (VALUE)ci, (VALUE)cc); 
 + 				 nobj = (INSN * )nobj->link.next; 
 + 				 REMOVE_ELEM(nobj->link.prev->prev->prev->prev); 
 + 				 REMOVE_ELEM(nobj->link.prev->prev->prev); 
 + 				 REMOVE_ELEM(nobj->link.prev->prev); 
 + 				 REMOVE_ELEM(nobj->link.prev); 
 + 				 INSERT_ELEM_PREV( 
 + 				     &nobj->link, (LINK_ELEMENT *)opt_plusplus); 
 + 			     } 
 + 			 } 
 + 		     } 
 + 		 } 
 + 	     } 
 + 	 } 
 +      } 
 + 
      if (do_tailcallopt && 
 	 (IS_INSN_ID(iobj, send) || 
 	  IS_INSN_ID(iobj, opt_aref_with) || 
 diff --git a/configure.in b/configure.in 
 index 063d839..e0b7a91 100644 
 --- a/configure.in 
 +++ b/configure.in 
 @@ -2480,6 +2480,7 @@ if test x$rb_cv_builtin___builtin_choose_expr = xyes; then 
      ]) 
  fi 
  RUBY_CHECK_BUILTIN_FUNC(__builtin_types_compatible_p, [__builtin_types_compatible_p(int, int)]) 
 +RUBY_CHECK_BUILTIN_FUNC(__builtin_add_overflow, [__builtin_add_overflow(0, 0, (int*)0)]) 
 
  if test "$ac_cv_func_qsort_r" != no; then 
    AC_CACHE_CHECK(whether qsort_r is GNU version, rb_cv_gnu_qsort_r, 
 diff --git a/insns.def b/insns.def 
 index cdc8287..1438e29 100644 
 --- a/insns.def 
 +++ b/insns.def 
 @@ -1377,47 +1377,7 @@ opt_plus 
  (VALUE recv, VALUE obj) 
  (VALUE val) 
  { 
 -      if (FIXNUM_2_P(recv, obj) && 
 - 	 BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { 
 - 	 /* fixnum + fixnum */ 
 -#ifndef LONG_LONG_VALUE 
 - 	 val = (recv + (obj & (~1))); 
 - 	 if ((~(recv ^ obj) & (recv ^ val)) & 
 - 	     ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { 
 - 	     val = rb_big_plus(rb_int2big(FIX2LONG(recv)), 
 - 			       rb_int2big(FIX2LONG(obj))); 
 - 	 } 
 -#else 
 - 	 long a, b, c; 
 - 	 a = FIX2LONG(recv); 
 - 	 b = FIX2LONG(obj); 
 - 	 c = a + b; 
 - 	 val = LONG2NUM(c); 
 -#endif 
 -      } 
 -      else if (FLONUM_2_P(recv, obj) && 
 - 	      BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { 
 - 	 val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); 
 -      } 
 -      else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { 
 - 	 if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && 
 - 	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { 
 - 	     val = DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); 
 - 	 } 
 - 	 else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && 
 - 		  BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { 
 - 	     val = rb_str_plus(recv, obj); 
 - 	 } 
 - 	 else if (RBASIC_CLASS(recv) == rb_cArray && 
 - 		  BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { 
 - 	     val = rb_ary_plus(recv, obj); 
 - 	 } 
 - 	 else { 
 - 	     goto INSN_LABEL(normal_dispatch); 
 - 	 } 
 -      } 
 -      else { 
 - 	 INSN_LABEL(normal_dispatch): 
 +      if ((val = opt_plus_func(recv, obj)) == Qundef) { 
 	 PUSH(recv); 
 	 PUSH(obj); 
 	 CALL_SIMPLE_METHOD(recv); 
 @@ -2163,6 +2123,73 @@ opt_call_c_function 
  } 
 
  /** 
 +    @c optimize 
 +    @e unified instruction that does getlocal + putobject + opt_plus + setlocal. 
 + */ 
 +DEFINE_INSN 
 +opt_plusplus 
 +(lindex_t idx, rb_num_t level, CALL_INFO ci, CALL_CACHE cc) 
 +() 
 +() 
 +{ 
 +      int const lv = level; 
 +      VALUE *ep = GET_EP(); 
 +      VALUE *ptr; 
 +      VALUE tmp; 
 +      int i; 
 + 
 +      for (i = 0; i < lv; i++) { 
 + 	 ep = GET_PREV_EP(ep); 
 +      } 
 +      ptr = ep - idx; 
 + 
 +      if (FIXNUM_P(*ptr) && 
 +          BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { 
 +#if defined LONG_LONG_VALUE 
 +          long a, b; 
 +          a = FIX2LONG(*ptr); 
 +          b = a + 1; 
 +          *ptr = LONG2NUM(b); 
 +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW 
 +          const signed long one = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; 
 +          signed long ret; 
 +          if (UNLIKELY(__builtin_saddl_overflow((signed long)*ptr, one, &ret))) { 
 +              *ptr = rb_big_plus(rb_int2big(FIX2LONG(*ptr)), 
 +                                 rb_int2big(FIX2LONG(1))); 
 +          } 
 +          else { 
 +              *ptr = (VALUE)ret; 
 +          } 
 +#else 
 +          const VALUE a, b, c; 
 +          a = *ptr; 
 +          b = INT2FIX(1) & ~RUBY_FIXNUM_FLAG; 
 +          c = a + b; 
 +          if ((~(a ^ b) & (a ^ c)) & 
 +              ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { 
 +              *ptr = rb_big_plus(rb_int2big(FIX2LONG(a)), 
 +                                rb_int2big(FIX2LONG(b))); 
 +          } 
 +          else { 
 +              *ptr = c; 
 +          } 
 +#endif 
 +      } 
 +      else if ((tmp = opt_plus_func(*ptr, INT2FIX(1))) != Qundef) { 
 +          *ptr = tmp; 
 +      } 
 +      else { 
 +          VALUE val; 
 +          VALUE recv = *ptr; 
 + 
 + 	 PUSH(recv); 
 + 	 PUSH(INT2FIX(1)); 
 + 	 CALL_SIMPLE_METHOD(recv); 
 +          *ptr = val; 
 +      } 
 +} 
 + 
 +/** 
    @c joke 
    @e BLT 
    @j BLT 
 diff --git a/vm_insnhelper.c b/vm_insnhelper.c 
 index 691c37a..4ff21c5 100644 
 --- a/vm_insnhelper.c 
 +++ b/vm_insnhelper.c 
 @@ -1209,6 +1209,68 @@ rb_equal_opt(VALUE obj1, VALUE obj2) 
      return opt_eq_func(obj1, obj2, &ci, &cc); 
  } 
 
 +static inline VALUE 
 +fixnum_plus_fixnum(VALUE i, VALUE j) 
 +{ 
 +#if defined LONG_LONG_VALUE 
 +      long a, b, c; 
 +      a = FIX2LONG(i); 
 +      b = FIX2LONG(j); 
 +      c = a + b; 
 +      return LONG2NUM(c); 
 +#elif defined HAVE_BUILTIN___BUILTIN_ADD_OVERFLOW 
 +      signed long ret; 
 +      if (__builtin_saddl_overflow(i, j & ~RUBY_FIXNUM_FLAG, &ret)) { 
 + 	 return rb_big_plus(rb_int2big(FIX2LONG(i)), 
 + 			    rb_int2big(FIX2LONG(j))); 
 +      } 
 +      else { 
 + 	 return (VALUE)ret; 
 +      } 
 +#else 
 +      VALUE val = (i + (j & ~RUBY_FIXNUM_FLAG)); 
 +      if ((~(i ^ j) & (i ^ val)) & 
 + 	 ((VALUE)0x01 << ((sizeof(VALUE) * CHAR_BIT) - 1))) { 
 + 	 val = rb_big_plus(rb_int2big(FIX2LONG(i)), 
 + 			   rb_int2big(FIX2LONG(j))); 
 +      } 
 +      return val; 
 +#endif 
 +} 
 + 
 +static inline VALUE 
 +opt_plus_func(VALUE recv, VALUE obj) 
 +{ 
 +      if (FIXNUM_2_P(recv, obj) && 
 + 	 BASIC_OP_UNREDEFINED_P(BOP_PLUS,INTEGER_REDEFINED_OP_FLAG)) { 
 + 	 return fixnum_plus_fixnum(recv, obj); 
 +      } 
 +      else if (FLONUM_2_P(recv, obj) && 
 + 	      BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { 
 + 	 return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); 
 +      } 
 +      else if (!SPECIAL_CONST_P(recv) && !SPECIAL_CONST_P(obj)) { 
 + 	 if (RBASIC_CLASS(recv) == rb_cFloat && RBASIC_CLASS(obj) == rb_cFloat && 
 + 	     BASIC_OP_UNREDEFINED_P(BOP_PLUS, FLOAT_REDEFINED_OP_FLAG)) { 
 + 	     return DBL2NUM(RFLOAT_VALUE(recv) + RFLOAT_VALUE(obj)); 
 + 	 } 
 + 	 else if (RBASIC_CLASS(recv) == rb_cString && RBASIC_CLASS(obj) == rb_cString && 
 + 		  BASIC_OP_UNREDEFINED_P(BOP_PLUS, STRING_REDEFINED_OP_FLAG)) { 
 + 	     return rb_str_plus(recv, obj); 
 + 	 } 
 + 	 else if (RBASIC_CLASS(recv) == rb_cArray && 
 + 		  BASIC_OP_UNREDEFINED_P(BOP_PLUS, ARRAY_REDEFINED_OP_FLAG)) { 
 + 	     return rb_ary_plus(recv, obj); 
 + 	 } 
 + 	 else { 
 + 	     return Qundef; 
 + 	 } 
 +      } 
 +      else { 
 + 	 return Qundef; 
 +      } 
 +} 
 + 
  static VALUE vm_call0(rb_thread_t*, VALUE, ID, int, const VALUE*, const rb_callable_method_entry_t *); 
 
  static VALUE 
 --  
 2.8.3 

 ``` 

Back