Project

General

Profile

Feature #9826 » slice_between.patch

akr (Akira Tanaka), 05/10/2014 11:54 AM

View differences:

enum.c (working copy)
3083 3083
    return enumerator;
3084 3084
}
3085 3085

  
3086

  
3087
struct slicebetween_arg {
3088
    VALUE pat1;
3089
    VALUE pat2;
3090
    VALUE pred;
3091
    VALUE prev_elt;
3092
    VALUE prev_elts;
3093
    VALUE yielder;
3094
};
3095

  
3096
static VALUE
3097
slicebetween_ii(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
3098
{
3099
#define UPDATE_MEMO ((memo = MEMO_FOR(struct slicebetween_arg, _memo)), 1)
3100
    struct slicebetween_arg *memo;
3101
    int split_p;
3102
    UPDATE_MEMO;
3103

  
3104
    ENUM_WANT_SVALUE();
3105

  
3106
    if (NIL_P(memo->prev_elts)) {
3107
        /* The first element */
3108
        memo->prev_elt = i;
3109
        memo->prev_elts = rb_ary_new3(1, i);
3110
    }
3111
    else {
3112
        split_p = 1;
3113
        if (!NIL_P(memo->pat1)) {
3114
            split_p = RTEST(rb_funcall(memo->pat1, id_eqq, 1, memo->prev_elt));
3115
            UPDATE_MEMO;
3116
        }
3117
        if (split_p && !NIL_P(memo->pat2)) {
3118
            split_p = RTEST(rb_funcall(memo->pat2, id_eqq, 1, i));
3119
            UPDATE_MEMO;
3120
        }
3121
        if (split_p && !NIL_P(memo->pred)) {
3122
            split_p = RTEST(rb_funcall(memo->pred, id_call, 2, memo->prev_elt, i));
3123
            UPDATE_MEMO;
3124
        }
3125

  
3126
        if (split_p) {
3127
            rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
3128
            UPDATE_MEMO;
3129
            memo->prev_elts = rb_ary_new3(1, i);
3130
        }
3131
        else {
3132
            rb_ary_push(memo->prev_elts, i);
3133
        }
3134

  
3135
        memo->prev_elt = i;
3136
    }
3137

  
3138
    return Qnil;
3139
#undef UPDATE_MEMO
3140
}
3141

  
3142
static VALUE
3143
slicebetween_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
3144
{
3145
    VALUE enumerable;
3146
    VALUE arg;
3147
    struct slicebetween_arg *memo = NEW_MEMO_FOR(struct slicebetween_arg, arg);
3148

  
3149
    enumerable = rb_ivar_get(enumerator, rb_intern("slicebetween_enum"));
3150
    memo->pat1 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat1"));
3151
    memo->pat2 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat2"));
3152
    memo->pred = rb_attr_get(enumerator, rb_intern("slicebetween_pred"));
3153
    memo->prev_elt = Qnil;
3154
    memo->prev_elts = Qnil;
3155
    memo->yielder = yielder;
3156

  
3157
    rb_block_call(enumerable, id_each, 0, 0, slicebetween_ii, arg);
3158
    memo = MEMO_FOR(struct slicebetween_arg, arg);
3159
    if (!NIL_P(memo->prev_elts))
3160
        rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
3161
    return Qnil;
3162
}
3163

  
3164
/*
3165
 *  call-seq:
3166
 *     enum.slice_between(pattern_before, pattern_after=nil) -> an_enumerator
3167
 *     enum.slice_between {|elt_before, elt_after| bool }        -> an_enumerator
3168
 *
3169
 *  Creates an enumerator for each chunk of elements.
3170
 *  The beginnings of chunks are defined by _pattern_before_ and _pattern_after_, or the block.
3171
 *
3172
 *  Unless a block is given, at least one of _pattern_before_ and _pattern_after_ should be non-nil.
3173
 *  ArgumentError is raised otherwise, and also when both a pattern and a block are given.
3174
 *
3175
 *  This method splits each chunk using adjacent elements, _elt_before_ and _elt_after_,
3176
 *  in the receiver enumerator.
3177
 *  If the patterns are given,
3178
 *  this method splits chunks between _elt_before_ and _elt_after_ where
3179
 *  <code>_pattern_before_ === _elt_before_</code> and
3180
 *  <code>_pattern_after_ === _elt_after_</code>.
3181
 *  (Arguments not given are ignored in this test.)
3182
 *  If the block is given,
3183
 *  this method splits chunks between _elt_before_ and _elt_after_ where
3184
 *  the block returns true.
3185
 *
3186
 *  For each split opportunity, _pattern_before_ test precedes _pattern_after_ test.
3187
 *  If the _pattern_before_ test fails, _pattern_after_ is not tested.
3188
 *
3189
 *  _pattern_before_ is not tested for the last element.
3190
 *  _pattern_after_ is not tested for the first element.
3191
 *  The block is called once per adjacent pair, i.e. (length of the receiver enumerator minus one) times for a non-empty receiver.
3192
 *
3193
 *  The result enumerator yields the chunked elements as an array.
3194
 *  So +each+ method can be called as follows:
3195
 *
3196
 *    enum.slice_between(pattern_before, pattern_after).each { |ary| ... }
3197
 *    enum.slice_between { |elt_before, elt_after| bool }.each { |ary| ... }
3198
 *
3199
 *  Other methods of the Enumerator class and Enumerable module,
3200
 *  such as map, etc., are also usable.
3201
 *
3202
 *  For example, one-by-one increasing subsequence can be chunked as follows:
3203
 *
3204
 *    a = [1,2,4,9,10,11,12,15,16,19,20,21]
3205
 *    b = a.slice_between {|i, j| i+1 != j }
3206
 *    p b.to_a #=> [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]]
3207
 *    c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" }
3208
 *    p c #=> [[1, 2], [4], "9-12", [15, 16], "19-21"]
3209
 *    d = c.join(",")
3210
 *    p d #=> "1,2,4,9-12,15,16,19-21"
3211
 *
3212
 *  Non-decreasing subsequences can be chunked as follows:
3213
 *
3214
 *    a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5]
3215
 *    p a.slice_between {|i, j| i > j }.to_a
3216
 *    #=> [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]]
3217
 *
3218
 *  Adjacent evens and odds can be chunked as follows:
3219
 *  (Enumerable#chunk is another way to do it.)
3220
 *
3221
 *    a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0]
3222
 *    p a.slice_between {|i, j| i.even? != j.even? }.to_a
3223
 *    #=> [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]]
3224
 *
3225
 *  Mbox contains series of mails which start with Unix From line and end
3226
 *  with an empty line.
3227
 *  So each mail can be extracted by slicing between an empty line and the Unix From line that follows it.
3228
 *
3229
 *    # split mails in mbox (slice before Unix From line after an empty line)
3230
 *    open("mbox") { |f|
3231
 *      f.slice_between("\n", /\AFrom /).each { |mail|
3232
 *        mail.pop if mail.last == "\n"
3233
 *        pp mail
3234
 *      }
3235
 *    }
3236
 *
3237
 */
3238
static VALUE
3239
enum_slice_between(int argc, VALUE *argv, VALUE enumerable)
3240
{
3241
    VALUE enumerator;
3242
    VALUE pat1, pat2, pred = Qnil;
3243

  
3244
    rb_scan_args(argc, argv, "02", &pat1, &pat2);
3245
    if (rb_block_given_p()) {
3246
        pred = rb_block_proc();
3247
    }
3248

  
3249
    if (NIL_P(pat1) && NIL_P(pat2) && NIL_P(pred)) {
3250
	rb_raise(rb_eArgError, "no pattan/block given");
3251
    }
3252

  
3253
    if ((!NIL_P(pat1) || !NIL_P(pat2)) && !NIL_P(pred)) {
3254
	rb_raise(rb_eArgError, "both pattan and block are given");
3255
    }
3256

  
3257
    enumerator = rb_obj_alloc(rb_cEnumerator);
3258
    rb_ivar_set(enumerator, rb_intern("slicebetween_pat1"), pat1);
3259
    rb_ivar_set(enumerator, rb_intern("slicebetween_pat2"), pat2);
3260
    rb_ivar_set(enumerator, rb_intern("slicebetween_pred"), pred);
3261
    rb_ivar_set(enumerator, rb_intern("slicebetween_enum"), enumerable);
3262

  
3263
    rb_block_call(enumerator, idInitialize, 0, 0, slicebetween_i, enumerator);
3264
    return enumerator;
3265
}
3266

  
3086 3267
/*
3087 3268
 *  The <code>Enumerable</code> mixin provides collection classes with
3088 3269
 *  several traversal and searching methods, and with the ability to
......
3151 3332
    rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
3152 3333
    rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
3153 3334
    rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
3335
    rb_define_method(rb_mEnumerable, "slice_between", enum_slice_between, -1);
3154 3336

  
3155 3337
    id_next = rb_intern("next");
3156 3338
    id_call = rb_intern("call");
enumerator.c (working copy)
2036 2036
    rb_define_method(rb_cLazy, "lazy", lazy_lazy, 0);
2037 2037
    rb_define_method(rb_cLazy, "chunk", lazy_super, -1);
2038 2038
    rb_define_method(rb_cLazy, "slice_before", lazy_super, -1);
2039
    rb_define_method(rb_cLazy, "slice_between", lazy_super, -1);
2039 2040

  
2040 2041
    rb_define_alias(rb_cLazy, "force", "to_a");
2041 2042

  
test/ruby/test_enum.rb (working copy)
531 531
    assert_not_warn{ss.slice_before(/\A...\z/).to_a}
532 532
  end
533 533

  
534
  def test_slice_between0
535
    assert_raise(ArgumentError) { [].slice_between }
536
  end
537

  
538
  def test_slice_between1
539
    e = [].slice_between {|a, b| flunk "should not be called" }
540
    assert_equal([], e.to_a)
541

  
542
    e = [1,2].slice_between(1)
543
    assert_equal([[1], [2]], e.to_a)
544

  
545
    e = [1,2].slice_between(3)
546
    assert_equal([[1, 2]], e.to_a)
547

  
548
    e = [1,2].slice_between(nil, 2)
549
    assert_equal([[1], [2]], e.to_a)
550

  
551
    e = [1,2].slice_between(nil, 3)
552
    assert_equal([[1, 2]], e.to_a)
553

  
554
    e = [1,2].slice_between {|a,b| true }
555
    assert_equal([[1], [2]], e.to_a)
556

  
557
    e = [1,2].slice_between {|a,b| false }
558
    assert_equal([[1, 2]], e.to_a)
559
  end
560

  
561
  def test_slice_between2
562
    e = [1,2].slice_between(1, 2)
563
    assert_equal([[1], [2]], e.to_a)
564

  
565
    e = [1,2].slice_between(3, 2)
566
    assert_equal([[1, 2]], e.to_a)
567

  
568
    e = [1,2].slice_between(1, 3)
569
    assert_equal([[1, 2]], e.to_a)
570
  end
571

  
572
  def test_slice_between_both_pattern_and_block
573
    assert_raise(ArgumentError) { [].slice_between(1, 2) {|a, b| true } }
574
    assert_raise(ArgumentError) { [].slice_between(1) {|a, b| true } }
575
    assert_raise(ArgumentError) { [].slice_between(nil, 2) {|a, b| true } }
576
  end
577

  
578
  def test_slice_between_contiguously_increasing_integers
579
    e = [1,4,9,10,11,12,15,16,19,20,21].slice_between {|i, j| i+1 != j }
580
    assert_equal([[1], [4], [9,10,11,12], [15,16], [19,20,21]], e.to_a)
581
  end
582

  
583
  def test_slice_between_mails
584
    mail1 = ["From foo\n",
585
             "\n",
586
             "Body start\n",
587
             "From x to b, baz\n",
588
             "Body end\n",
589
             "\n"]
590
    mail2 = ["From foo\n",
591
             "\n",
592
             "Body start\n",
593
             "Another text\n",
594
             "Body end\n",
595
             "\n"]
596
    e = (mail1 + mail2).slice_between("\n", /\AFrom /)
597
    assert_equal([mail1, mail2], e.to_a)
598
  end
599

  
534 600
  def test_detect
535 601
    @obj = ('a'..'z')
536 602
    assert_equal('c', @obj.detect {|x| x == 'c' })
test/ruby/test_lazy_enumerator.rb (working copy)
470 470
    bug7507 = '[ruby-core:51510]'
471 471
    {
472 472
      slice_before: //,
473
      slice_between: //,
473 474
      with_index: nil,
474 475
      cycle: nil,
475 476
      each_with_object: 42,