Project

General

Profile

Feature #9826 » slice_between3.patch

akr (Akira Tanaka), 05/18/2014 04:06 AM

View differences:

enum.c (working copy)
3207 3207
    return enumerator;
3208 3208
}
3209 3209

  
3210
struct slicebetween_arg {
3211
    VALUE pat1;
3212
    VALUE pat2;
3213
    VALUE pred;
3214
    VALUE prev_elt;
3215
    VALUE prev_elts;
3216
    VALUE yielder;
3217
};
3218

  
3219
static VALUE
3220
slicebetween_ii(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
3221
{
3222
#define UPDATE_MEMO ((void)(memo = MEMO_FOR(struct slicebetween_arg, _memo)))
3223
    struct slicebetween_arg *memo;
3224
    int split_p;
3225
    UPDATE_MEMO;
3226

  
3227
    ENUM_WANT_SVALUE();
3228

  
3229
    if (NIL_P(memo->prev_elts)) {
3230
        /* The first element */
3231
        memo->prev_elt = i;
3232
        memo->prev_elts = rb_ary_new3(1, i);
3233
    }
3234
    else {
3235
        if (NIL_P(memo->pred)) {
3236
            split_p = RTEST(rb_funcall(memo->pat1, id_eqq, 1, memo->prev_elt));
3237
            UPDATE_MEMO;
3238
            if (split_p) {
3239
                split_p = RTEST(rb_funcall(memo->pat2, id_eqq, 1, i));
3240
                UPDATE_MEMO;
3241
            }
3242
        }
3243
        else {
3244
            split_p = RTEST(rb_funcall(memo->pred, id_call, 2, memo->prev_elt, i));
3245
            UPDATE_MEMO;
3246
        }
3247

  
3248
        if (split_p) {
3249
            rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
3250
            UPDATE_MEMO;
3251
            memo->prev_elts = rb_ary_new3(1, i);
3252
        }
3253
        else {
3254
            rb_ary_push(memo->prev_elts, i);
3255
        }
3256

  
3257
        memo->prev_elt = i;
3258
    }
3259

  
3260
    return Qnil;
3261
#undef UPDATE_MEMO
3262
}
3263

  
3264
static VALUE
3265
slicebetween_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
3266
{
3267
    VALUE enumerable;
3268
    VALUE arg;
3269
    struct slicebetween_arg *memo = NEW_MEMO_FOR(struct slicebetween_arg, arg);
3270

  
3271
    enumerable = rb_ivar_get(enumerator, rb_intern("slicebetween_enum"));
3272
    memo->pat1 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat1"));
3273
    memo->pat2 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat2"));
3274
    memo->pred = rb_attr_get(enumerator, rb_intern("slicebetween_pred"));
3275
    memo->prev_elt = Qnil;
3276
    memo->prev_elts = Qnil;
3277
    memo->yielder = yielder;
3278

  
3279
    rb_block_call(enumerable, id_each, 0, 0, slicebetween_ii, arg);
3280
    memo = MEMO_FOR(struct slicebetween_arg, arg);
3281
    if (!NIL_P(memo->prev_elts))
3282
        rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
3283
    return Qnil;
3284
}
3285

  
3286
/*
3287
 *  call-seq:
3288
 *     enum.slice_between(pattern_before, pattern_after)  -> an_enumerator
3289
 *     enum.slice_between {|elt_before, elt_after| bool } -> an_enumerator
3290
 *
3291
 *  Creates an enumerator for each chunk of elements.
3292
 *  The boundaries between chunks are defined by _pattern_before_ and _pattern_after_, or by the block.
3293
 *
3294
 *  This method splits chunks by examining adjacent elements, _elt_before_ and _elt_after_,
3295
 *  in the receiver enumerator.
3296
 *  If the patterns are given,
3297
 *  this method splits chunks between _elt_before_ and _elt_after_ where
3298
 *  <code>_pattern_before_ === _elt_before_</code> and
3299
 *  <code>_pattern_after_ === _elt_after_</code>.
3300
 *  If the block is given,
3301
 *  this method splits chunks between _elt_before_ and _elt_after_ where
3302
 *  the block returns true.
3303
 *
3304
 *  For each split opportunity, the _pattern_before_ test is performed first, followed by the _pattern_after_ test.
3305
 *  If the _pattern_before_ test fails, _pattern_after_ is not tested.
3306
 *
3307
 *  _pattern_before_ is not tested for the last element.
3308
 *  _pattern_after_ is not tested for the first element.
3309
 *  The block is called once for each pair of adjacent elements, i.e. one time fewer than the number of elements in the receiver (and never for an empty receiver).
3310
 *
3311
 *  The result enumerator yields the chunked elements as an array.
3312
 *  So +each+ method can be called as follows:
3313
 *
3314
 *    enum.slice_between(pattern_before, pattern_after).each { |ary| ... }
3315
 *    enum.slice_between { |elt_before, elt_after| bool }.each { |ary| ... }
3316
 *
3317
 *  Other methods of the Enumerator class and Enumerable module,
3318
 *  such as +map+, etc., are also usable.
3319
 *
3320
 *  For example, one-by-one increasing subsequences can be chunked as follows:
3321
 *
3322
 *    a = [1,2,4,9,10,11,12,15,16,19,20,21]
3323
 *    b = a.slice_between {|i, j| i+1 != j }
3324
 *    p b.to_a #=> [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]]
3325
 *    c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" }
3326
 *    p c #=> [[1, 2], [4], "9-12", [15, 16], "19-21"]
3327
 *    d = c.join(",")
3328
 *    p d #=> "1,2,4,9-12,15,16,19-21"
3329
 *
3330
 *  Non-decreasing subsequences can be chunked as follows:
3331
 *
3332
 *    a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5]
3333
 *    p a.slice_between {|i, j| i > j }.to_a
3334
 *    #=> [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]]
3335
 *
3336
 *  Adjacent evens and odds can be chunked as follows:
3337
 *  (Enumerable#chunk is another way to do it.)
3338
 *
3339
 *    a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0]
3340
 *    p a.slice_between {|i, j| i.even? != j.even? }.to_a
3341
 *    #=> [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]]
3342
 *
3343
 *  Paragraphs (non-empty lines with trailing empty lines) can be chunked as follows:
3344
 *  (See Enumerable#chunk to ignore empty lines.)
3345
 *
3346
 *    lines = ["foo\n", "bar\n", "\n", "baz\n", "qux\n"]
3347
 *    p lines.slice_between(/\A\s*\z/, /\S/).to_a
3348
 *    #=> [["foo\n", "bar\n", "\n"], ["baz\n", "qux\n"]]
3349
 *
3350
 *  An mbox contains a series of mails which start with a Unix From line and end
3351
 *  with an empty line.
3352
 *  So each mail can be extracted by slicing between an empty line and the Unix From line that follows it.
3353
 *
3354
 *    # split mails in mbox (slice before Unix From line after an empty line)
3355
 *    open("mbox") { |f|
3356
 *      f.slice_between("\n", /\AFrom /).each { |mail|
3357
 *        mail.pop if mail.last == "\n"
3358
 *        pp mail
3359
 *      }
3360
 *    }
3361
 *
3362
 */
3363
static VALUE
3364
enum_slice_between(int argc, VALUE *argv, VALUE enumerable)
3365
{
3366
    VALUE enumerator;
3367
    VALUE pat1 = Qnil, pat2 = Qnil, pred = Qnil;
3368

  
3369
    if (rb_block_given_p()) {
3370
        pred = rb_block_proc();
3371
        if (0 < argc)
3372
            rb_raise(rb_eArgError, "both pattan and block are given");
3373
    }
3374
    else {
3375
        rb_scan_args(argc, argv, "2", &pat1, &pat2);
3376
    }
3377

  
3378
    enumerator = rb_obj_alloc(rb_cEnumerator);
3379
    rb_ivar_set(enumerator, rb_intern("slicebetween_enum"), enumerable);
3380
    rb_ivar_set(enumerator, rb_intern("slicebetween_pat1"), pat1);
3381
    rb_ivar_set(enumerator, rb_intern("slicebetween_pat2"), pat2);
3382
    rb_ivar_set(enumerator, rb_intern("slicebetween_pred"), pred);
3383

  
3384
    rb_block_call(enumerator, idInitialize, 0, 0, slicebetween_i, enumerator);
3385
    return enumerator;
3386
}
3387

  
3388

  
3389

  
3210 3390
/*
3211 3391
 *  The <code>Enumerable</code> mixin provides collection classes with
3212 3392
 *  several traversal and searching methods, and with the ability to
......
3276 3456
    rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
3277 3457
    rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
3278 3458
    rb_define_method(rb_mEnumerable, "slice_after", enum_slice_after, -1);
3459
    rb_define_method(rb_mEnumerable, "slice_between", enum_slice_between, -1);
3279 3460

  
3280 3461
    id_next = rb_intern("next");
3281 3462
    id_call = rb_intern("call");
enumerator.c (working copy)
2037 2037
    rb_define_method(rb_cLazy, "chunk", lazy_super, -1);
2038 2038
    rb_define_method(rb_cLazy, "slice_before", lazy_super, -1);
2039 2039
    rb_define_method(rb_cLazy, "slice_after", lazy_super, -1);
2040
    rb_define_method(rb_cLazy, "slice_between", lazy_super, -1);
2040 2041

  
2041 2042
    rb_define_alias(rb_cLazy, "force", "to_a");
2042 2043

  
test/ruby/test_enum.rb (working copy)
574 574
    assert_equal([["foo", ""], ["bar"]], e.to_a)
575 575
  end
576 576

  
577
  # Block form: the block is never called for an empty receiver, and for a
  # two-element receiver its result decides whether the pair is split.
  def test_slice_between1
    from_empty = [].slice_between {|a, b| flunk "should not be called" }
    assert_equal([], from_empty.to_a)

    always_split = [1,2].slice_between {|a,b| true }
    assert_equal([[1], [2]], always_split.to_a)

    never_split = [1,2].slice_between {|a,b| false }
    assert_equal([[1, 2]], never_split.to_a)
  end
588

  
589
  # Pattern form: a split happens only when both patterns match.
  def test_slice_between2
    # Both patterns match the adjacent pair (1, 2).
    assert_equal([[1], [2]], [1,2].slice_between(1, 2).to_a)

    # pattern_before does not match 1.
    assert_equal([[1, 2]], [1,2].slice_between(3, 2).to_a)

    # pattern_after does not match 2.
    assert_equal([[1, 2]], [1,2].slice_between(1, 3).to_a)
  end
599

  
600
  # Neither too few patterns nor patterns combined with a block are accepted.
  def test_slice_between_argerror
    # Without a block, exactly two patterns are required.
    [[], [1]].each do |args|
      assert_raise(ArgumentError) { [].slice_between(*args) }
    end

    # With a block, no patterns may be given at all.
    [[1], [1, 2], [nil, 2]].each do |args|
      assert_raise(ArgumentError) { [].slice_between(*args) {|a, b| true } }
    end
  end
607

  
608
  # Runs of consecutive integers end up in the same chunk.
  def test_slice_between_contiguously_increasing_integers
    numbers = [1,4,9,10,11,12,15,16,19,20,21]
    expected = [[1], [4], [9,10,11,12], [15,16], [19,20,21]]
    runs = numbers.slice_between {|prev, cur| prev+1 != cur }
    assert_equal(expected, runs.to_a)
  end
612

  
613
  # An mbox-like stream is split between an empty line and the following
  # "From " line; a "From " line inside a body (not preceded by an empty
  # line) must not start a new mail.
  def test_slice_between_mails
    first_mail = [
      "From foo\n",
      "\n",
      "Body start\n",
      "From x to b, baz\n",
      "Body end\n",
      "\n",
    ]
    second_mail = [
      "From foo\n",
      "\n",
      "Body start\n",
      "Another text\n",
      "Body end\n",
      "\n",
    ]
    mbox = first_mail + second_mail
    assert_equal([first_mail, second_mail], mbox.slice_between("\n", /\AFrom /).to_a)
  end
629

  
577 630
  def test_detect
578 631
    @obj = ('a'..'z')
579 632
    assert_equal('c', @obj.detect {|x| x == 'c' })
test/ruby/test_lazy_enumerator.rb (working copy)
471 471
    {
472 472
      slice_before: //,
473 473
      slice_after: //,
474
      slice_between: [//, //],
474 475
      with_index: nil,
475 476
      cycle: nil,
476 477
      each_with_object: 42,