3083 |
3083 |
return enumerator;
|
3084 |
3084 |
}
|
3085 |
3085 |
|
|
3086 |
|
|
3087 |
struct slicebetween_arg {
|
|
3088 |
VALUE pat1;
|
|
3089 |
VALUE pat2;
|
|
3090 |
VALUE pred;
|
|
3091 |
VALUE prev_elt;
|
|
3092 |
VALUE prev_elts;
|
|
3093 |
VALUE yielder;
|
|
3094 |
};
|
|
3095 |
|
|
3096 |
static VALUE
|
|
3097 |
slicebetween_ii(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
|
|
3098 |
{
|
|
3099 |
#define UPDATE_MEMO ((memo = MEMO_FOR(struct slicebetween_arg, _memo)), 1)
|
|
3100 |
struct slicebetween_arg *memo;
|
|
3101 |
int split_p;
|
|
3102 |
UPDATE_MEMO;
|
|
3103 |
|
|
3104 |
ENUM_WANT_SVALUE();
|
|
3105 |
|
|
3106 |
if (NIL_P(memo->prev_elts)) {
|
|
3107 |
/* The first element */
|
|
3108 |
memo->prev_elt = i;
|
|
3109 |
memo->prev_elts = rb_ary_new3(1, i);
|
|
3110 |
}
|
|
3111 |
else {
|
|
3112 |
if (NIL_P(memo->pred)) {
|
|
3113 |
split_p = RTEST(rb_funcall(memo->pat1, id_eqq, 1, memo->prev_elt));
|
|
3114 |
UPDATE_MEMO;
|
|
3115 |
if (split_p) {
|
|
3116 |
split_p = RTEST(rb_funcall(memo->pat2, id_eqq, 1, i));
|
|
3117 |
UPDATE_MEMO;
|
|
3118 |
}
|
|
3119 |
}
|
|
3120 |
else {
|
|
3121 |
split_p = RTEST(rb_funcall(memo->pred, id_call, 2, memo->prev_elt, i));
|
|
3122 |
UPDATE_MEMO;
|
|
3123 |
}
|
|
3124 |
|
|
3125 |
if (split_p) {
|
|
3126 |
rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
|
|
3127 |
UPDATE_MEMO;
|
|
3128 |
memo->prev_elts = rb_ary_new3(1, i);
|
|
3129 |
}
|
|
3130 |
else {
|
|
3131 |
rb_ary_push(memo->prev_elts, i);
|
|
3132 |
}
|
|
3133 |
|
|
3134 |
memo->prev_elt = i;
|
|
3135 |
}
|
|
3136 |
|
|
3137 |
return Qnil;
|
|
3138 |
#undef UPDATE_MEMO
|
|
3139 |
}
|
|
3140 |
|
|
3141 |
static VALUE
|
|
3142 |
slicebetween_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
|
|
3143 |
{
|
|
3144 |
VALUE enumerable;
|
|
3145 |
VALUE arg;
|
|
3146 |
struct slicebetween_arg *memo = NEW_MEMO_FOR(struct slicebetween_arg, arg);
|
|
3147 |
|
|
3148 |
enumerable = rb_ivar_get(enumerator, rb_intern("slicebetween_enum"));
|
|
3149 |
memo->pat1 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat1"));
|
|
3150 |
memo->pat2 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat2"));
|
|
3151 |
memo->pred = rb_attr_get(enumerator, rb_intern("slicebetween_pred"));
|
|
3152 |
memo->prev_elt = Qnil;
|
|
3153 |
memo->prev_elts = Qnil;
|
|
3154 |
memo->yielder = yielder;
|
|
3155 |
|
|
3156 |
rb_block_call(enumerable, id_each, 0, 0, slicebetween_ii, arg);
|
|
3157 |
memo = MEMO_FOR(struct slicebetween_arg, arg);
|
|
3158 |
if (!NIL_P(memo->prev_elts))
|
|
3159 |
rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
|
|
3160 |
return Qnil;
|
|
3161 |
}
|
|
3162 |
|
|
3163 |
/*
|
|
3164 |
* call-seq:
|
|
3165 |
* enum.slice_between(pattern_before, pattern_after) -> an_enumerator
|
|
3166 |
* enum.slice_between {|elt_before, elt_after| bool } -> an_enumerator
|
|
3167 |
*
|
|
3168 |
* Creates an enumerator for each chunk of elements.
|
|
3169 |
* The beginnings of chunks are defined by _pattern_before_ and _pattern_after_, or the block.
|
|
3170 |
*
|
|
3171 |
* This method splits each chunk using adjacent elements, _elt_before_ and _elt_after_,
|
|
3172 |
* in the receiver enumerator.
|
|
3173 |
* If the patterns are given,
|
|
3174 |
* this method splits chunks between _elt_before_ and _elt_after_ where
|
|
3175 |
* <code>_pattern_before_ === _elt_before_</code> and
|
|
3176 |
* <code>_pattern_after_ === _elt_after_</code>.
|
|
3177 |
* If the block is given,
|
|
3178 |
* this method splits chunks between _elt_before_ and _elt_after_ where
|
|
3179 |
* the block returns true.
|
|
3180 |
*
|
|
3181 |
* For each split opportunity, _pattern_before_ is tested first and _pattern_after_ is tested after it.
|
|
3182 |
* If the _pattern_before_ test fails, _pattern_after_ is not tested.
|
|
3183 |
*
|
|
3184 |
* _pattern_before_ is not tested for the last element.
|
|
3185 |
* _pattern_after_ is not tested for the first element.
|
|
3186 |
* The block is called the length of the receiver enumerator minus one times.
|
|
3187 |
*
|
|
3188 |
* The result enumerator yields the chunked elements as an array.
|
|
3189 |
* So +each+ method can be called as follows:
|
|
3190 |
*
|
|
3191 |
* enum.slice_between(pattern_before, pattern_after).each { |ary| ... }
|
|
3192 |
* enum.slice_between { |elt_before, elt_after| bool }.each { |ary| ... }
|
|
3193 |
*
|
|
3194 |
* Other methods of the Enumerator class and Enumerable module,
|
|
3195 |
* such as +map+, etc., are also usable.
|
|
3196 |
*
|
|
3197 |
* For example, one-by-one increasing subsequence can be chunked as follows:
|
|
3198 |
*
|
|
3199 |
* a = [1,2,4,9,10,11,12,15,16,19,20,21]
|
|
3200 |
* b = a.slice_between {|i, j| i+1 != j }
|
|
3201 |
* p b.to_a #=> [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]]
|
|
3202 |
* c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" }
|
|
3203 |
* p c #=> [[1, 2], [4], "9-12", [15, 16], "19-21"]
|
|
3204 |
* d = c.join(",")
|
|
3205 |
* p d #=> "1,2,4,9-12,15,16,19-21"
|
|
3206 |
*
|
|
3207 |
* Increasing subsequence can be chunked as follows:
|
|
3208 |
*
|
|
3209 |
* a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5]
|
|
3210 |
* p a.slice_between {|i, j| i > j }.to_a
|
|
3211 |
* #=> [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]]
|
|
3212 |
*
|
|
3213 |
* Adjacent evens and odds can be chunked as follows:
|
|
3214 |
* (Enumerable#chunk is another way to do it.)
|
|
3215 |
*
|
|
3216 |
* a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0]
|
|
3217 |
* p a.slice_between {|i, j| i.even? != j.even? }.to_a
|
|
3218 |
* #=> [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]]
|
|
3219 |
*
|
|
3220 |
* Paragraphs (non-empty lines with trailing empty lines) can be chunked as follows:
|
|
3221 |
* (See Enumerable#chunk to ignore empty lines.)
|
|
3222 |
*
|
|
3223 |
* lines = ["foo\n", "bar\n", "\n", "baz\n", "qux\n"]
|
|
3224 |
* p lines.slice_between(/\A\s*\z/, /\S/).to_a
|
|
3225 |
* #=> [["foo\n", "bar\n", "\n"], ["baz\n", "qux\n"]]
|
|
3226 |
*
|
|
3227 |
* Mbox contains series of mails which start with Unix From line and end
|
|
3228 |
* with an empty line.
|
|
3229 |
* So each mail can be extracted by slice after an empty line before Unix From line.
|
|
3230 |
*
|
|
3231 |
* # split mails in mbox (slice before Unix From line after an empty line)
|
|
3232 |
* open("mbox") { |f|
|
|
3233 |
* f.slice_between("\n", /\AFrom /).each { |mail|
|
|
3234 |
* mail.pop if mail.last == "\n"
|
|
3235 |
* pp mail
|
|
3236 |
* }
|
|
3237 |
* }
|
|
3238 |
*
|
|
3239 |
*/
|
|
3240 |
static VALUE
|
|
3241 |
enum_slice_between(int argc, VALUE *argv, VALUE enumerable)
|
|
3242 |
{
|
|
3243 |
VALUE enumerator;
|
|
3244 |
VALUE pat1 = Qnil, pat2 = Qnil, pred = Qnil;
|
|
3245 |
|
|
3246 |
if (rb_block_given_p()) {
|
|
3247 |
pred = rb_block_proc();
|
|
3248 |
if (0 < argc)
|
|
3249 |
rb_raise(rb_eArgError, "both pattan and block are given");
|
|
3250 |
}
|
|
3251 |
else {
|
|
3252 |
rb_scan_args(argc, argv, "2", &pat1, &pat2);
|
|
3253 |
}
|
|
3254 |
|
|
3255 |
enumerator = rb_obj_alloc(rb_cEnumerator);
|
|
3256 |
rb_ivar_set(enumerator, rb_intern("slicebetween_enum"), enumerable);
|
|
3257 |
rb_ivar_set(enumerator, rb_intern("slicebetween_pat1"), pat1);
|
|
3258 |
rb_ivar_set(enumerator, rb_intern("slicebetween_pat2"), pat2);
|
|
3259 |
rb_ivar_set(enumerator, rb_intern("slicebetween_pred"), pred);
|
|
3260 |
|
|
3261 |
rb_block_call(enumerator, idInitialize, 0, 0, slicebetween_i, enumerator);
|
|
3262 |
return enumerator;
|
|
3263 |
}
|
|
3264 |
|
3086 |
3265 |
/*
|
3087 |
3266 |
* The <code>Enumerable</code> mixin provides collection classes with
|
3088 |
3267 |
* several traversal and searching methods, and with the ability to
|
... | ... | |
3151 |
3330 |
rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
|
3152 |
3331 |
rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
|
3153 |
3332 |
rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
|
|
3333 |
rb_define_method(rb_mEnumerable, "slice_between", enum_slice_between, -1);
|
3154 |
3334 |
|
3155 |
3335 |
id_next = rb_intern("next");
|
3156 |
3336 |
id_call = rb_intern("call");
|