Feature #7368 » patch3.diff
| string.c | ||
|---|---|---|
|
rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
|
||
|
{
|
||
|
rb_encoding *enc;
|
||
|
VALUE rs;
|
||
|
unsigned int newline;
|
||
|
const char *p, *pend, *s, *ptr;
|
||
|
long len, rslen;
|
||
|
VALUE line;
|
||
|
int n;
|
||
|
VALUE orig = str;
|
||
|
VALUE line, rs, orig = str;
|
||
|
const char *ptr, *pend, *subptr, *subend, *rsptr, *adjusted;
|
||
|
long pos, len, rslen;
|
||
|
int paragraph_mode = 0;
|
||
|
VALUE UNINITIALIZED_VAR(ary);
|
||
|
if (argc == 0) {
|
||
|
if (argc == 0)
|
||
|
rs = rb_rs;
|
||
|
}
|
||
|
else {
|
||
|
else
|
||
|
rb_scan_args(argc, argv, "01", &rs);
|
||
|
}
|
||
|
if (rb_block_given_p()) {
|
||
|
if (wantarray) {
|
||
| ... | ... | |
|
return orig;
|
||
|
}
|
||
|
}
|
||
|
str = rb_str_new4(str);
|
||
|
ptr = p = s = RSTRING_PTR(str);
|
||
|
pend = p + RSTRING_LEN(str);
|
||
|
ptr = subptr = RSTRING_PTR(str);
|
||
|
pend = RSTRING_END(str);
|
||
|
len = RSTRING_LEN(str);
|
||
|
StringValue(rs);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
if (rs == rb_default_rs) {
|
||
|
enc = rb_enc_get(str);
|
||
|
while (p < pend) {
|
||
|
char *p0;
|
||
|
p = memchr(p, '\n', pend - p);
|
||
|
if (!p) break;
|
||
|
p0 = rb_enc_left_char_head(s, p, pend, enc);
|
||
|
if (!rb_enc_is_newline(p0, pend, enc)) {
|
||
|
p++;
|
||
|
continue;
|
||
|
}
|
||
|
p = p0 + rb_enc_mbclen(p0, pend, enc);
|
||
|
line = rb_str_subseq(str, s - ptr, p - s);
|
||
|
if (wantarray)
|
||
|
rb_ary_push(ary, line);
|
||
|
else
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
s = p;
|
||
|
}
|
||
|
goto finish;
|
||
|
}
|
||
|
else {
|
||
|
enc = rb_enc_check(str, rs);
|
||
|
}
|
||
|
enc = rb_enc_check(str, rs);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
if (rslen == 0) {
|
||
|
newline = '\n';
|
||
|
rsptr = "\n\n";
|
||
|
rslen = 2;
|
||
|
paragraph_mode = 1;
|
||
|
}
|
||
|
else {
|
||
|
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
|
||
|
rsptr = RSTRING_PTR(rs);
|
||
|
}
|
||
|
while (p < pend) {
|
||
|
unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
|
||
|
if ((rs == rb_default_rs || paragraph_mode) && !rb_enc_asciicompat(enc)) {
|
||
|
rs = rb_str_new(rsptr, rslen);
|
||
|
rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
|
||
|
rsptr = RSTRING_PTR(rs);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
}
|
||
|
again:
|
||
|
if (rslen == 0 && c == newline) {
|
||
|
p += n;
|
||
|
if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
|
||
|
goto again;
|
||
|
}
|
||
|
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
|
||
|
p += n;
|
||
|
}
|
||
|
p -= n;
|
||
|
while (subptr < pend) {
|
||
|
pos = rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
|
||
|
if (pos == -1) break;
|
||
|
subend = subptr + pos;
|
||
|
adjusted = rb_enc_right_char_head(subptr, subend, pend, enc);
|
||
|
if (subend != adjusted) {
|
||
|
subptr = adjusted;
|
||
|
continue;
|
||
|
}
|
||
|
if (c == newline &&
|
||
|
(rslen <= 1 ||
|
||
|
(pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
|
||
|
const char *pp = p + (rslen ? rslen : n);
|
||
|
line = rb_str_subseq(str, s - ptr, pp - s);
|
||
|
if (wantarray)
|
||
|
rb_ary_push(ary, line);
|
||
|
else
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
s = pp;
|
||
|
subend += rb_enc_mbclen(subend, pend, enc);
|
||
|
if (paragraph_mode) {
|
||
|
while (subend < pend && rb_enc_is_newline(subend, pend, enc)) {
|
||
|
subend += rb_enc_mbclen(subend, pend, enc);
|
||
|
}
|
||
|
}
|
||
|
p += n;
|
||
|
line = rb_str_subseq(str, subptr - ptr, subend - subptr);
|
||
|
if (wantarray)
|
||
|
rb_ary_push(ary, line);
|
||
|
else
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
subptr = subend;
|
||
|
}
|
||
|
finish:
|
||
|
if (s != pend) {
|
||
|
line = rb_str_subseq(str, s - ptr, pend - s);
|
||
|
if (subptr != pend) {
|
||
|
line = rb_str_subseq(str, subptr - ptr, pend - subptr);
|
||
|
if (wantarray)
|
||
|
rb_ary_push(ary, line);
|
||
|
else
|
||
- « Previous
- 1
- 2
- 3
- Next »