Feature #7368 » patch2.diff
| string.c | ||
|---|---|---|
|
return rb_str_split_m(1, &sep, str);
|
||
|
}
|
||
|
static VALUE rb_str_valid_encoding_p(VALUE str);
|
||
|
static void
|
||
|
line_yield(VALUE str, const char *sub, const char *subend)
|
||
|
{
|
||
|
long len = RSTRING_LEN(str);
|
||
|
const char *ptr = RSTRING_PTR(str);
|
||
|
VALUE line = rb_str_new5(str, sub, subend - sub);
|
||
|
OBJ_INFECT(line, str);
|
||
|
rb_enc_cr_str_copy_for_substr(line, str);
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
return;
|
||
|
}
|
||
|
static void
|
||
|
str_each_line_valid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
|
||
|
{
|
||
|
int n, rspara = 0;
|
||
|
long pos, rslen;
|
||
|
const char *pend, *sub, *subend, *search_start, *hit=NULL;
|
||
|
const char *adjusted, *rsptr;
|
||
|
search_start = sub = subend = RSTRING_PTR(str);
|
||
|
pend = RSTRING_END(str);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
if (rslen == 0) {
|
||
|
rspara = 1;
|
||
|
rs = rb_usascii_str_new("\n\n", 2);
|
||
|
if (!rb_enc_asciicompat(enc))
|
||
|
rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
}
|
||
|
rsptr = RSTRING_PTR(rs);
|
||
|
while (search_start < pend) {
|
||
|
pos = rb_memsearch(rsptr, rslen, search_start, pend - search_start, enc);
|
||
|
if (pos < 0) break;
|
||
|
hit = search_start + pos;
|
||
|
adjusted = rb_enc_right_char_head(sub, hit, pend, enc);
|
||
|
if (hit == adjusted) {
|
||
|
subend = hit + rslen;
|
||
|
if (rspara) {
|
||
|
rb_enc_codepoint_len(subend, pend, &n, enc);
|
||
|
while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline)
|
||
|
subend += n;
|
||
|
}
|
||
|
line_yield(str, sub, subend);
|
||
|
search_start = sub = subend;
|
||
|
}
|
||
|
else {
|
||
|
search_start = adjusted;
|
||
|
}
|
||
|
}
|
||
|
if (subend < pend) line_yield(str, subend, pend);
|
||
|
return;
|
||
|
}
|
||
|
static void
|
||
|
str_each_line_invalid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
|
||
|
{
|
||
|
int n;
|
||
|
long rslen;
|
||
|
const char *sub, *subend, *pend, *rsptr;
|
||
|
sub = subend = RSTRING_PTR(str);
|
||
|
pend = RSTRING_END(str);
|
||
|
rsptr = RSTRING_PTR(rs);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
while (sub < pend) {
|
||
|
unsigned int c = rb_enc_codepoint_len(sub, pend, &n, enc);
|
||
|
again:
|
||
|
if (rslen == 0 && c == newline) {
|
||
|
subend += n;
|
||
|
if (subend < pend && (c = rb_enc_codepoint_len(subend, pend, &n, enc)) != newline) {
|
||
|
goto again;
|
||
|
}
|
||
|
while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) {
|
||
|
subend += n;
|
||
|
}
|
||
|
subend -= n;
|
||
|
}
|
||
|
if (c == newline &&
|
||
|
(rslen <= 1 ||
|
||
|
(pend - subend >= rslen && memcmp(rsptr, subend, rslen) == 0))) {
|
||
|
subend += rslen ? rslen : n;
|
||
|
line_yield(str, sub, subend);
|
||
|
sub = subend;
|
||
|
}
|
||
|
subend += n;
|
||
|
}
|
||
|
if (subend < pend) line_yield(str, subend, pend);
|
||
|
return;
|
||
|
}
|
||
|
/*
|
||
|
* call-seq:
|
||
| ... | ... | |
|
rb_encoding *enc;
|
||
|
VALUE rs;
|
||
|
unsigned int newline;
|
||
|
const char *p, *pend, *s, *ptr;
|
||
|
long len, rslen;
|
||
|
VALUE line;
|
||
|
int n;
|
||
|
VALUE orig = str;
|
||
|
if (argc == 0) {
|
||
|
if (argc == 0)
|
||
|
rs = rb_rs;
|
||
|
}
|
||
|
else {
|
||
|
else
|
||
|
rb_scan_args(argc, argv, "01", &rs);
|
||
|
}
|
||
|
RETURN_ENUMERATOR(str, argc, argv);
|
||
|
if (NIL_P(rs)) {
|
||
|
rb_yield(str);
|
||
|
return orig;
|
||
|
}
|
||
|
str = rb_str_new4(str);
|
||
|
ptr = p = s = RSTRING_PTR(str);
|
||
|
pend = p + RSTRING_LEN(str);
|
||
|
len = RSTRING_LEN(str);
|
||
|
StringValue(rs);
|
||
|
if (rs == rb_default_rs) {
|
||
|
enc = rb_enc_get(str);
|
||
|
while (p < pend) {
|
||
|
char *p0;
|
||
|
p = memchr(p, '\n', pend - p);
|
||
|
if (!p) break;
|
||
|
p0 = rb_enc_left_char_head(s, p, pend, enc);
|
||
|
if (!rb_enc_is_newline(p0, pend, enc)) {
|
||
|
p++;
|
||
|
continue;
|
||
|
}
|
||
|
p = p0 + rb_enc_mbclen(p0, pend, enc);
|
||
|
line = rb_str_new5(str, s, p - s);
|
||
|
OBJ_INFECT(line, str);
|
||
|
rb_enc_cr_str_copy_for_substr(line, str);
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
s = p;
|
||
|
}
|
||
|
goto finish;
|
||
|
}
|
||
|
str = rb_str_new4(str);
|
||
|
enc = rb_enc_check(str, rs);
|
||
|
rslen = RSTRING_LEN(rs);
|
||
|
if (rslen == 0) {
|
||
|
newline = '\n';
|
||
|
if (rs == rb_rs) {
|
||
|
enc = rb_enc_get(str);
|
||
|
rs = rb_str_encode(rb_rs, rb_enc_from_encoding(enc), 0, Qnil);
|
||
|
}
|
||
|
else {
|
||
|
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
|
||
|
enc = rb_enc_check(str, rs);
|
||
|
}
|
||
|
while (p < pend) {
|
||
|
unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
|
||
|
again:
|
||
|
if (rslen == 0 && c == newline) {
|
||
|
p += n;
|
||
|
if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
|
||
|
goto again;
|
||
|
}
|
||
|
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
|
||
|
p += n;
|
||
|
}
|
||
|
p -= n;
|
||
|
}
|
||
|
if (c == newline &&
|
||
|
(rslen <= 1 ||
|
||
|
(pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
|
||
|
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
|
||
|
OBJ_INFECT(line, str);
|
||
|
rb_enc_cr_str_copy_for_substr(line, str);
|
||
|
rb_yield(line);
|
||
|
str_mod_check(str, ptr, len);
|
||
|
s = p + (rslen ? rslen : n);
|
||
|
}
|
||
|
p += n;
|
||
|
}
|
||
|
if (RSTRING_LEN(rs) == 0)
|
||
|
newline = '\n';
|
||
|
else
|
||
|
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
|
||
|
finish:
|
||
|
if (s != pend) {
|
||
|
line = rb_str_new5(str, s, pend - s);
|
||
|
OBJ_INFECT(line, str);
|
||
|
rb_enc_cr_str_copy_for_substr(line, str);
|
||
|
rb_yield(line);
|
||
|
RB_GC_GUARD(str);
|
||
|
}
|
||
|
if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs))
|
||
|
str_each_line_valid(str, rs, newline, enc);
|
||
|
else
|
||
|
str_each_line_invalid(str, rs, newline, enc);
|
||
|
return orig;
|
||
|
}
|
||