Feature #7368 » patch2.diff
string.c | ||
---|---|---|
return rb_str_split_m(1, &sep, str);
|
||
}
|
||
static VALUE rb_str_valid_encoding_p(VALUE str);
|
||
static void
|
||
line_yield(VALUE str, const char *sub, const char *subend)
|
||
{
|
||
long len = RSTRING_LEN(str);
|
||
const char *ptr = RSTRING_PTR(str);
|
||
VALUE line = rb_str_new5(str, sub, subend - sub);
|
||
OBJ_INFECT(line, str);
|
||
rb_enc_cr_str_copy_for_substr(line, str);
|
||
rb_yield(line);
|
||
str_mod_check(str, ptr, len);
|
||
return;
|
||
}
|
||
static void
|
||
str_each_line_valid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
|
||
{
|
||
int n, rspara = 0;
|
||
long pos, rslen;
|
||
const char *pend, *sub, *subend, *search_start, *hit=NULL;
|
||
const char *adjusted, *rsptr;
|
||
search_start = sub = subend = RSTRING_PTR(str);
|
||
pend = RSTRING_END(str);
|
||
rslen = RSTRING_LEN(rs);
|
||
if (rslen == 0) {
|
||
rspara = 1;
|
||
rs = rb_usascii_str_new("\n\n", 2);
|
||
if (!rb_enc_asciicompat(enc))
|
||
rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
|
||
rslen = RSTRING_LEN(rs);
|
||
}
|
||
rsptr = RSTRING_PTR(rs);
|
||
while (search_start < pend) {
|
||
pos = rb_memsearch(rsptr, rslen, search_start, pend - search_start, enc);
|
||
if (pos < 0) break;
|
||
hit = search_start + pos;
|
||
adjusted = rb_enc_right_char_head(sub, hit, pend, enc);
|
||
if (hit == adjusted) {
|
||
subend = hit + rslen;
|
||
if (rspara) {
|
||
rb_enc_codepoint_len(subend, pend, &n, enc);
|
||
while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline)
|
||
subend += n;
|
||
}
|
||
line_yield(str, sub, subend);
|
||
search_start = sub = subend;
|
||
}
|
||
else {
|
||
search_start = adjusted;
|
||
}
|
||
}
|
||
if (subend < pend) line_yield(str, subend, pend);
|
||
return;
|
||
}
|
||
static void
|
||
str_each_line_invalid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
|
||
{
|
||
int n;
|
||
long rslen;
|
||
const char *sub, *subend, *pend, *rsptr;
|
||
sub = subend = RSTRING_PTR(str);
|
||
pend = RSTRING_END(str);
|
||
rsptr = RSTRING_PTR(rs);
|
||
rslen = RSTRING_LEN(rs);
|
||
while (sub < pend) {
|
||
unsigned int c = rb_enc_codepoint_len(sub, pend, &n, enc);
|
||
again:
|
||
if (rslen == 0 && c == newline) {
|
||
subend += n;
|
||
if (subend < pend && (c = rb_enc_codepoint_len(subend, pend, &n, enc)) != newline) {
|
||
goto again;
|
||
}
|
||
while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) {
|
||
subend += n;
|
||
}
|
||
subend -= n;
|
||
}
|
||
if (c == newline &&
|
||
(rslen <= 1 ||
|
||
(pend - subend >= rslen && memcmp(rsptr, subend, rslen) == 0))) {
|
||
subend += rslen ? rslen : n;
|
||
line_yield(str, sub, subend);
|
||
sub = subend;
|
||
}
|
||
subend += n;
|
||
}
|
||
if (subend < pend) line_yield(str, subend, pend);
|
||
return;
|
||
}
|
||
/*
|
||
* call-seq:
|
||
... | ... | |
rb_encoding *enc;
|
||
VALUE rs;
|
||
unsigned int newline;
|
||
const char *p, *pend, *s, *ptr;
|
||
long len, rslen;
|
||
VALUE line;
|
||
int n;
|
||
VALUE orig = str;
|
||
if (argc == 0) {
|
||
if (argc == 0)
|
||
rs = rb_rs;
|
||
}
|
||
else {
|
||
else
|
||
rb_scan_args(argc, argv, "01", &rs);
|
||
}
|
||
RETURN_ENUMERATOR(str, argc, argv);
|
||
if (NIL_P(rs)) {
|
||
rb_yield(str);
|
||
return orig;
|
||
}
|
||
str = rb_str_new4(str);
|
||
ptr = p = s = RSTRING_PTR(str);
|
||
pend = p + RSTRING_LEN(str);
|
||
len = RSTRING_LEN(str);
|
||
StringValue(rs);
|
||
if (rs == rb_default_rs) {
|
||
enc = rb_enc_get(str);
|
||
while (p < pend) {
|
||
char *p0;
|
||
p = memchr(p, '\n', pend - p);
|
||
if (!p) break;
|
||
p0 = rb_enc_left_char_head(s, p, pend, enc);
|
||
if (!rb_enc_is_newline(p0, pend, enc)) {
|
||
p++;
|
||
continue;
|
||
}
|
||
p = p0 + rb_enc_mbclen(p0, pend, enc);
|
||
line = rb_str_new5(str, s, p - s);
|
||
OBJ_INFECT(line, str);
|
||
rb_enc_cr_str_copy_for_substr(line, str);
|
||
rb_yield(line);
|
||
str_mod_check(str, ptr, len);
|
||
s = p;
|
||
}
|
||
goto finish;
|
||
}
|
||
str = rb_str_new4(str);
|
||
enc = rb_enc_check(str, rs);
|
||
rslen = RSTRING_LEN(rs);
|
||
if (rslen == 0) {
|
||
newline = '\n';
|
||
if (rs == rb_rs) {
|
||
enc = rb_enc_get(str);
|
||
rs = rb_str_encode(rb_rs, rb_enc_from_encoding(enc), 0, Qnil);
|
||
}
|
||
else {
|
||
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
|
||
enc = rb_enc_check(str, rs);
|
||
}
|
||
while (p < pend) {
|
||
unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
|
||
again:
|
||
if (rslen == 0 && c == newline) {
|
||
p += n;
|
||
if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
|
||
goto again;
|
||
}
|
||
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
|
||
p += n;
|
||
}
|
||
p -= n;
|
||
}
|
||
if (c == newline &&
|
||
(rslen <= 1 ||
|
||
(pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
|
||
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
|
||
OBJ_INFECT(line, str);
|
||
rb_enc_cr_str_copy_for_substr(line, str);
|
||
rb_yield(line);
|
||
str_mod_check(str, ptr, len);
|
||
s = p + (rslen ? rslen : n);
|
||
}
|
||
p += n;
|
||
}
|
||
if (RSTRING_LEN(rs) == 0)
|
||
newline = '\n';
|
||
else
|
||
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
|
||
finish:
|
||
if (s != pend) {
|
||
line = rb_str_new5(str, s, pend - s);
|
||
OBJ_INFECT(line, str);
|
||
rb_enc_cr_str_copy_for_substr(line, str);
|
||
rb_yield(line);
|
||
RB_GC_GUARD(str);
|
||
}
|
||
if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs))
|
||
str_each_line_valid(str, rs, newline, enc);
|
||
else
|
||
str_each_line_invalid(str, rs, newline, enc);
|
||
return orig;
|
||
}
|