diff --git a/string.c b/string.c
index 5110f45baa..911e7ffccf 100644
--- a/string.c
+++ b/string.c
@@ -8318,6 +8318,166 @@ rb_str_each_char(VALUE str)
     return rb_str_enumerate_chars(str, 0);
 }
 
+/*
+ * Returns how many chunks of +rb_len+ characters are needed to cover
+ * +str+ (the last chunk may be shorter), or 0 if +rb_len+ is not
+ * positive.
+ */
+static long
+each_chunk_size(VALUE str, VALUE rb_len)
+{
+    long len, str_len;
+
+    len = NUM2LONG(rb_len);
+
+    if (len <= 0) {
+        return 0;
+    }
+
+    str_len = rb_str_strlen(str);
+
+    /* ceil(str_len / len); str_len + (len - 1) would overflow for a
+     * len close to LONG_MAX */
+    return str_len / len + (str_len % len != 0);
+}
+
+/*
+ * Size function for the enumerator returned by String#each_chunk.
+ * Returns the number of chunks, or nil when no positive length was
+ * given (iterating such an enumerator raises ArgumentError).
+ */
+static VALUE
+rb_str_each_chunk_size(VALUE str, VALUE args, VALUE eobj)
+{
+    VALUE rb_len;
+
+    /* the enumerator is created before the arguments are checked, so
+     * args can be empty; RARRAY_AREF must not be used on it then */
+    if (RARRAY_LEN(args) < 1) {
+        return Qnil;
+    }
+
+    rb_len = RARRAY_AREF(args, 0);
+    if (NUM2LONG(rb_len) <= 0) {
+        return Qnil;
+    }
+
+    /* an empty string has zero chunks: that is a size of 0, not nil */
+    return LONG2NUM(each_chunk_size(str, rb_len));
+}
+
+static VALUE rb_str_strip(VALUE str);
+
+/*
+ * Shared body of String#each_chunk and String#chunks.  Cuts +str+ into
+ * consecutive substrings of the requested number of characters and
+ * pushes each onto +ary+, or yields it when +ary+ is 0.  Returns +ary+
+ * if it was given, otherwise the receiver.
+ */
+static VALUE
+rb_str_enumerate_chunk(int argc, VALUE *argv, VALUE str, VALUE ary)
+{
+    VALUE rb_len, opts, substr, orig = str;
+    long pos, len, str_len;
+    int strip = 0;
+
+    rb_scan_args(argc, argv, "1:", &rb_len, &opts);
+
+    len = NUM2LONG(rb_len);
+
+    if (len <= 0) {
+        rb_raise(rb_eArgError, "length of chunk should be a positive number");
+    }
+
+    if (!NIL_P(opts)) {
+        static ID keywords[1];
+        VALUE strip_opt;
+        if (!keywords[0]) {
+            keywords[0] = rb_intern_const("strip");
+        }
+        rb_get_kwargs(opts, keywords, 0, 1, &strip_opt);
+        /* Qundef: the keyword was not passed */
+        strip = (strip_opt != Qundef && RTEST(strip_opt));
+    }
+
+    /* iterate over a frozen copy; orig is kept for the return value */
+    str = rb_str_new_frozen(str);
+    str_len = rb_str_strlen(str);
+
+    pos = 0;
+    while (pos < str_len) {
+        substr = rb_str_substr(str, pos, len);
+        if (strip) {
+            substr = rb_str_strip(substr);
+        }
+        ENUM_ELEM(ary, substr);
+
+        /* that was the last chunk; do not let pos + len overflow */
+        if (len >= str_len - pos) break;
+        pos += len;
+    }
+
+    RB_GC_GUARD(str);
+
+    if (ary)
+        return ary;
+    else
+        return orig;
+}
+
+/*
+ *  call-seq:
+ *     str.each_chunk(length, strip: false) {|chunk| block }  -> str
+ *     str.each_chunk(length, strip: false)                   -> an_enumerator
+ *
+ *  Passes each successive chunk of <i>length</i> characters in <i>str</i>
+ *  to the given block, or returns an enumerator if no block is given.
+ *  The last chunk may be shorter than <i>length</i>.  If <i>strip</i> is
+ *  true, leading and trailing whitespace is removed from every chunk.
+ *  Raises ArgumentError if <i>length</i> is not positive.
+ *
+ *     "hello hello\nhello".each_chunk(6) {|c| p c }
+ *     "hello hello\nhello".each_chunk(6, strip: true) {|c| p c }
+ *
+ *  produces:
+ *
+ *     "hello "
+ *     "hello\n"
+ *     "hello"
+ *
+ *     "hello"
+ *     "hello"
+ *     "hello"
+ */
+
+static VALUE
+rb_str_each_chunk(int argc, VALUE *argv, VALUE str)
+{
+    RETURN_SIZED_ENUMERATOR(str, argc, argv, rb_str_each_chunk_size);
+    return rb_str_enumerate_chunk(argc, argv, str, 0);
+}
+
+/*
+ *  call-seq:
+ *     str.chunks(length, strip: false)    -> an_array
+ *
+ *  Returns an array of the chunks of <i>length</i> characters in
+ *  <i>str</i>.  This is a shorthand for
+ *  <code>str.each_chunk(length, strip: bool).to_a</code>.
+ *
+ *     "hello hello\nhello".chunks(6)              #=> ["hello ", "hello\n", "hello"]
+ *     "hello hello\nhello".chunks(6, strip: true) #=> ["hello", "hello", "hello"]
+ *
+ */
+static VALUE
+rb_str_chunks(int argc, VALUE *argv, VALUE str)
+{
+    VALUE rb_len, opts, ary;
+    rb_scan_args(argc, argv, "1:", &rb_len, &opts);
+    ary = WANTARRAY("chunks", each_chunk_size(str, rb_len));
+    return rb_str_enumerate_chunk(argc, argv, str, ary);
+}
+
 /*
  *  call-seq:
  *     str.chars    -> an_array
@@ -11034,6 +11194,7 @@ Init_String(void)
     rb_define_method(rb_cString, "chars", rb_str_chars, 0);
     rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
     rb_define_method(rb_cString, "grapheme_clusters", rb_str_grapheme_clusters, 0);
+    rb_define_method(rb_cString, "chunks", rb_str_chunks, -1);
     rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
     rb_define_method(rb_cString, "reverse!", rb_str_reverse_bang, 0);
     rb_define_method(rb_cString, "concat", rb_str_concat_multi, -1);
@@ -11090,6 +11251,7 @@ Init_String(void)
     rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
     rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
     rb_define_method(rb_cString, "each_grapheme_cluster", rb_str_each_grapheme_cluster, 0);
+    rb_define_method(rb_cString, "each_chunk", rb_str_each_chunk, -1);
 
     rb_define_method(rb_cString, "sum", rb_str_sum, -1);
 
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index f308cccbcd..7e2057cc3e 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1146,6 +1146,66 @@ def test_lines
     end
   end
 
+  def test_each_chunk
+    s = S("hello world\nhello")
+
+    res = []
+    s.each_chunk(6) {|x| res << x }
+    assert_equal([S("hello "), S("world\n"), S("hello")], res)
+
+    res = []
+    s.each_chunk(6, strip: true) {|x| res << x }
+    assert_equal([S("hello"), S("world"), S("hello")], res)
+
+    assert_equal([s], s.each_chunk(s.size).to_a)
+    assert_equal([s], s.each_chunk(65536).to_a)
+
+    assert_equal(3, s.each_chunk(6).size)
+    assert_nil(s.each_chunk(0).size)
+    assert_nil(s.each_chunk(-1).size)
+    assert_nil(s.each_chunk.size)
+    assert_equal(1, s.each_chunk(s.size).size)
+    assert_equal(1, s.each_chunk(65536).size)
+    assert_equal(0, S("").each_chunk(6).size)
+
+    assert_raise(ArgumentError) { s.each_chunk(0).each {} }
+    assert_raise(ArgumentError) { s.each_chunk(-1).each {} }
+    assert_raise(ArgumentError) { s.each_chunk.each {} }
+
+    s = S("\u{3053 3093 306b 3061 306f 0020 3053 3093 306b 3061 306f 000a 3053 3093 306b 3061 306f}")
+
+    res = []
+    s.each_chunk(6) {|x| res << x }
+    assert_equal([S("\u{3053 3093 306b 3061 306f 0020}"), S("\u{3053 3093 306b 3061 306f 000a}"), S("\u{3053 3093 306b 3061 306f}")], res)
+
+    res = []
+    s.each_chunk(6, strip: true) {|x| res << x }
+    assert_equal([S("\u{3053 3093 306b 3061 306f}"), S("\u{3053 3093 306b 3061 306f}"), S("\u{3053 3093 306b 3061 306f}")], res)
+
+    assert_equal(3, s.each_chunk(6).size)
+    assert_equal(1, s.each_chunk(s.size).size)
+  end
+
+  def test_chunks
+    s = S("hello world\nhello")
+    assert_equal(["hello ", "world\n", "hello"], s.chunks(6))
+    assert_equal(["hello", "world", "hello"], s.chunks(6, strip: true))
+    assert_equal(s.chars, s.chunks(1))
+    assert_equal([s], s.chunks(65536))
+
+    s = S("\u{3053 3093 306b 3061 306f 0020 3053 3093 306b 3061 306f 000a 3053 3093 306b 3061 306f}")
+    assert_equal([S("\u{3053 3093 306b 3061 306f 0020}"), S("\u{3053 3093 306b 3061 306f 000a}"), S("\u{3053 3093 306b 3061 306f}")], s.chunks(6))
+    assert_equal([S("\u{3053 3093 306b 3061 306f}"), S("\u{3053 3093 306b 3061 306f}"), S("\u{3053 3093 306b 3061 306f}")], s.chunks(6, strip: true))
+    assert_equal(s.chars, s.chunks(1))
+
+    s = S("hello world\nhello")
+    assert_raise(ArgumentError) { s.chunks(0) }
+    assert_raise(ArgumentError) { s.chunks(-1) }
+
+    s = S("")
+    assert_equal([], s.chunks(1))
+  end
+
   def test_empty?
     assert_empty(S(""))
     assert_not_empty(S("not"))