Bug #14363 » each_grapheme_cluster_size_real.patch
string.c | ||
---|---|---|
return rb_str_enumerate_codepoints(str, ary);
|
||
}
|
||
static VALUE
|
||
rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
|
||
{
|
||
long grapheme_cluster_count = 0;
|
||
regex_t *reg_grapheme_cluster = NULL;
|
||
static regex_t *reg_grapheme_cluster_utf8 = NULL;
|
||
int encidx = ENCODING_GET(str);
|
||
rb_encoding *enc = rb_enc_from_index(encidx);
|
||
int unicode_p = rb_enc_unicode_p(enc);
|
||
const char *ptr, *end;
|
||
if (!unicode_p || single_byte_optimizable(str)) {
|
||
return rb_str_length(str);
|
||
}
|
||
/* synchronize */
|
||
if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
|
||
reg_grapheme_cluster = reg_grapheme_cluster_utf8;
|
||
}
|
||
if (!reg_grapheme_cluster) {
|
||
const OnigUChar source[] = "\\X";
|
||
int r = onig_new(®_grapheme_cluster, source, source + sizeof(source) - 1,
|
||
ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, NULL);
|
||
if (r) {
|
||
rb_bug("cannot compile grapheme cluster regexp");
|
||
}
|
||
if (encidx == rb_utf8_encindex()) {
|
||
reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
|
||
}
|
||
}
|
||
ptr = RSTRING_PTR(str);
|
||
end = RSTRING_END(str);
|
||
while (ptr < end) {
|
||
OnigPosition len = onig_match(reg_grapheme_cluster,
|
||
(const OnigUChar *)ptr, (const OnigUChar *)end,
|
||
(const OnigUChar *)ptr, NULL, 0);
|
||
if (len == 0) break;
|
||
if (len < 0) {
|
||
break;
|
||
}
|
||
grapheme_cluster_count++;
|
||
ptr += len;
|
||
}
|
||
RB_GC_GUARD(str);
|
||
return LONG2NUM(grapheme_cluster_count);
|
||
}
|
||
static VALUE
|
||
rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
|
||
{
|
||
... | ... | |
static VALUE
|
||
rb_str_each_grapheme_cluster(VALUE str)
|
||
{
|
||
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
|
||
RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
|
||
return rb_str_enumerate_grapheme_clusters(str, 0);
|
||
}
|
||
test/ruby/test_string.rb | ||
---|---|---|
"\u{1f469 200d 2764 fe0f 200d 1f469}",
|
||
].each do |g|
|
||
assert_equal [g], g.each_grapheme_cluster.to_a
|
||
assert_equal 1, g.each_grapheme_cluster.size
|
||
end
|
||
[
|
||
["\u{a 308}", ["\u000A", "\u0308"]],
|
||
["\u{d 308}", ["\u000D", "\u0308"]],
|
||
["abc", ["a", "b", "c"]],
|
||
].each do |str, grapheme_clusters|
|
||
assert_equal grapheme_clusters, str.each_grapheme_cluster.to_a
|
||
assert_equal grapheme_clusters.size, str.each_grapheme_cluster.size
|
||
end
|
||
assert_equal ["\u000A", "\u0308"], "\u{a 308}".each_grapheme_cluster.to_a
|
||
assert_equal ["\u000D", "\u0308"], "\u{d 308}".each_grapheme_cluster.to_a
|
||
assert_equal ["a", "b", "c"], "abc".b.each_grapheme_cluster.to_a
|
||
s = ("x"+"\u{10ABCD}"*250000)
|
||
assert_empty(s.each_grapheme_cluster {s.clear})
|
||
end
|
- « Previous
- 1
- 2
- Next »