Project

General

Profile

Feature #11814 » valid_encoding.patch

usa (Usaku NAKAMURA), 12/13/2015 12:38 PM

View differences:

string.c (working copy)
ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
}
/*
 * Work out the code range of the bytes of +str+ when read as +enc+.
 *
 * Returns ENC_CODERANGE_BROKEN straight away when +enc+ is a dummy
 * encoding whose minimum character length is greater than one byte.
 * Otherwise returns whatever coderange_scan() reports for the string's
 * bytes, using the encoding that get_actual_encoding() resolves from
 * the index of +enc+ and +str+.
 *
 * This function only computes the value; it does not itself store the
 * result on +str+ (callers do that with ENC_CODERANGE_SET if wanted).
 */
static int
make_coderange(VALUE str, rb_encoding *enc)
{
    /* Guard: wide dummy encodings are reported as broken without a scan. */
    if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc))
        return ENC_CODERANGE_BROKEN;

    return coderange_scan(RSTRING_PTR(str),
                          RSTRING_LEN(str),
                          get_actual_encoding(rb_enc_to_index(enc), str));
}
int
rb_enc_str_coderange(VALUE str)
{
......
if (cr == ENC_CODERANGE_UNKNOWN) {
int encidx = ENCODING_GET(str);
rb_encoding *enc = rb_enc_from_index(encidx);
if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
cr = ENC_CODERANGE_BROKEN;
}
else {
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
get_actual_encoding(encidx, str));
}
cr = make_coderange(str, enc);
ENC_CODERANGE_SET(str, cr);
}
return cr;
......
/*
 *  call-seq:
 *     str.valid_encoding?                  -> true or false
 *     str.valid_encoding?(encoding = nil)  -> true or false
 *
 *  Returns true for a string which is encoded correctly.
 *
 *     "\xc2\xa1".force_encoding("UTF-8").valid_encoding?  #=> true
 *     "\xc2".force_encoding("UTF-8").valid_encoding?      #=> false
 *     "\x80".force_encoding("UTF-8").valid_encoding?      #=> false
 *
 *  If +encoding+ is specified (and is not nil), the bytes of the string
 *  are validated against that encoding instead of the string's own one.
 *
 *     "\xc2\xa1".valid_encoding?("UTF-8")  #=> true
 *     "\xc2".valid_encoding?("UTF-8")      #=> false
 *     "\x80".valid_encoding?("UTF-8")      #=> false
 */
static VALUE
rb_str_valid_encoding_p(int argc, VALUE *argv, VALUE str)
{
    VALUE enc;
    int cr;

    /* Accept zero or one argument; rb_check_arity rejects anything else. */
    rb_check_arity(argc, 0, 1);
    enc = argc > 0 ? argv[0] : Qnil;

    if (NIL_P(enc)) {
        /* No encoding given (or explicit nil): use the string's own code range. */
        cr = rb_enc_str_coderange(str);
    }
    else {
        /* Explicit encoding: compute the code range under that encoding. */
        cr = make_coderange(str, rb_to_encoding(enc));
    }
    return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
}
......
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
rb_define_method(rb_cString, "b", rb_str_b, 0);
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, -1);
rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
rb_fs = Qnil;
test/ruby/test_m17n.rb (working copy)
assert_equal(false, s.valid_encoding?, bug6190)
end
# Exercises String#valid_encoding? when the encoding to validate against
# is passed as an argument rather than taken from the receiver.
def test_valid_encoding_with_arg
  # A single "\xa1" byte is expected to be invalid as EUC-JP, while every
  # way of joining two of them is expected to be valid.
  half = "\xa1"
  assert_equal(false, half.valid_encoding?("euc-jp"))
  assert_equal(true, (half+half).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
  assert_equal(true, (half*2).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
  assert_equal(true, ("%s%s" % [half, half]).valid_encoding?("euc-jp"))
  assert_equal(true, (half.dup << half).valid_encoding?("euc-jp"))
  assert_equal(true, "".center(2, half).valid_encoding?("euc-jp"))

  # A trailing "\x8f" is expected to be invalid; reversed it is expected valid.
  trailing = "\xa1\xa1\x8f"
  assert_equal(false, trailing.valid_encoding?("euc-jp"))
  assert_equal(true, trailing.reverse.valid_encoding?("euc-jp"))

  # The result must follow the string as it is appended to.
  bug4018 = '[ruby-core:33027]'
  grown = "\xa1\xa1"
  assert_equal(true, grown.valid_encoding?("euc-jp"))
  grown << "\x8f"
  assert_equal(false, grown.valid_encoding?("euc-jp"), bug4018)

  grown = "aa"
  assert_equal(true, grown.valid_encoding?("utf-16be"))
  grown << "\xff"
  assert_equal(false, grown.valid_encoding?("utf-16be"), bug4018)

  # Same check after a UTF-8 to UTF-8 encode, non-destructive then in place.
  bug6190 = '[ruby-core:43557]'
  converted = "\xe9".encode("utf-8", "utf-8")
  converted.force_encoding("ascii-8bit")
  assert_equal(false, converted.valid_encoding?("utf-8"), bug6190)

  in_place = "\xe9"
  in_place.encode!("utf-8", "utf-8")
  in_place.force_encoding("ascii-8bit")
  assert_equal(false, in_place.valid_encoding?("utf-8"), bug6190)
end
def test_getbyte
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(2))
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(-4))
    (1-1/1)