Feature #11814 » valid_encoding.patch
| string.c (working copy) | ||
|---|---|---|
|
ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
|
||
|
}
|
||
|
/*
 * Computes a code range for the bytes of str, scanned under the encoding enc.
 *
 * Returns ENC_CODERANGE_BROKEN without scanning when enc is a dummy encoding
 * whose minimum character length is greater than one byte. Otherwise returns
 * the value reported by coderange_scan() over RSTRING_PTR(str) /
 * RSTRING_LEN(str), using the encoding that get_actual_encoding() yields for
 * enc's index and str.
 *
 * This function itself does not call ENC_CODERANGE_SET; the caller decides
 * whether to store the result on str.
 */
static int
|
||
|
make_coderange(VALUE str, rb_encoding *enc)
|
||
|
{
|
||
|
/* NOTE(review): presumably a multi-byte-unit dummy encoding cannot be
 * scanned meaningfully, hence the early BROKEN result -- confirm. */
if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
|
||
|
return ENC_CODERANGE_BROKEN;
|
||
|
}
|
||
|
else {
|
||
|
return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
|
||
|
get_actual_encoding(rb_enc_to_index(enc), str));
|
||
|
}
|
||
|
}
|
||
|
int
|
||
|
rb_enc_str_coderange(VALUE str)
|
||
|
{
|
||
| ... | ... | |
|
if (cr == ENC_CODERANGE_UNKNOWN) {
|
||
|
int encidx = ENCODING_GET(str);
|
||
|
rb_encoding *enc = rb_enc_from_index(encidx);
|
||
|
if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
|
||
|
cr = ENC_CODERANGE_BROKEN;
|
||
|
}
|
||
|
else {
|
||
|
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
|
||
|
get_actual_encoding(encidx, str));
|
||
|
}
|
||
|
cr = make_coderange(str, enc);
|
||
|
ENC_CODERANGE_SET(str, cr);
|
||
|
}
|
||
|
return cr;
|
||
| ... | ... | |
|
/*
|
||
|
* call-seq:
|
||
|
* str.valid_encoding? -> true or false
|
||
|
* str.valid_encoding?(encoding = nil) -> true or false
|
||
|
*
|
||
|
* Returns true for a string which is correctly encoded.
|
||
|
*
|
||
|
* "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
|
||
|
* "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
|
||
|
* "\x80".force_encoding("UTF-8").valid_encoding? #=> false
|
||
|
*
|
||
|
* If `encoding` is specified, the string's bytes are validated against that encoding.
|
||
|
*
|
||
|
* "\xc2\xa1".valid_encoding?("UTF-8") #=> true
|
||
|
* "\xc2".valid_encoding?("UTF-8") #=> false
|
||
|
* "\x80".valid_encoding?("UTF-8") #=> false
|
||
|
*/
|
||
|
static VALUE
|
||
|
rb_str_valid_encoding_p(VALUE str)
|
||
|
rb_str_valid_encoding_p(int argc, VALUE *argv, VALUE str)
|
||
|
{
|
||
|
int cr = rb_enc_str_coderange(str);
|
||
|
VALUE enc = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
|
||
|
int cr;
|
||
|
if (NIL_P(enc)) {
|
||
|
cr = rb_enc_str_coderange(str);
|
||
|
}
|
||
|
else {
|
||
|
cr = make_coderange(str, rb_to_encoding(enc));
|
||
|
}
|
||
|
return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
|
||
|
}
|
||
| ... | ... | |
|
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
|
||
|
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
|
||
|
rb_define_method(rb_cString, "b", rb_str_b, 0);
|
||
|
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
|
||
|
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, -1);
|
||
|
rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
|
||
|
rb_fs = Qnil;
|
||
| test/ruby/test_m17n.rb (working copy) | ||
|---|---|---|
|
assert_equal(false, s.valid_encoding?, bug6190)
|
||
|
end
|
||
|
# Checks String#valid_encoding? when an encoding name is passed explicitly,
# using "euc-jp", "utf-16be" and "utf-8" as the encodings to validate against.
def test_valid_encoding_with_arg
|
||
|
# A single 0xa1 byte is expected to be invalid as EUC-JP, while two of them
# are expected to be valid regardless of how the string was built.
s = "\xa1"
|
||
|
assert_equal(false, s.valid_encoding?("euc-jp"))
|
||
|
assert_equal(true, (s+s).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
|
||
|
assert_equal(true, (s*2).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
|
||
|
assert_equal(true, ("%s%s" % [s, s]).valid_encoding?("euc-jp"))
|
||
|
assert_equal(true, (s.dup << s).valid_encoding?("euc-jp"))
|
||
|
assert_equal(true, "".center(2, s).valid_encoding?("euc-jp"))
|
||
|
# These three bytes are expected to be invalid in this order but valid once
# the string is reversed.
s = "\xa1\xa1\x8f"
|
||
|
assert_equal(false, s.valid_encoding?("euc-jp"))
|
||
|
assert_equal(true, s.reverse.valid_encoding?("euc-jp"))
|
||
|
# The result must change after bytes are appended to a string that was just
# reported as valid.
bug4018 = '[ruby-core:33027]'
|
||
|
s = "\xa1\xa1"
|
||
|
assert_equal(true, s.valid_encoding?("euc-jp"))
|
||
|
s << "\x8f"
|
||
|
assert_equal(false, s.valid_encoding?("euc-jp"), bug4018)
|
||
|
s = "aa"
|
||
|
assert_equal(true, s.valid_encoding?("utf-16be"))
|
||
|
s << "\xff"
|
||
|
assert_equal(false, s.valid_encoding?("utf-16be"), bug4018)
|
||
|
# After encode / encode! followed by force_encoding("ascii-8bit"), the lone
# 0xe9 byte is still expected to be reported as invalid UTF-8.
bug6190 = '[ruby-core:43557]'
|
||
|
s = "\xe9"
|
||
|
s = s.encode("utf-8", "utf-8")
|
||
|
s.force_encoding("ascii-8bit")
|
||
|
assert_equal(false, s.valid_encoding?("utf-8"), bug6190)
|
||
|
s = "\xe9"
|
||
|
s.encode!("utf-8", "utf-8")
|
||
|
s.force_encoding("ascii-8bit")
|
||
|
assert_equal(false, s.valid_encoding?("utf-8"), bug6190)
|
||
|
end
|
||
|
def test_getbyte
|
||
|
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(2))
|
||
|
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(-4))
|
||