Feature #11814 » valid_encoding.patch
string.c (working copy) | ||
---|---|---|
ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
|
||
}
|
||
static int
|
||
make_coderange(VALUE str, rb_encoding *enc)
|
||
{
|
||
if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
|
||
return ENC_CODERANGE_BROKEN;
|
||
}
|
||
else {
|
||
return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
|
||
get_actual_encoding(rb_enc_to_index(enc), str));
|
||
}
|
||
}
|
||
int
|
||
rb_enc_str_coderange(VALUE str)
|
||
{
|
||
... | ... | |
if (cr == ENC_CODERANGE_UNKNOWN) {
|
||
int encidx = ENCODING_GET(str);
|
||
rb_encoding *enc = rb_enc_from_index(encidx);
|
||
if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) {
|
||
cr = ENC_CODERANGE_BROKEN;
|
||
}
|
||
else {
|
||
cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str),
|
||
get_actual_encoding(encidx, str));
|
||
}
|
||
cr = make_coderange(str, enc);
|
||
ENC_CODERANGE_SET(str, cr);
|
||
}
|
||
return cr;
|
||
... | ... | |
/*
|
||
* call-seq:
|
||
* str.valid_encoding? -> true or false
|
||
* str.valid_encoding?(encoding = nil) -> true or false
|
||
*
|
||
* Returns true for a string which is encoded correctly.
|
||
*
|
||
* "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
|
||
* "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
|
||
* "\x80".force_encoding("UTF-8").valid_encoding? #=> false
|
||
*
|
||
* If `encoding` is specified, validate with the encoding.
|
||
*
|
||
* "\xc2\xa1".valid_encoding?("UTF-8") #=> true
|
||
* "\xc2".valid_encoding?("UTF-8") #=> false
|
||
* "\x80".valid_encoding?("UTF-8") #=> false
|
||
*/
|
||
static VALUE
|
||
rb_str_valid_encoding_p(VALUE str)
|
||
rb_str_valid_encoding_p(int argc, VALUE *argv, VALUE str)
|
||
{
|
||
int cr = rb_enc_str_coderange(str);
|
||
VALUE enc = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
|
||
int cr;
|
||
if (NIL_P(enc)) {
|
||
cr = rb_enc_str_coderange(str);
|
||
}
|
||
else {
|
||
cr = make_coderange(str, rb_to_encoding(enc));
|
||
}
|
||
return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
|
||
}
|
||
... | ... | |
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
|
||
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
|
||
rb_define_method(rb_cString, "b", rb_str_b, 0);
|
||
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
|
||
rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, -1);
|
||
rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
|
||
rb_fs = Qnil;
|
test/ruby/test_m17n.rb (working copy) | ||
---|---|---|
assert_equal(false, s.valid_encoding?, bug6190)
|
||
end
|
||
def test_valid_encoding_with_arg
|
||
s = "\xa1"
|
||
assert_equal(false, s.valid_encoding?("euc-jp"))
|
||
assert_equal(true, (s+s).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
|
||
assert_equal(true, (s*2).valid_encoding?("euc-jp"), "[ruby-dev:33826]")
|
||
assert_equal(true, ("%s%s" % [s, s]).valid_encoding?("euc-jp"))
|
||
assert_equal(true, (s.dup << s).valid_encoding?("euc-jp"))
|
||
assert_equal(true, "".center(2, s).valid_encoding?("euc-jp"))
|
||
s = "\xa1\xa1\x8f"
|
||
assert_equal(false, s.valid_encoding?("euc-jp"))
|
||
assert_equal(true, s.reverse.valid_encoding?("euc-jp"))
|
||
bug4018 = '[ruby-core:33027]'
|
||
s = "\xa1\xa1"
|
||
assert_equal(true, s.valid_encoding?("euc-jp"))
|
||
s << "\x8f"
|
||
assert_equal(false, s.valid_encoding?("euc-jp"), bug4018)
|
||
s = "aa"
|
||
assert_equal(true, s.valid_encoding?("utf-16be"))
|
||
s << "\xff"
|
||
assert_equal(false, s.valid_encoding?("utf-16be"), bug4018)
|
||
bug6190 = '[ruby-core:43557]'
|
||
s = "\xe9"
|
||
s = s.encode("utf-8", "utf-8")
|
||
s.force_encoding("ascii-8bit")
|
||
assert_equal(false, s.valid_encoding?("utf-8"), bug6190)
|
||
s = "\xe9"
|
||
s.encode!("utf-8", "utf-8")
|
||
s.force_encoding("ascii-8bit")
|
||
assert_equal(false, s.valid_encoding?("utf-8"), bug6190)
|
||
end
|
||
def test_getbyte
|
||
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(2))
|
||
assert_equal(0x82, u("\xE3\x81\x82\xE3\x81\x84").getbyte(-4))
|