ruby19-rexml-encoding-mismatch.diff
| lib/rexml/source.rb (作業コピー) | ||
|---|---|---|
| 162 | 162 |
@line_break = ">" |
| 163 | 163 |
end |
| 164 | 164 |
super( @source.eof? ? str : str+@source.readline( @line_break ) ) |
| 165 | ||
| 166 |
if !@to_utf and |
|
| 167 |
@buffer.respond_to?(:force_encoding) and |
|
| 168 |
@source.respond_to?(:external_encoding) and |
|
| 169 |
@source.external_encoding != ::Encoding::UTF_8 |
|
| 170 |
@force_utf8 = true |
|
| 171 |
else |
|
| 172 |
@force_utf8 = false |
|
| 173 |
end |
|
| 165 | 174 |
end |
| 166 | 175 | |
| 167 | 176 |
def scan(pattern, cons=false) |
| ... | ... | |
| 174 | 183 |
if rv.size == 0 |
| 175 | 184 |
until @buffer =~ pattern or @source.nil? |
| 176 | 185 |
begin |
| 177 |
# READLINE OPT |
|
| 178 |
#str = @source.read(@block_size) |
|
| 179 |
str = @source.readline(@line_break) |
|
| 180 |
str = decode(str) if @to_utf and str |
|
| 181 |
@buffer << str |
|
| 186 |
@buffer << readline |
|
| 182 | 187 |
rescue Iconv::IllegalSequence |
| 183 | 188 |
raise |
| 184 | 189 |
rescue |
| ... | ... | |
| 193 | 198 | |
| 194 | 199 |
def read |
| 195 | 200 |
begin |
| 196 |
str = @source.readline(@line_break) |
|
| 197 |
str = decode(str) if @to_utf and str |
|
| 198 |
@buffer << str |
|
| 199 |
if not @to_utf and @buffer.respond_to? :force_encoding |
|
| 200 |
@buffer.force_encoding Encoding::UTF_8 |
|
| 201 |
end |
|
| 201 |
@buffer << readline |
|
| 202 | 202 |
rescue Exception, NameError |
| 203 | 203 |
@source = nil |
| 204 | 204 |
end |
| ... | ... | |
| 213 | 213 |
@buffer = $' if cons and rv |
| 214 | 214 |
while !rv and @source |
| 215 | 215 |
begin |
| 216 |
str = @source.readline(@line_break) |
|
| 217 |
str = decode(str) if @to_utf and str |
|
| 218 |
@buffer << str |
|
| 216 |
@buffer << readline |
|
| 219 | 217 |
rv = pattern.match(@buffer) |
| 220 | 218 |
@buffer = $' if cons and rv |
| 221 | 219 |
rescue |
| ... | ... | |
| 254 | 252 |
end |
| 255 | 253 |
[pos, lineno, line] |
| 256 | 254 |
end |
| 255 | ||
| 256 |
private |
|
| 257 |
def readline |
|
| 258 |
str = @source.readline(@line_break) |
|
| 259 |
return nil if str.nil? |
|
| 260 | ||
| 261 |
if @to_utf |
|
| 262 |
decode(str) |
|
| 263 |
else |
|
| 264 |
str.force_encoding(::Encoding::UTF_8) if @force_utf8 |
|
| 265 |
str |
|
| 266 |
end |
|
| 267 |
end |
|
| 257 | 268 |
end |
| 258 | 269 |
end |
| test/rexml/test_document.rb (作業コピー) | ||
|---|---|---|
| 1 |
# -*- coding: utf-8 -*- |
|
| 2 | ||
| 1 | 3 |
require "rexml/document" |
| 2 | 4 |
require "test/unit" |
| 3 | 5 | |
| ... | ... | |
| 63 | 65 |
ensure |
| 64 | 66 |
REXML::Document.entity_expansion_limit = 10000 |
| 65 | 67 |
end |
| 68 | ||
| 69 |
def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source |
|
| 70 |
tag = "<b>...</b>" |
|
| 71 |
message = "こんにちは、世界!" # Hello world! in Japanese |
|
| 72 |
xml = <<EOX |
|
| 73 |
<?xml version="1.0" encoding="UTF-8"?> |
|
| 74 |
<message><![CDATA[#{tag}#{message}]]></message>
|
|
| 75 |
EOX |
|
| 76 |
xml.force_encoding(Encoding::ASCII_8BIT) |
|
| 77 |
doc = REXML::Document.new(xml) |
|
| 78 |
assert_equal("#{tag}#{message}", doc.root.children.first.value)
|
|
| 79 |
end |
|
| 66 | 80 |
end |