ruby19-rexml-encoding-mismatch.diff

A test case that reproduces the problem, and a patch that fixes it. - kou (Kouhei Sutou), 05/09/2009 01:38 pm

Download (2.9 kB)

lib/rexml/source.rb (作業コピー)
162 162
        @line_break = ">"
163 163
      end
164 164
      super( @source.eof? ? str : str+@source.readline( @line_break ) )
165

  
166
      if !@to_utf and
167
          @buffer.respond_to?(:force_encoding) and
168
          @source.respond_to?(:external_encoding) and
169
          @source.external_encoding != ::Encoding::UTF_8
170
        @force_utf8 = true
171
      else
172
        @force_utf8 = false
173
      end
165 174
    end
166 175

  
167 176
    def scan(pattern, cons=false)
......
174 183
      if rv.size == 0
175 184
        until @buffer =~ pattern or @source.nil?
176 185
          begin
177
            # READLINE OPT
178
            #str = @source.read(@block_size)
179
            str = @source.readline(@line_break)
180
            str = decode(str) if @to_utf and str
181
            @buffer << str
186
            @buffer << readline
182 187
          rescue Iconv::IllegalSequence
183 188
            raise
184 189
          rescue
......
193 198

  
194 199
    def read
195 200
      begin
196
        str = @source.readline(@line_break)
197
        str = decode(str) if @to_utf and str
198
        @buffer << str
199
        if not @to_utf and @buffer.respond_to? :force_encoding
200
          @buffer.force_encoding Encoding::UTF_8
201
        end
201
        @buffer << readline
202 202
      rescue Exception, NameError
203 203
        @source = nil
204 204
      end
......
213 213
      @buffer = $' if cons and rv
214 214
      while !rv and @source
215 215
        begin
216
          str = @source.readline(@line_break)
217
          str = decode(str) if @to_utf and str
218
          @buffer << str
216
          @buffer << readline
219 217
          rv = pattern.match(@buffer)
220 218
          @buffer = $' if cons and rv
221 219
        rescue
......
254 252
      end
255 253
      [pos, lineno, line]
256 254
    end
255

  
256
    private
257
    def readline
258
      str = @source.readline(@line_break)
259
      return nil if str.nil?
260

  
261
      if @to_utf
262
        decode(str)
263
      else
264
        str.force_encoding(::Encoding::UTF_8) if @force_utf8
265
        str
266
      end
267
    end
257 268
  end
258 269
end
test/rexml/test_document.rb (作業コピー)
1
# -*- coding: utf-8 -*-
2

  
1 3
require "rexml/document"
2 4
require "test/unit"
3 5

  
......
63 65
  ensure
64 66
    REXML::Document.entity_expansion_limit = 10000
65 67
  end
68

  
69
  def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
70
    tag = "<b>...</b>"
71
    message = "こんにちは、世界!" # Hello world! in Japanese
72
    xml = <<EOX
73
<?xml version="1.0" encoding="UTF-8"?>
74
<message><![CDATA[#{tag}#{message}]]></message>
75
EOX
76
    xml.force_encoding(Encoding::ASCII_8BIT)
77
    doc = REXML::Document.new(xml)
78
    assert_equal("#{tag}#{message}", doc.root.children.first.value)
79
  end
66 80
end