Project

General

Profile

Backport #1182 » fixed_source.rb

Fixed REXML::Source class based on rexml/source.rb from 1.9.1 - gettalong (Thomas Leitner), 02/20/2009 04:35 PM

 
1
module REXML
2

    
3
  # A Source can be searched for patterns, and wraps buffers and other
4
  # objects and provides consumption of text
5
  class Source
6
    include Encoding
7
    # The current buffer (what we're going to read next)
8
    attr_reader :buffer
9
    # The line number of the last consumed text
10
    attr_reader :line
11
    attr_reader :encoding
12

    
13
    # Constructor
14
    # @param arg must be a String, and should be a valid XML document
15
    # @param encoding if non-null, sets the encoding of the source to this
16
    # value, overriding all encoding detection
17
    def initialize(arg, encoding=nil)
      # Keep the untouched input in @orig; @buffer starts out as the same
      # object and is what the other methods consume from.
      @orig = arg
      @buffer = arg
      # An explicitly supplied encoding takes precedence; otherwise ask
      # check_encoding to pick one from the buffer contents.
      self.encoding = encoding || check_encoding( @buffer )
      @line = 0
      @pos = 0
    end
27

    
28

    
29
    # Inherited from Encoding
30
    # Overridden to support optimized en/decoding
31
    def encoding=(enc)
      # Stop here when the inherited setter returns a falsy value.
      return unless super
      @line_break = encode( '>' )
      if enc == UTF_8
        # Already UTF-8: no conversion needed, just tag the buffer when the
        # running Ruby supports per-string encodings.
        @to_utf = false
        @buffer.force_encoding Encoding::UTF_8 if @buffer.respond_to? :force_encoding
      else
        # Any other encoding: convert the buffer via decode and remember
        # that conversion is in effect.
        @buffer = decode(@buffer)
        @to_utf = true
      end
    end
44

    
45
    # Scans the source for a given pattern.  Note, that this is not your
46
    # usual scan() method.  For one thing, the pattern argument has some
47
    # requirements; for another, the source can be consumed.  You can easily
48
    # confuse this method.  Originally, the patterns were easier
49
    # to construct and this method more robust, because this method
50
    # generated search regexes on the fly; however, this was
51
    # computationally expensive and slowed down the entire REXML package
52
    # considerably, since this is by far the most commonly called method.
53
    # @param pattern must be a Regexp, and must be in the form of
54
    # /^\s*(#{your pattern, with no groups})(.*)/.  The first group
55
    # will be returned; the second group is used if the consume flag is
56
    # set.
57
    # @param consume if true, the pattern returned will be consumed, leaving
58
    # everything after it in the Source.
59
    # @return the pattern, if found, or nil if the Source is empty or the
60
    # pattern is not found.
61
    def scan(pattern, cons=false)
      # Nothing to scan when there is no buffer at all.
      return nil if @buffer.nil?
      rv = @buffer.scan(pattern)
      if cons && !rv.empty?
        # $' holds the text following the last match made by String#scan;
        # everything before it counts as consumed.
        rest = $'
        @pos += @buffer.length - rest.length
        @buffer = rest
      end
      # Return the array of matches (empty when the pattern was not found).
      rv
    end
67

    
68
    # Intentionally does nothing and returns nil for this in-memory source.
    # NOTE(review): presumably a hook for sources that can fetch more data;
    # confirm against subclasses.
    def read
    end
70

    
71
    # Drops everything up to and including the first match of +pattern+
    # from the buffer and advances the position by the amount removed.
    # @param pattern a Regexp matched against the current buffer
    # @return the remaining buffer when the pattern matched, otherwise nil
    # (the buffer and position are left unchanged in that case)
    def consume( pattern )
      return unless pattern.match( @buffer )
      # $' is the text after the match that was just made.
      rest = $'
      @pos += @buffer.length - rest.length
      @buffer = rest
    end
74

    
75
    # Matches +pattern+ against the current buffer without consuming
    # anything.
    # @param char not used by this implementation
    # @param pattern a Regexp matched against the buffer
    # @return the MatchData, or nil when the pattern does not match
    def match_to( char, pattern )
      pattern.match(@buffer)
    end
78

    
79
    # Matches +pattern+ against the current buffer and, on success, removes
    # everything up to and including the match and advances the position.
    # When the pattern does not match, the buffer and position are left
    # unchanged and nil is returned; dereferencing $' after a failed match
    # would raise NoMethodError because it is nil.
    # @param char not used by this implementation
    # @param pattern a Regexp matched against the buffer
    # @return the MatchData, or nil when the pattern does not match
    def match_to_consume( char, pattern )
      md = pattern.match(@buffer)
      return nil unless md
      rest = md.post_match
      @pos += @buffer.length - rest.length
      @buffer = rest
      md
    end
85

    
86
    # Matches +pattern+ against the current buffer.
    # @param pattern a Regexp matched against the buffer
    # @param cons when true and the pattern matches, everything up to and
    # including the match is removed from the buffer and the position is
    # advanced accordingly
    # @return the MatchData, or nil when the pattern does not match
    def match(pattern, cons=false)
      md = pattern.match(@buffer)
      if md && cons
        remainder = md.post_match
        @pos += @buffer.length - remainder.length
        @buffer = remainder
      end
      md
    end
91

    
92
    # @return true if the Source is exhausted
93
    def empty?
      # True only when the buffer is exactly the empty string.
      @buffer == ""
    end
96

    
97
    # @return the running count of characters removed from the buffer by
    # the consuming methods of this class
    def position
      @pos
    end
100

    
101
    # @return the current line in the source
102
    # Walks the original input line by line, adding up line lengths until
    # the running total reaches the current position, and reports how many
    # lines were visited (1-based; 0 when the input has no lines).
    def current_line
      lineno = 0
      curpos = 0
      @orig.each_line do |text|
        curpos += text.length
        # Count the line itself; the block variable is the line's text and
        # must not be used as the counter.
        lineno += 1
        break if curpos >= @pos
      end
      lineno
    end
112
  end
113

    
114
end