|
module REXML
|
|
|
|
# A Source wraps a buffer (or another text-providing object) so that it can
# be searched for patterns and have its text consumed.
|
|
class Source
|
|
include Encoding

# Read-only accessors:
#   buffer   - the current buffer (what we're going to read next)
#   line     - the line number of the last consumed text
#   encoding - the value of @encoding (not assigned directly in this class;
#              presumably maintained by the Encoding mixin -- confirm)
attr_reader :buffer, :line, :encoding
|
|
|
|
# Constructor
# @param arg must be a String, and should be a valid XML document
# @param encoding if non-nil, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
  # @orig and @buffer start out referring to the same object.
  @orig = @buffer = arg
  # An explicitly supplied encoding wins; otherwise check_encoding is asked
  # to work one out from the buffer.
  self.encoding = encoding || check_encoding(@buffer)
  # Nothing has been consumed yet.
  @line = @pos = 0
end
|
|
|
|
|
|
# Inherited from Encoding
# Overridden to support optimized en/decoding
#
# The inherited writer runs first; when it returns a false value nothing
# else happens. Otherwise the encoded form of '>' is cached in @line_break
# and the buffer is either decoded (enc differs from UTF_8) or, when it
# supports force_encoding, tagged as UTF-8 in place.
# @param enc the encoding to switch to
def encoding=(enc)
  return unless super

  @line_break = encode('>')

  unless enc == UTF_8
    # Decode first, then record that conversion is in effect.
    @buffer = decode(@buffer)
    return @to_utf = true
  end

  @to_utf = false
  @buffer.force_encoding(Encoding::UTF_8) if @buffer.respond_to?(:force_encoding)
end
|
|
|
|
# Scans the source for a given pattern. Note that this is not your usual
# scan() method: the pattern argument has some requirements, and the source
# can be consumed, so this method is easy to misuse. Originally the search
# regexes were generated on the fly, which made patterns easier to write and
# this method more robust, but that was computationally expensive and slowed
# down the entire REXML package considerably, since this is by far the most
# commonly called method.
# @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
# will be returned; the second group is used if the cons flag is set.
# @param cons if true, the matched text is consumed, leaving everything
# after the (last) match in the Source.
# @return nil if the buffer is nil; otherwise the result of String#scan,
# which is an empty Array when the pattern is not found.
def scan(pattern, cons=false)
  return nil if @buffer.nil?

  found = @buffer.scan(pattern)
  if cons && !found.empty?
    # String#scan leaves the last successful match in $~, so the remainder
    # is whatever follows that final match.
    remainder = Regexp.last_match.post_match
    @pos += @buffer.length - remainder.length
    @buffer = remainder
  end
  found
end
|
|
|
|
# Does nothing in this class and returns nil.
# NOTE(review): presumably a hook for subclasses that can fetch more
# input -- confirm against the rest of the file.
def read
  nil
end
|
|
|
|
# Drops everything up to and including the first match of +pattern+ from
# the buffer and advances the position by the number of characters dropped.
# @param pattern the Regexp to match against the current buffer
# @return the remaining buffer when the pattern matched, nil otherwise
#   (in which case nothing changes)
def consume(pattern)
  md = pattern.match(@buffer)
  return nil unless md

  remainder = md.post_match
  @pos += @buffer.length - remainder.length
  @buffer = remainder
end
|
|
|
|
# Matches +pattern+ against the current buffer without consuming anything.
# @param char not used by this implementation
# @param pattern the Regexp to match
# @return the MatchData, or nil if the pattern does not match
def match_to(char, pattern)
  pattern.match(@buffer)
end
|
|
|
|
# Matches +pattern+ against the current buffer and, on success, consumes
# everything up to and including the match.
# @param char not used by this implementation
# @param pattern the Regexp to match
# @return the MatchData, or nil when the pattern does not match (the buffer
#   and position are then left untouched)
def match_to_consume( char, pattern )
  md = pattern.match(@buffer)
  # A failed match leaves no post-match text; without this guard the
  # position arithmetic below raised NoMethodError on nil.
  return nil unless md

  remainder = md.post_match
  @pos += @buffer.length - remainder.length
  @buffer = remainder
  md
end
|
|
|
|
# Matches +pattern+ against the current buffer.
# @param pattern the Regexp to match
# @param cons if true and the pattern matches, everything up to and
#   including the match is removed from the buffer and the position advances
# @return the MatchData, or nil if the pattern does not match
def match(pattern, cons=false)
  found = pattern.match(@buffer)
  if found && cons
    remainder = found.post_match
    @pos += @buffer.length - remainder.length
    @buffer = remainder
  end
  found
end
|
|
|
|
# @return true if the Source is exhausted, i.e. the buffer equals the empty
# string. A nil buffer is not treated as empty by this check.
def empty?
  @buffer == ""
end
|
|
|
|
# @return the current value of @pos, the running offset that the consuming
# methods advance by the number of characters they remove from the buffer
def position
  @pos
end
|
|
|
|
# Walks the original text line by line, adding up line lengths until the
# running total reaches the current position.
# @return the current line in the source: the 1-based number of the line on
# which the current position falls, the last line's number if the position
# lies beyond the text, or 0 if the original text has no lines
def current_line
  lineno = 0
  consumed = 0
  @orig.each_line do |text|
    consumed += text.length
    # Count the line itself; the previous code added 1 to the String block
    # variable, which raised TypeError and never advanced the counter.
    lineno += 1
    break if consumed >= @pos
  end
  lineno
end
|
|
end
|
|
|
|
end
|