Project

General

Profile

Bug #12373 » new_csv_shift.rb

ksss (Yuki Kurihara), 05/12/2016 12:39 AM

 
class CSV
def shift
#########################################################################
### This method is purposefully kept a bit long as simple conditional ###
### checks are faster than numerous (expensive) method calls. ###
#########################################################################

# handle headers not based on document content
if header_row? and @return_headers and
[Array, String].include? @use_headers.class
if @unconverted_fields
return add_unconverted_fields(parse_headers, Array.new)
else
return parse_headers
end
end

#
# it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
# because of \r and/or \n characters embedded in quoted fields
#
in_extended_col = false
csv = Array.new

loop do
# add another read to the line
unless parse = @io.gets(@row_sep)
return nil
end

parse.sub!(@parsers[:line_end], "")

if csv.empty?
#
# I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
# CSV's <tt>[nil]</tt>
#
if parse.empty?
@lineno += 1
if @skip_blanks
next
elsif @unconverted_fields
return add_unconverted_fields(Array.new, Array.new)
elsif @use_headers
return self.class::Row.new(Array.new, Array.new)
else
return Array.new
end
end
end

next if @skip_lines and @skip_lines.match parse

parts = parse.split(@col_sep, -1)
if parts.empty?
if in_extended_col
csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
else
csv << nil
end
end

# This loop is the hot path of csv parsing. Some things may be non-dry
# for a reason. Make sure to benchmark when refactoring.
parts.each do |part|
if in_extended_col
# If we are continuing a previous column
if part[-1] == @quote_char && part.count(@quote_char) % 2 != 0
# extended column ends
csv.last << part[0..-2]
if csv.last =~ @parsers[:stray_quote]
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
csv.last.gsub!(@quote_char * 2, @quote_char)
in_extended_col = false
else
csv.last << part
csv.last << @col_sep
end
elsif part.start_with?(@quote_char)
# If we are starting a new quoted column
if part.count(@quote_char) % 2 != 0
# start an extended column
csv << part[1..-1]
csv.last << @col_sep
in_extended_col = true
elsif part[-1] == @quote_char
# regular quoted column
csv << part[1..-2]
if csv.last =~ @parsers[:stray_quote]
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
csv.last.gsub!(@quote_char * 2, @quote_char)
elsif @liberal_parsing
csv << part
else
raise MalformedCSVError,
"Missing or stray quote in line #{lineno + 1}"
end
elsif part =~ @parsers[:quote_or_nl]
# Unquoted field with bad characters.
if part =~ @parsers[:nl_or_lf]
raise MalformedCSVError, "Unquoted fields do not allow " +
"\\r or \\n (line #{lineno + 1})."
else
if @liberal_parsing
csv << part
else
raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
end
end
else
# Regular ole unquoted field.
csv << (part.empty? ? nil : part)
end
end

# Replace tacked on @col_sep with @row_sep if we are still in an extended
# column.
csv[-1][-1] = @row_sep if in_extended_col

if in_extended_col
# if we're at eof?(), a quoted field wasn't closed...
if @io.eof?
raise MalformedCSVError,
"Unclosed quoted field on line #{lineno + 1}."
elsif @field_size_limit and csv.last.size >= @field_size_limit
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
end
# otherwise, we need to loop and pull some more data to complete the row
else
@lineno += 1

# save fields unconverted fields, if needed...
unconverted = csv.dup if @unconverted_fields

# convert fields, if needed...
csv = convert_fields(csv) unless @use_headers or @converters.empty?
# parse out header rows and handle CSV::Row conversions...
csv = parse_headers(csv) if @use_headers

# inject unconverted fields and accessor, if requested...
if @unconverted_fields and not csv.respond_to? :unconverted_fields
add_unconverted_fields(csv, unconverted)
end

# return the results
break csv
end
end
end
end
(3-3/9)