ruby_19_csv_parser_split_methods.patch - Ruby master - Ruby Issue Tracking System

Feature #4017 » ruby_19_csv_parser_split_methods.patch

Patch 1/2 - ender672 (Timothy Elliott), 11/03/2010 09:43 AM

     #   CSV(csv = "")   { |csv_str| csv_str << %w{my data here} }  # to a String
     #   CSV($stderr)    { |csv_err| csv_err << %w{my data here} }  # to $stderr
     #   CSV($stdin)     { |csv_in|  csv_in.each { |row| p row } }  # from $stdin
+    #
+    #
     # == Advanced Usage
+    #
+    #
     # === Wrap an IO Object
+    #
+    #
     #   csv = CSV.new(io, options)
     #   # ... read (with gets() or each()) from and write (with <<) to csv here ...
+    #
-...
+        #
         # This method assumes you want the Table.headers(), unless you explicitly
         # pass <tt>:write_headers => false</tt>.
+        #
+        #
         def to_csv(options = Hash.new)
           wh = options.fetch(:write_headers, true)
           @table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row|
-...
       # <b><tt>:row_sep</tt></b>::            <tt>:auto</tt>
       # <b><tt>:quote_char</tt></b>::         <tt>'"'</tt>
       # <b><tt>:field_size_limit</tt></b>::   +nil+
       # <b><tt>:io_read_limit</tt></b>::      <tt>2048</tt>
       # <b><tt>:converters</tt></b>::         +nil+
       # <b><tt>:unconverted_fields</tt></b>:: +nil+
       # <b><tt>:headers</tt></b>::            +false+
-...
                           row_sep:            :auto,
                           quote_char:         '"',
                           field_size_limit:   nil,
                           io_read_limit:      2048,
                           converters:         nil,
                           unconverted_fields: nil,
                           headers:            false,
-...
         # track our own lineno since IO gets confused about line-ends in CSV fields
         @lineno = 0
         @data_buf = nil
       end
+      #
-...
       def rewind
         # Forget everything derived from earlier reads: the parsed headers and
         # any partially consumed read buffer are dropped, and the line counter
         # starts over.
         @headers = @data_buf = nil
         @lineno = 0
         # Finally reposition the underlying IO; its result is our return value.
         @io.rewind
       end
-...
       # The data source must be open for reading.
+      #
       def shift
         #########################################################################
         ### This method is purposefully kept a bit long as simple conditional ###
         ### checks are faster than numerous (expensive) method calls.         ###
         #########################################################################
         # handle headers not based on document content
         if header_row? and @return_headers and
            [Array, String].include? @use_headers.class
-...
           end
         end
+        #
         # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
         # because of \r and/or \n characters embedded in quoted fields
+        #
         in_extended_col = false
         csv             = Array.new
         @lineno += 1
         csv = parse_csv_row
         return unless csv
         loop do
           # add another read to the line
           unless parse = @io.gets(@row_sep)
             return nil
         if csv == [nil]
           if @skip_blanks
             return shift
           elsif @unconverted_fields
             return add_unconverted_fields(Array.new, Array.new)
           elsif @use_headers
             return self.class::Row.new(Array.new, Array.new)
           else
             return Array.new
           end
         end
           parse.sub!(@parsers[:line_end], "")
         # save unconverted fields, if needed...
         unconverted = csv.dup if @unconverted_fields
           if csv.empty?
+            #
             # I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
             # CSV's <tt>[nil]</tt>
+            #
             if parse.empty?
               @lineno += 1
               if @skip_blanks
                 next
               elsif @unconverted_fields
                 return add_unconverted_fields(Array.new, Array.new)
               elsif @use_headers
                 return self.class::Row.new(Array.new, Array.new)
               else
                 return Array.new
               end
             end
           end
         # convert fields, if needed...
         csv = convert_fields(csv) unless @use_headers or @converters.empty?
         # parse out header rows and handle CSV::Row conversions...
         csv = parse_headers(csv)  if     @use_headers
           parts =  parse.split(@col_sep, -1)
           if parts.empty?
             if in_extended_col
               csv[-1] << @col_sep   # will be replaced with a @row_sep after the parts.each loop
             else
               csv << nil
             end
           end
         # inject unconverted fields and accessor, if requested...
         if @unconverted_fields and not csv.respond_to? :unconverted_fields
           add_unconverted_fields(csv, unconverted)
         end
           # This loop is the hot path of csv parsing. Some things may be non-dry
           # for a reason. Make sure to benchmark when refactoring.
           parts.each do |part|
             if in_extended_col
               # If we are continuing a previous column
               if part[-1] == @quote_char && part.count(@quote_char) % 2 != 0
                 # extended column ends
                 csv.last << part[0..-2]
                 raise MalformedCSVError if csv.last =~ @parsers[:stray_quote]
                 csv.last.gsub!(@quote_char * 2, @quote_char)
                 in_extended_col = false
               else
                 csv.last << part
                 csv.last << @col_sep
               end
             elsif part[0] == @quote_char
               # If we are starting a new quoted column
               if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
                 # start an extended column
                 csv             << part[1..-1]
                 csv.last        << @col_sep
                 in_extended_col =  true
               else
                 # regular quoted column
                 csv << part[1..-2]
                 raise MalformedCSVError if csv.last =~ @parsers[:stray_quote]
                 csv.last.gsub!(@quote_char * 2, @quote_char)
               end
             elsif part =~ @parsers[:quote_or_nl]
               # Unquoted field with bad characters.
               if part =~ @parsers[:nl_or_lf]
                 raise MalformedCSVError, "Unquoted fields do not allow " +
                                          "\\r or \\n (line #{lineno + 1})."
               else
                 raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
               end
             else
               # Regular ole unquoted field.
               csv << (part.empty? ? nil : part)
             end
           end
           # Replace tacked on @col_sep with @row_sep if we are still in an extended
           # column.
           csv[-1][-1] = @row_sep if in_extended_col
           if in_extended_col
             # if we're at eof?(), a quoted field wasn't closed...
             if @io.eof?
               raise MalformedCSVError,
                     "Unclosed quoted field on line #{lineno + 1}."
             elsif @field_size_limit and csv.last.size >= @field_size_limit
               raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
             end
             # otherwise, we need to loop and pull some more data to complete the row
           else
             @lineno += 1
             # save unconverted fields, if needed...
             unconverted = csv.dup if @unconverted_fields
             # convert fields, if needed...
             csv = convert_fields(csv) unless @use_headers or @converters.empty?
             # parse out header rows and handle CSV::Row conversions...
             csv = parse_headers(csv)  if     @use_headers
             # inject unconverted fields and accessor, if requested...
             if @unconverted_fields and not csv.respond_to? :unconverted_fields
               add_unconverted_fields(csv, unconverted)
             end
             # return the results
             break csv
           end
         end
         # return the results
         csv
       end
       alias_method :gets,     :shift
       alias_method :readline, :shift
-...
       private
       def parse_csv_row
         # Builds one row (an Array of fields) by alternating between the
         # unquoted reader and the quoted reader. Returns nil when the unquoted
         # reader has nothing left, and raises MalformedCSVError when a chunk
         # cannot be fitted into the row.
         chunk = io_get_unquoted
         return unless chunk
         fields = []
         loop do
           if chunk == @quote_char
             # A lone quote character opens a quoted field.
             fields << io_get_quoted
             chunk = io_get_unquoted
             # Nothing more, or just the row separator: the row is complete.
             return fields if chunk.nil? || chunk == @row_sep
             # A lone column separator means one trailing empty field.
             return fields << nil if chunk == @col_sep
             # Anything else must begin with a column separator, which is
             # stripped before the remainder is examined on the next pass.
             break unless chunk.slice!(0, @col_sep.size) == @col_sep
           elsif chunk == @row_sep
             # Only a row separator remains: finish with an empty field.
             return fields << nil
           else
             # chomp! returns nil when the chunk did not end with the row
             # separator, so this doubles as an "end of row seen" flag.
             ended_row = chunk.chomp!(@row_sep)
             if chunk.count(@nl_lf) > 0
               raise MalformedCSVError, "Unquoted fields do not allow " +
                                        "\\r or \\n (line #{@lineno})."
             end
             # Empty strings between separators are recorded as nil fields.
             chunk.split(@col_sep, -1).each do |cell|
               fields << (cell.empty? ? nil : cell)
             end
             return fields if ended_row
             if fields.last == @quote_char
               # The chunk ended on an opening quote; hand it back to the
               # quoted branch on the next pass.
               chunk = fields.pop
             elsif data_buf_eof?
               return fields
             else
               break
             end
           end
         end
         raise MalformedCSVError, "Illegal quoting on line #{@lineno}."
       end
       # Read @io and return everything until the next unescaped quote character.
       # Escaped quote characters are automatically unescaped. Raises an error if we
       # hit @io.eof? or @field_size_limit without encountering an ending quote.
+      #
       # Only successfully returns if the read ended with a @quote_char. This
       # prevents us from returning only part of a multibyte character.
       def io_get_quoted
         # Reads the body of a quoted field from @io and returns it with the
         # closing quote removed and doubled quotes collapsed to one.
         #
         # Side effect: the chunk read right after the closing quote is left in
         # @data_buf (nil when @io had no more data).
         #
         # Raises MalformedCSVError when @io is already exhausted, when a read
         # does not end with @quote_char (no closing quote before EOF or before
         # the @field_size_limit cut-off), or when the data ends right after a
         # doubled quote.
         buf = @io.gets(@quote_char, @field_size_limit)
         # gets returns nil at EOF; without this guard the chomp! below would
         # fail with NoMethodError instead of reporting malformed CSV.
         if buf.nil?
           raise MalformedCSVError, "Unclosed quoted field on line #{@lineno}."
         end
         # chomp! returns nil when buf does not end with the quote character,
         # which ends the loop and falls through to the error below.
         while buf.chomp!(@quote_char)
           @data_buf = @io.gets(@quote_char, @io_read_limit)
           # Anything other than a lone quote means the field really ended.
           return buf unless @data_buf == @quote_char
           break if @io.eof?
           # A lone quote was the second half of an escaped quote: keep one
           # literal quote and continue reading the field.
           buf << @quote_char << @io.gets(@quote_char, @field_size_limit)
         end
         raise MalformedCSVError, "Illegal quoting on line #{@lineno}."
       end
       # Read @io and return everything until we hit a newline or a quote character.
       # Raise an error if we exceed @field_size_limit.
+      #
       # Only successfully returns if the read contains @row_sep, ends with a
       # @quote_char, or reaches @io.eof? This prevents us from returning only part
       # of a multibyte character.
+      #
       # If we have multibyte encoding then it is possible that a @quote_char will be
       # truncated on @io.gets. We only check for this if we exhaust the data buffer.
       def io_get_unquoted
         # Returns the next chunk of unquoted data, or nil when there is no
         # buffered data and @io has nothing left to read. A chunk is either
         # everything up to and including the first @row_sep, or the whole
         # buffer when it ends with @row_sep / @quote_char or @io hit EOF.
         @data_buf ||= @io.gets(@quote_char, @io_read_limit)
         return unless @data_buf
         loop do
           sep_at = @data_buf.index(@row_sep)
           if sep_at
             cut = sep_at + @row_sep.size
             # The separator closes the buffer: hand the whole buffer over.
             break if cut == @data_buf.size
             # Otherwise return the first row and keep the rest buffered.
             return @data_buf.slice!(0, cut)
           end
           break if @io.eof? || @data_buf.end_with?(@quote_char)
           if @field_size_limit && @data_buf.size > @field_size_limit
             raise MalformedCSVError, "Field size exceeded on line #{@lineno}."
           end
           # Read another chunk only when align_data_buf reported that it did
           # not extend the buffer itself.
           unless align_data_buf
             @data_buf += @io.gets(@quote_char, @io_read_limit)
           end
         end
         # Hand over the entire buffer and mark it as consumed.
         chunk, @data_buf = @data_buf, nil
         chunk
       end
       # Fetch up to 10 bytes into @data_buf until the string matches its encoding.
       # Returns a value only if we modified the buffer.
       def align_data_buf
         # Repairs a @data_buf whose last character was cut in half by a
         # limited read, by pulling single bytes from @io until the buffer is
         # valid in its encoding again.
         #
         # Returns nil (buffer untouched) when @data_buf is already valid or
         # @io is at EOF. Otherwise returns a truthy value: true once the
         # buffer became valid or EOF was reached, or the Integer from
         # Integer#times when all ten attempts were used up.
         return if @data_buf.valid_encoding? || @io.eof?
         # At most 10 extra bytes are fetched. The receiver must be written
         # out: a bare ".times" would be parsed as a continuation of the line
         # above and call #times on a boolean.
         10.times do
           break true if @data_buf.valid_encoding? || @io.eof?
           # read(1) yields one byte; tag it with the parser's raw encoding so
           # it can be appended to the buffer.
           @data_buf += @io.read(1).force_encoding(raw_encoding)
         end
       end
       def data_buf_eof?
         # True only when the IO is exhausted and no buffered data is waiting
         # in @data_buf.
         return false unless @io.eof?
         !@data_buf
       end
+      #
       # Stores the indicated separators for later use.
+      #
       # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
-...
         # store the parser behaviors
         @skip_blanks      = options.delete(:skip_blanks)
         @field_size_limit = options.delete(:field_size_limit)
         # prebuild Regexps for faster parsing
         esc_row_sep = escape_re(@row_sep)
         esc_quote   = escape_re(@quote_char)
         @parsers = {
           # for detecting parse errors
           quote_or_nl:    encode_re("[", esc_quote, "\r\n]"),
           nl_or_lf:       encode_re("[\r\n]"),
           stray_quote:    encode_re( "[^", esc_quote, "]", esc_quote,
                                      "[^", esc_quote, "]" ),
           # safer than chomp!()
           line_end:       encode_re(esc_row_sep, "\\z"),
           # illegal unquoted characters
           return_newline: encode_str("\r\n")
+        }
         @io_read_limit    = options.delete(:io_read_limit)
         @nl_lf            = encode_str("\r\n")
       end
+      #

« Previous
1
2
Next »

(1-1/2)

Project

General

Profile

Ruby » Ruby master

Feature #4017 » ruby_19_csv_parser_split_methods.patch