Project

General

Profile

csv_liberal_parsing.diff

bluej100 (Braden Anderson), 12/18/2015 01:47 AM

Download (5.5 KB)

View differences:

lib/csv.rb
1017 1017
  # <b><tt>:skip_blanks</tt></b>::        +false+
1018 1018
  # <b><tt>:force_quotes</tt></b>::       +false+
1019 1019
  # <b><tt>:skip_lines</tt></b>::         +nil+
1020
  # <b><tt>:liberal_parsing</tt></b>::    +false+
1020 1021
  #
1021 1022
  DEFAULT_OPTIONS = { col_sep:            ",",
1022 1023
                      row_sep:            :auto,
......
1029 1030
                      header_converters:  nil,
1030 1031
                      skip_blanks:        false,
1031 1032
                      force_quotes:       false,
1032
                      skip_lines:         nil }.freeze
1033
                      skip_lines:         nil,
1034
                      liberal_parsing:    false }.freeze
1033 1035

  
1034 1036
  #
1035 1037
  # This method will return a CSV instance, just like CSV::new(), but the
......
1495 1497
  #                                       a comment. If the passed object does
1496 1498
  #                                       not respond to <tt>match</tt>,
1497 1499
  #                                       <tt>ArgumentError</tt> is thrown.
1500
  # <b><tt>:liberal_parsing</tt></b>::    When set to a +true+ value, CSV will
1501
  #                                       attempt to parse input not conformant
1502
  #                                       with RFC 4180, such as double quotes
1503
  #                                       in unquoted fields.
1498 1504
  #
1499 1505
  # See CSV::DEFAULT_OPTIONS for the default settings.
1500 1506
  #
......
1618 1624
  def skip_blanks?()        @skip_blanks        end
1619 1625
  # Returns +true+ if all output fields are quoted. See CSV::new for details.
1620 1626
  def force_quotes?()       @force_quotes       end
1627
  # Returns +true+ if non-RFC 4180 input is accepted. See CSV::new for details.
1628
  def liberal_parsing?()    @liberal_parsing    end
1621 1629

  
1622 1630
  #
1623 1631
  # The Encoding CSV is parsing or writing in.  This will be the Encoding you
......
1855 1863
            csv.last << @col_sep
1856 1864
          end
1857 1865
        elsif part[0] == @quote_char
1858
          # If we are staring a new quoted column
1859
          if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
1866
          # If we are starting a new quoted column
1867
          if part.count(@quote_char) % 2 != 0
1860 1868
            # start an extended column
1861 1869
            csv             << part[1..-1]
1862 1870
            csv.last        << @col_sep
1863 1871
            in_extended_col =  true
1864
          else
1872
          elsif part[-1] == @quote_char
1865 1873
            # regular quoted column
1866 1874
            csv << part[1..-2]
1867 1875
            if csv.last =~ @parsers[:stray_quote]
......
1869 1877
                    "Missing or stray quote in line #{lineno + 1}"
1870 1878
            end
1871 1879
            csv.last.gsub!(@quote_char * 2, @quote_char)
1880
          elsif @liberal_parsing
1881
            csv << part
1882
          else
1883
            raise MalformedCSVError,
1884
                  "Missing or stray quote in line #{lineno + 1}"
1872 1885
          end
1873 1886
        elsif part =~ @parsers[:quote_or_nl]
1874 1887
          # Unquoted field with bad characters.
......
1876 1889
            raise MalformedCSVError, "Unquoted fields do not allow " +
1877 1890
                                     "\\r or \\n (line #{lineno + 1})."
1878 1891
          else
1879
            raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1892
            if @liberal_parsing
1893
              csv << part
1894
            else
1895
              raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1896
            end
1880 1897
          end
1881 1898
        else
1882 1899
          # Regular ole unquoted field.
......
1941 1958
    str << " encoding:" << @encoding.name
1942 1959
    # show other attributes
1943 1960
    %w[ lineno     col_sep     row_sep
1944
        quote_char skip_blanks ].each do |attr_name|
1961
        quote_char skip_blanks liberal_parsing ].each do |attr_name|
1945 1962
      if a = instance_variable_get("@#{attr_name}")
1946 1963
        str << " " << attr_name << ":" << a.inspect
1947 1964
      end
......
2075 2092
    # store the parser behaviors
2076 2093
    @skip_blanks      = options.delete(:skip_blanks)
2077 2094
    @field_size_limit = options.delete(:field_size_limit)
2095
    @liberal_parsing  = options.delete(:liberal_parsing)
2078 2096

  
2079 2097
    # prebuild Regexps for faster parsing
2080 2098
    esc_row_sep = escape_re(@row_sep)
test/csv/test_features.rb
142 142
    assert_equal(3, count)
143 143
  end
144 144

  
145
  def test_liberal_parsing
146
    input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
147
    assert_raise(CSV::MalformedCSVError) do
148
        CSV.parse_line(input)
149
    end
150
    assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
151
                 CSV.parse_line(input, liberal_parsing: true))
152

  
153
    input = '"quoted" field'
154
    assert_raise(CSV::MalformedCSVError) do
155
        CSV.parse_line(input)
156
    end
157
    assert_equal(['"quoted" field'],
158
                 CSV.parse_line(input, liberal_parsing: true))
159

  
160
    assert_raise(CSV::MalformedCSVError) do
161
      CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
162
    end
163

  
164
    assert_equal(["is", 'this "three', ' or four"', "fields"],
165
      CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true))
166
  end
167

  
145 168
  def test_csv_behavior_readers
146 169
    %w[ unconverted_fields return_headers write_headers
147 170
        skip_blanks        force_quotes ].each do |behavior|