Project

General

Profile

csv_liberal_parsing.diff

bluej100 (Braden Anderson), 12/19/2015 03:42 PM

Download (5.14 KB)

View differences:

lib/csv.rb
1019 1019
  # <b><tt>:skip_blanks</tt></b>::        +false+
1020 1020
  # <b><tt>:force_quotes</tt></b>::       +false+
1021 1021
  # <b><tt>:skip_lines</tt></b>::         +nil+
1022
  # <b><tt>:liberal_parsing</tt></b>::    +false+
1022 1023
  #
1023 1024
  DEFAULT_OPTIONS = {
1024 1025
    col_sep:            ",",
......
1033 1034
    skip_blanks:        false,
1034 1035
    force_quotes:       false,
1035 1036
    skip_lines:         nil,
1037
    liberal_parsing:    false,
1036 1038
  }.freeze
1037 1039

  
1038 1040
  #
......
1499 1501
  #                                       a comment. If the passed object does
1500 1502
  #                                       not respond to <tt>match</tt>,
1501 1503
  #                                       <tt>ArgumentError</tt> is thrown.
1504
  # <b><tt>:liberal_parsing</tt></b>::    When set to a +true+ value, CSV will
1505
  #                                       attempt to parse input not conformant
1506
  #                                       with RFC 4180, such as double quotes
1507
  #                                       in unquoted fields.
1502 1508
  #
1503 1509
  # See CSV::DEFAULT_OPTIONS for the default settings.
1504 1510
  #
......
1622 1628
  def skip_blanks?()        @skip_blanks        end
1623 1629
  # Returns +true+ if all output fields are quoted. See CSV::new for details.
1624 1630
  def force_quotes?()       @force_quotes       end
1631
  # Returns +true+ if input not conformant with RFC 4180 is accepted instead of raising MalformedCSVError. See CSV::new for details.
1632
  def liberal_parsing?()    @liberal_parsing    end
1625 1633

  
1626 1634
  #
1627 1635
  # The Encoding CSV is parsing or writing in.  This will be the Encoding you
......
1860 1868
          end
1861 1869
        elsif part[0] == @quote_char
1862 1870
          # If we are starting a new quoted column
1863
          if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0
1871
          if part.count(@quote_char) % 2 != 0
1864 1872
            # start an extended column
1865 1873
            csv             << part[1..-1]
1866 1874
            csv.last        << @col_sep
1867 1875
            in_extended_col =  true
1868
          else
1876
          elsif part[-1] == @quote_char
1869 1877
            # regular quoted column
1870 1878
            csv << part[1..-2]
1871 1879
            if csv.last =~ @parsers[:stray_quote]
......
1873 1881
                    "Missing or stray quote in line #{lineno + 1}"
1874 1882
            end
1875 1883
            csv.last.gsub!(@quote_char * 2, @quote_char)
1884
          elsif @liberal_parsing
1885
            csv << part
1886
          else
1887
            raise MalformedCSVError,
1888
                  "Missing or stray quote in line #{lineno + 1}"
1876 1889
          end
1877 1890
        elsif part =~ @parsers[:quote_or_nl]
1878 1891
          # Unquoted field with bad characters.
......
1880 1893
            raise MalformedCSVError, "Unquoted fields do not allow " +
1881 1894
                                     "\\r or \\n (line #{lineno + 1})."
1882 1895
          else
1883
            raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1896
            if @liberal_parsing
1897
              csv << part
1898
            else
1899
              raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1900
            end
1884 1901
          end
1885 1902
        else
1886 1903
          # Regular ole unquoted field.
......
1945 1962
    str << " encoding:" << @encoding.name
1946 1963
    # show other attributes
1947 1964
    %w[ lineno     col_sep     row_sep
1948
        quote_char skip_blanks ].each do |attr_name|
1965
        quote_char skip_blanks liberal_parsing ].each do |attr_name|
1949 1966
      if a = instance_variable_get("@#{attr_name}")
1950 1967
        str << " " << attr_name << ":" << a.inspect
1951 1968
      end
......
2079 2096
    # store the parser behaviors
2080 2097
    @skip_blanks      = options.delete(:skip_blanks)
2081 2098
    @field_size_limit = options.delete(:field_size_limit)
2099
    @liberal_parsing  = options.delete(:liberal_parsing)
2082 2100

  
2083 2101
    # prebuild Regexps for faster parsing
2084 2102
    esc_row_sep = escape_re(@row_sep)
test/csv/test_features.rb
142 142
    assert_equal(3, count)
143 143
  end
144 144

  
145
  def test_liberal_parsing
146
    input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson'
147
    assert_raise(CSV::MalformedCSVError) do
148
        CSV.parse_line(input)
149
    end
150
    assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'],
151
                 CSV.parse_line(input, liberal_parsing: true))
152

  
153
    input = '"quoted" field'
154
    assert_raise(CSV::MalformedCSVError) do
155
        CSV.parse_line(input)
156
    end
157
    assert_equal(['"quoted" field'],
158
                 CSV.parse_line(input, liberal_parsing: true))
159

  
160
    assert_raise(CSV::MalformedCSVError) do
161
      CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true)
162
    end
163

  
164
    assert_equal(["is", 'this "three', ' or four"', "fields"],
165
      CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true))
166
  end
167

  
145 168
  def test_csv_behavior_readers
146 169
    %w[ unconverted_fields return_headers write_headers
147 170
        skip_blanks        force_quotes ].each do |behavior|