diff --git a/ChangeLog b/ChangeLog index f0de059..a94f3c1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,155 @@ +Mon Jul 18 13:36:47 2011 Aaron Patterson + + * ext/psych/lib/psych.rb: define a new BadAlias error class. + + * ext/psych/lib/psych/visitors/to_ruby.rb: raise an exception when + deserializing an alias that does not exist. + + * test/psych/test_merge_keys.rb: corresponding test. + +Fri Mar 9 06:29:22 2012 Aaron Patterson + + * ext/psych/lib/psych.rb (load, parse): stop parsing or loading after + the first document has been parsed. + + * test/psych/test_stream.rb: pertinent tests. + +Fri Mar 9 06:17:05 2012 Aaron Patterson + + * ext/psych/lib/psych.rb (parse_stream, load_stream): if a block is + given, documents will be yielded to the block as they are parsed. + [ruby-core:42404] [Bug #5978] + + * ext/psych/lib/psych/handlers/document_stream.rb: add a handler that + yields documents as they are parsed + + * test/psych/test_stream.rb: corresponding tests. + +Tue Mar 6 02:31:20 2012 Aaron Patterson + + * ext/psych/lib/psych/core_ext.rb: only extend Kernel if IRB is loaded + in order to stop method pollution. + +Tue Feb 28 10:28:51 2012 Aaron Patterson + + * ext/psych/lib/psych.rb: default open YAML files with utf8 external + encoding. [ruby-core:42967] + * test/psych/test_tainted.rb: ditto + +Fri Feb 24 13:54:33 2012 Aaron Patterson + + * ext/psych/parser.c: prevent a memory leak by protecting calls to + handler callbacks. + * test/psych/test_parser.rb: test to demonstrate leak. + +Fri Feb 24 08:08:38 2012 Aaron Patterson + + * ext/psych/parser.c: set parser encoding based on the YAML input + rather than user configuration. + * test/psych/test_encoding.rb: corresponding tests. + * test/psych/test_parser.rb: ditto + * test/psych/test_tainted.rb: ditto + +Fri Feb 10 03:41:31 2012 Aaron Patterson + + * ext/psych/parser.c: removed external encoding setter, allow parser + to be reused. + * ext/psych/lib/psych/parser.rb: added external encoding setter. 
+ * test/psych/test_parser.rb: test parser reuse + +Wed Jan 18 12:49:15 2012 Aaron Patterson + + * ext/psych/lib/psych/visitors/to_ruby.rb: Added support for loading + subclasses of String with ivars + * ext/psych/lib/psych/visitors/yaml_tree.rb: Added support for dumping + subclasses of String with ivars + * test/psych/test_string.rb: corresponding tests + +Wed Jan 18 10:39:47 2012 Aaron Patterson + + * ext/psych/lib/psych/visitors/to_ruby.rb: Added ability to load array + subclasses with ivars. + * ext/psych/lib/psych/visitors/yaml_tree.rb: Added ability to dump + array subclasses with ivars. + * test/psych/test_array.rb: corresponding tests + +Wed Dec 21 02:25:36 2011 Aaron Patterson + + * ext/psych/emitter.c: fixing clang warnings. Thanks Joey! + +Sun Dec 18 12:42:48 2011 Aaron Patterson + + * ext/psych/lib/psych/visitors/to_ruby.rb: BigDecimals can be restored + from YAML. + * ext/psych/lib/psych/visitors/yaml_tree.rb: BigDecimals can be dumped + to YAML. + * test/psych/test_numeric.rb: tests for BigDecimal serialization + +Sun Dec 18 12:03:13 2011 Aaron Patterson + + * ext/psych/lib/psych/scalar_scanner.rb: Strings that look like dates + should be treated as strings and not dates. + + * test/psych/test_scalar_scanner.rb: corresponding tests. + +Wed Dec 7 08:04:31 2011 Aaron Patterson + + * ext/psych/lib/psych.rb (module Psych): parse and load methods take + an optional file name that is used when raising Psych::SyntaxError + exceptions + * ext/psych/lib/psych/syntax_error.rb (module Psych): allow nil file + names and handle nil file names in the exception message + * test/psych/test_exception.rb (module Psych): Tests for changes. + +Wed Nov 30 09:09:37 2011 Aaron Patterson + + * ext/psych/parser.c (parse): parse method can take an optional file + name for use in exception messages. + * test/psych/test_parser.rb: corresponding tests. 
+ +Tue Nov 22 04:46:22 2011 Aaron Patterson + + * ext/psych/lib/psych.rb: remove autoload from psych + * ext/psych/lib/psych/json.rb: ditto + +Wed Nov 9 04:52:16 2011 Aaron Patterson + + * ext/psych/lib/psych/tree_builder.rb: dump complex numbers, + rationals, etc with reference ids. + * ext/psych/lib/psych/visitors/yaml_tree.rb: ditto + * ext/psych/lib/psych/visitors/to_ruby.rb: loading complex numbers, + rationals, etc with reference ids. + * test/psych/test_object_references.rb: corresponding tests + +Mon Nov 7 20:31:52 2011 Aaron Patterson + + * ext/psych/lib/psych/scalar_scanner.rb: make sure strings that look + like base 60 numbers are serialized as quoted strings. + * test/psych/test_string.rb: test for change. + +Thu Oct 27 08:47:38 2011 Martin Bosslet + + * ext/psych/parser.c: remove unused variable. + +Wed Oct 5 02:50:27 2011 Aaron Patterson + + * ext/psych/lib/psych/syntax_error.rb: Add file, line, offset, and + message attributes during parse failure. + * ext/psych/parser.c: Update parser to raise exception with correct + values. + * test/psych/test_exception.rb: corresponding tests. + +Wed Oct 5 01:52:16 2011 Aaron Patterson + + * ext/psych/parser.c (parse): Use context_mark for indicating error + line and column. + +Wed Oct 5 01:22:08 2011 Aaron Patterson + + * ext/psych/lib/psych/scalar_scanner.rb: use normal begin / rescue + since postfix rescue cannot receive the exception class. Thanks + nagachika! + Tue Mar 27 22:22:50 2012 Nobuyoshi Nakada * configure.in (RUBY_STACK_GROW_DIRECTION): substitute CPU name as diff --git a/ext/psych/emitter.c b/ext/psych/emitter.c index a85fa45..15fdcfe 100644 --- a/ext/psych/emitter.c +++ b/ext/psych/emitter.c @@ -351,7 +351,7 @@ static VALUE start_mapping( (yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor)), (yaml_char_t *)(NIL_P(tag) ? NULL : StringValuePtr(tag)), implicit ? 
1 : 0, - (yaml_sequence_style_t)NUM2INT(style) + (yaml_mapping_style_t)NUM2INT(style) ); emit(emitter, &event); diff --git a/ext/psych/lib/psych.rb b/ext/psych/lib/psych.rb index f9052f9..df3acc0 100644 --- a/ext/psych/lib/psych.rb +++ b/ext/psych/lib/psych.rb @@ -10,7 +10,10 @@ require 'psych/set' require 'psych/coder' require 'psych/core_ext' require 'psych/deprecated' -require 'psych/json' +require 'psych/stream' +require 'psych/json/tree_builder' +require 'psych/json/stream' +require 'psych/handlers/document_stream' ### # = Overview @@ -98,39 +101,66 @@ module Psych class Exception < RuntimeError end - autoload :Stream, 'psych/stream' + class BadAlias < Exception + end ### # Load +yaml+ in to a Ruby data structure. If multiple documents are # provided, the object contained in the first document will be returned. + # +filename+ will be used in the exception message if any exception is raised + # while parsing. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. # # Example: # - # Psych.load("--- a") # => 'a' - # Psych.load("---\n - a\n - b") # => ['a', 'b'] - def self.load yaml - result = parse(yaml) + # Psych.load("--- a") # => 'a' + # Psych.load("---\n - a\n - b") # => ['a', 'b'] + # + # begin + # Psych.load("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(file.txt): found character that cannot start any token" + # end + def self.load yaml, filename = nil + result = parse(yaml, filename) result ? result.to_ruby : result end ### # Parse a YAML string in +yaml+. Returns the first object of a YAML AST. + # +filename+ is used in the exception message if a Psych::SyntaxError is + # raised. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. 
# # Example: # # Psych.parse("---\n - a\n - b") # => # # + # begin + # Psych.parse("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(file.txt): found character that cannot start any token" + # end + # # See Psych::Nodes for more information about YAML AST. - def self.parse yaml - children = parse_stream(yaml).children - children.empty? ? false : children.first.children.first + def self.parse yaml, filename = nil + parse_stream(yaml, filename) do |node| + return node + end + false end ### # Parse a file at +filename+. Returns the YAML AST. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. def self.parse_file filename - File.open filename do |f| - parse f + File.open filename, 'r:bom|utf-8' do |f| + parse f, filename end end @@ -143,16 +173,39 @@ module Psych ### # Parse a YAML string in +yaml+. Returns the full AST for the YAML document. # This method can handle multiple YAML documents contained in +yaml+. + # +filename+ is used in the exception message if a Psych::SyntaxError is + # raised. + # + # If a block is given, a Psych::Nodes::Document node will be yielded to the + # block as it's being parsed. + # + # Raises a Psych::SyntaxError when a YAML syntax error is detected. # # Example: # # Psych.parse_stream("---\n - a\n - b") # => # # + # Psych.parse_stream("--- a\n--- b") do |node| + # node # => # + # end + # + # begin + # Psych.parse_stream("--- `", "file.txt") + # rescue Psych::SyntaxError => ex + # ex.file # => 'file.txt' + # ex.message # => "(file.txt): found character that cannot start any token" + # end + # # See Psych::Nodes for more information about YAML AST. - def self.parse_stream yaml - parser = self.parser - parser.parse yaml - parser.handler.root + def self.parse_stream yaml, filename = nil, &block + if block_given? 
+ parser = Psych::Parser.new(Handlers::DocumentStream.new(&block)) + parser.parse yaml, filename + else + parser = self.parser + parser.parse yaml, filename + parser.handler.root + end end ### @@ -214,19 +267,34 @@ module Psych ### # Load multiple documents given in +yaml+. Returns the parsed documents - # as a list. For example: + # as a list. If a block is given, each document will be converted to ruby + # and passed to the block during parsing + # + # Example: # # Psych.load_stream("--- foo\n...\n--- bar\n...") # => ['foo', 'bar'] # - def self.load_stream yaml - parse_stream(yaml).children.map { |child| child.to_ruby } + # list = [] + # Psych.load_stream("--- foo\n...\n--- bar\n...") do |ruby| + # list << ruby + # end + # list # => ['foo', 'bar'] + # + def self.load_stream yaml, filename = nil + if block_given? + parse_stream(yaml, filename) do |node| + yield node.to_ruby + end + else + parse_stream(yaml, filename).children.map { |child| child.to_ruby } + end end ### # Load the document contained in +filename+. 
Returns the yaml contained in # +filename+ as a ruby object def self.load_file filename - File.open(filename) { |f| self.load f } + File.open(filename, 'r:bom|utf-8') { |f| self.load f, filename } end # :stopdoc: diff --git a/ext/psych/lib/psych/core_ext.rb b/ext/psych/lib/psych/core_ext.rb index 2ad75e1..4a04c2d 100644 --- a/ext/psych/lib/psych/core_ext.rb +++ b/ext/psych/lib/psych/core_ext.rb @@ -30,6 +30,7 @@ class Module alias :yaml_as :psych_yaml_as end +if defined?(::IRB) module Kernel def psych_y *objects puts Psych.dump_stream(*objects) @@ -38,3 +39,4 @@ module Kernel alias y psych_y private :y end +end diff --git a/ext/psych/lib/psych/handlers/document_stream.rb b/ext/psych/lib/psych/handlers/document_stream.rb new file mode 100644 index 0000000..e429993 --- /dev/null +++ b/ext/psych/lib/psych/handlers/document_stream.rb @@ -0,0 +1,22 @@ +require 'psych/tree_builder' + +module Psych + module Handlers + class DocumentStream < Psych::TreeBuilder # :nodoc: + def initialize &block + super + @block = block + end + + def start_document version, tag_directives, implicit + n = Nodes::Document.new version, tag_directives, implicit + push n + end + + def end_document implicit_end = !streaming? 
+ @last.implicit_end = implicit_end + @block.call pop + end + end + end +end diff --git a/ext/psych/lib/psych/json.rb b/ext/psych/lib/psych/json.rb deleted file mode 100644 index 412ab27..0000000 --- a/ext/psych/lib/psych/json.rb +++ /dev/null @@ -1,6 +0,0 @@ -module Psych - module JSON - autoload :TreeBuilder, 'psych/json/tree_builder' - autoload :Stream, 'psych/json/stream' - end -end diff --git a/ext/psych/lib/psych/parser.rb b/ext/psych/lib/psych/parser.rb index 5d75605..84085f1 100644 --- a/ext/psych/lib/psych/parser.rb +++ b/ext/psych/lib/psych/parser.rb @@ -36,12 +36,16 @@ module Psych # The handler on which events will be called attr_accessor :handler + # Set the encoding for this parser to +encoding+ + attr_writer :external_encoding + ### # Creates a new Psych::Parser instance with +handler+. YAML events will # be called on +handler+. See Psych::Parser for more details. def initialize handler = Handler.new @handler = handler + @external_encoding = ANY end end end diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb index 3e8acbb..fa2d385 100644 --- a/ext/psych/lib/psych/scalar_scanner.rb +++ b/ext/psych/lib/psych/scalar_scanner.rb @@ -46,9 +46,13 @@ module Psych end when TIME parse_time string - when /^\d{4}-\d{1,2}-\d{1,2}$/ + when /^\d{4}-(?:1[012]|0\d|\d)-(?:[12]\d|3[01]|0\d|\d)$/ require 'date' - Date.strptime(string, '%Y-%m-%d') + begin + Date.strptime(string, '%Y-%m-%d') + rescue ArgumentError + string + end when /^\.inf$/i 1 / 0.0 when /^-\.inf$/i @@ -61,7 +65,7 @@ module Psych else string.sub(/^:/, '').to_sym end - when /^[-+]?[1-9][0-9_]*(:[0-5]?[0-9])+$/ + when /^[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+$/ i = 0 string.split(':').each_with_index do |n,e| i += (n.to_i * 60 ** (e - 2).abs) @@ -74,13 +78,19 @@ module Psych end i when FLOAT - return Float(string.gsub(/[,_]/, '')) rescue ArgumentError + begin + return Float(string.gsub(/[,_]/, '')) + rescue ArgumentError + end @string_cache[string] = true string else if 
string.count('.') < 2 - return Integer(string.gsub(/[,_]/, '')) rescue ArgumentError + begin + return Integer(string.gsub(/[,_]/, '')) + rescue ArgumentError + end end @string_cache[string] = true diff --git a/ext/psych/lib/psych/syntax_error.rb b/ext/psych/lib/psych/syntax_error.rb new file mode 100644 index 0000000..f79743d --- /dev/null +++ b/ext/psych/lib/psych/syntax_error.rb @@ -0,0 +1,19 @@ +module Psych + class SyntaxError < ::SyntaxError + attr_reader :file, :line, :column, :offset, :problem, :context + + def initialize file, line, col, offset, problem, context + err = [problem, context].compact.join ' ' + filename = file || '' + message = "(%s): %s at line %d column %d" % [filename, err, line, col] + + @file = file + @line = line + @column = col + @offset = offset + @problem = problem + @context = context + super(message) + end + end +end diff --git a/ext/psych/lib/psych/tree_builder.rb b/ext/psych/lib/psych/tree_builder.rb index 8b4e972..c8f3447 100644 --- a/ext/psych/lib/psych/tree_builder.rb +++ b/ext/psych/lib/psych/tree_builder.rb @@ -72,7 +72,9 @@ module Psych end def scalar value, anchor, tag, plain, quoted, style - @last.children << Nodes::Scalar.new(value,anchor,tag,plain,quoted,style) + s = Nodes::Scalar.new(value,anchor,tag,plain,quoted,style) + @last.children << s + s end def alias anchor diff --git a/ext/psych/lib/psych/visitors/to_ruby.rb b/ext/psych/lib/psych/visitors/to_ruby.rb index ca046c5..3db67a3 100644 --- a/ext/psych/lib/psych/visitors/to_ruby.rb +++ b/ext/psych/lib/psych/visitors/to_ruby.rb @@ -31,9 +31,7 @@ module Psych result end - def visit_Psych_Nodes_Scalar o - @st[o.anchor] = o.value if o.anchor - + def deserialize o if klass = Psych.load_tags[o.tag] instance = klass.allocate @@ -52,8 +50,16 @@ module Psych case o.tag when '!binary', 'tag:yaml.org,2002:binary' o.value.unpack('m').first - when '!str', 'tag:yaml.org,2002:str' - o.value + when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str' + klass = resolve_class($1) 
+ if klass + klass.allocate.replace o.value + else + o.value + end + when '!ruby/object:BigDecimal' + require 'bigdecimal' + BigDecimal._load o.value when "!ruby/object:DateTime" require 'date' @ss.parse_time(o.value).to_datetime @@ -92,6 +98,11 @@ module Psych @ss.tokenize o.value end end + private :deserialize + + def visit_Psych_Nodes_Scalar o + register o, deserialize(o) + end def visit_Psych_Nodes_Sequence o if klass = Psych.load_tags[o.tag] @@ -108,15 +119,18 @@ module Psych case o.tag when '!omap', 'tag:yaml.org,2002:omap' - map = Psych::Omap.new - @st[o.anchor] = map if o.anchor + map = register(o, Psych::Omap.new) o.children.each { |a| map[accept(a.children.first)] = accept a.children.last } map + when /^!(?:seq|ruby\/array):(.*)$/ + klass = resolve_class($1) + list = register(o, klass.allocate) + o.children.each { |c| list.push accept c } + list else - list = [] - @st[o.anchor] = list if o.anchor + list = register(o, []) o.children.each { |c| list.push accept c } list end @@ -127,16 +141,33 @@ module Psych return revive_hash({}, o) unless o.tag case o.tag - when '!str', 'tag:yaml.org,2002:str' + when /^!(?:str|ruby\/string)(?::(.*))?/, 'tag:yaml.org,2002:str' + klass = resolve_class($1) members = Hash[*o.children.map { |c| accept c }] string = members.delete 'str' + + if klass + string = klass.allocate + string.replace string + end + init_with(string, members.map { |k,v| [k.to_s.sub(/^@/, ''),v] }, o) + when /^!ruby\/array:(.*)$/ + klass = resolve_class($1) + list = register(o, klass.allocate) + + members = Hash[o.children.map { |c| accept c }.each_slice(2).to_a] + list.replace members['internal'] + + members['ivars'].each do |ivar, v| + list.instance_variable_set ivar, v + end + list when /^!ruby\/struct:?(.*)?$/ klass = resolve_class($1) if klass - s = klass.allocate - @st[o.anchor] = s if o.anchor + s = register(o, klass.allocate) members = {} struct_members = s.members.map { |x| x.to_sym } @@ -158,7 +189,7 @@ module Psych when '!ruby/range' h = 
Hash[*o.children.map { |c| accept c }] - Range.new(h['begin'], h['end'], h['excl']) + register o, Range.new(h['begin'], h['end'], h['excl']) when /^!ruby\/exception:?(.*)?$/ h = Hash[*o.children.map { |c| accept c }] @@ -177,11 +208,11 @@ module Psych when '!ruby/object:Complex' h = Hash[*o.children.map { |c| accept c }] - Complex(h['real'], h['image']) + register o, Complex(h['real'], h['image']) when '!ruby/object:Rational' h = Hash[*o.children.map { |c| accept c }] - Rational(h['numerator'], h['denominator']) + register o, Rational(h['numerator'], h['denominator']) when /^!ruby\/object:?(.*)?$/ name = $1 || 'Object' @@ -205,10 +236,15 @@ module Psych end def visit_Psych_Nodes_Alias o - @st[o.anchor] + @st.fetch(o.anchor) { raise BadAlias, "Unknown alias: #{o.anchor}" } end private + def register node, object + @st[node.anchor] = object if node.anchor + object + end + def revive_hash hash, o @st[o.anchor] = hash if o.anchor diff --git a/ext/psych/lib/psych/visitors/yaml_tree.rb b/ext/psych/lib/psych/visitors/yaml_tree.rb index 5a09285..80af046 100644 --- a/ext/psych/lib/psych/visitors/yaml_tree.rb +++ b/ext/psych/lib/psych/visitors/yaml_tree.rb @@ -159,13 +159,13 @@ module Psych end def visit_Regexp o - @emitter.scalar o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY + register o, @emitter.scalar(o.inspect, nil, '!ruby/regexp', false, false, Nodes::Scalar::ANY) end def visit_DateTime o formatted = format_time o.to_time tag = '!ruby/object:DateTime' - @emitter.scalar formatted, nil, tag, false, false, Nodes::Scalar::ANY + register o, @emitter.scalar(formatted, nil, tag, false, false, Nodes::Scalar::ANY) end def visit_Time o @@ -174,7 +174,7 @@ module Psych end def visit_Rational o - @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK) + register o, @emitter.start_mapping(nil, '!ruby/object:Rational', false, Nodes::Mapping::BLOCK) [ 'denominator', o.denominator.to_s, @@ -187,7 +187,7 @@ module Psych end def 
visit_Complex o - @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK) + register o, @emitter.start_mapping(nil, '!ruby/object:Complex', false, Nodes::Mapping::BLOCK) ['real', o.real.to_s, 'image', o.imag.to_s].each do |m| @emitter.scalar m, nil, nil, true, false, Nodes::Scalar::ANY @@ -214,6 +214,10 @@ module Psych end end + def visit_BigDecimal o + @emitter.scalar o._dump, nil, '!ruby/object:BigDecimal', false, false, Nodes::Scalar::ANY + end + def binary? string string.encoding == Encoding::ASCII_8BIT || string.index("\x00") || @@ -241,9 +245,15 @@ module Psych ivars = find_ivars o if ivars.empty? + unless o.class == ::String + tag = "!ruby/string:#{o.class}" + end @emitter.scalar str, nil, tag, plain, quote, style else - @emitter.start_mapping nil, '!str', false, Nodes::Mapping::BLOCK + maptag = '!ruby/string' + maptag << ":#{o.class}" unless o.class == ::String + + @emitter.start_mapping nil, maptag, false, Nodes::Mapping::BLOCK @emitter.scalar 'str', nil, nil, true, false, Nodes::Scalar::ANY @emitter.scalar str, nil, tag, plain, quote, style @@ -255,16 +265,16 @@ module Psych def visit_Module o raise TypeError, "can't dump anonymous module: #{o}" unless o.name - @emitter.scalar o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED + register o, @emitter.scalar(o.name, nil, '!ruby/module', false, false, Nodes::Scalar::SINGLE_QUOTED) end def visit_Class o raise TypeError, "can't dump anonymous class: #{o}" unless o.name - @emitter.scalar o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED + register o, @emitter.scalar(o.name, nil, '!ruby/class', false, false, Nodes::Scalar::SINGLE_QUOTED) end def visit_Range o - @emitter.start_mapping nil, '!ruby/range', false, Nodes::Mapping::BLOCK + register o, @emitter.start_mapping(nil, '!ruby/range', false, Nodes::Mapping::BLOCK) ['begin', o.begin, 'end', o.end, 'excl', o.exclude_end?].each do |m| accept m end @@ -297,9 +307,13 @@ module Psych end def 
visit_Array o - register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) - o.each { |c| accept c } - @emitter.end_sequence + if o.class == ::Array + register o, @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) + o.each { |c| accept c } + @emitter.end_sequence + else + visit_array_subclass o + end end def visit_NilClass o @@ -311,6 +325,39 @@ module Psych end private + def visit_array_subclass o + tag = "!ruby/array:#{o.class}" + if o.instance_variables.empty? + node = @emitter.start_sequence(nil, tag, false, Nodes::Sequence::BLOCK) + register o, node + o.each { |c| accept c } + @emitter.end_sequence + else + node = @emitter.start_mapping(nil, tag, false, Nodes::Sequence::BLOCK) + register o, node + + # Dump the internal list + accept 'internal' + @emitter.start_sequence(nil, nil, true, Nodes::Sequence::BLOCK) + o.each { |c| accept c } + @emitter.end_sequence + + # Dump the ivars + accept 'ivars' + @emitter.start_mapping(nil, nil, true, Nodes::Sequence::BLOCK) + o.instance_variables.each do |ivar| + accept ivar + accept o.instance_variable_get ivar + end + @emitter.end_mapping + + @emitter.end_mapping + end + end + + def dump_list o + end + # '%:z' was no defined until 1.9.3 if RUBY_VERSION < '1.9.3' def format_time time diff --git a/ext/psych/parser.c b/ext/psych/parser.c index e68768f..9808c6b 100644 --- a/ext/psych/parser.c +++ b/ext/psych/parser.c @@ -59,6 +59,163 @@ static VALUE allocate(VALUE klass) return Data_Wrap_Struct(klass, 0, dealloc, parser); } +static VALUE make_exception(yaml_parser_t * parser, VALUE path) +{ + size_t line, column; + + line = parser->context_mark.line + 1; + column = parser->context_mark.column + 1; + + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + INT2NUM(line), + INT2NUM(column), + INT2NUM(parser->problem_offset), + parser->problem ? rb_usascii_str_new2(parser->problem) : Qnil, + parser->context ? 
rb_usascii_str_new2(parser->context) : Qnil); +} + +#ifdef HAVE_RUBY_ENCODING_H +static VALUE transcode_string(VALUE src, int * parser_encoding) +{ + int utf8 = rb_utf8_encindex(); + int utf16le = rb_enc_find_index("UTF-16LE"); /* same encoding names that transcode_io looks up */ + int utf16be = rb_enc_find_index("UTF-16BE"); + int source_encoding = rb_enc_get_index(src); + + if (source_encoding == utf8) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (source_encoding == utf16le) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (source_encoding == utf16be) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + src = rb_str_export_to_enc(src, rb_utf8_encoding()); + RB_GC_GUARD(src); + + *parser_encoding = YAML_UTF8_ENCODING; + return src; +} + +static VALUE transcode_io(VALUE src, int * parser_encoding) +{ + VALUE io_external_encoding; + int io_external_enc_index; + + io_external_encoding = rb_funcall(src, rb_intern("external_encoding"), 0); + + /* if no encoding is returned, assume ascii8bit. 
*/ + if (NIL_P(io_external_encoding)) { + io_external_enc_index = rb_ascii8bit_encindex(); + } else { + io_external_enc_index = rb_to_encoding_index(io_external_encoding); + } + + /* Treat US-ASCII as utf_8 */ + if (io_external_enc_index == rb_usascii_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_utf8_encindex()) { + *parser_encoding = YAML_UTF8_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16LE")) { + *parser_encoding = YAML_UTF16LE_ENCODING; + return src; + } + + if (io_external_enc_index == rb_enc_find_index("UTF-16BE")) { + *parser_encoding = YAML_UTF16BE_ENCODING; + return src; + } + + /* Just guess on ASCII-8BIT */ + if (io_external_enc_index == rb_ascii8bit_encindex()) { + *parser_encoding = YAML_ANY_ENCODING; + return src; + } + + rb_raise(rb_eArgError, "YAML file must be UTF-8, UTF-16LE, or UTF-16BE, not %s", + rb_enc_name(rb_enc_from_index(io_external_enc_index))); + + return Qnil; +} + +#endif + +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static VALUE protected_end_sequence(VALUE handler) +{ + return 
rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + /* * call-seq: * parser.parse(yaml) @@ -68,27 +225,48 @@ static VALUE allocate(VALUE klass) * * See Psych::Parser and Psych::Parser#handler */ -static VALUE parse(VALUE self, VALUE yaml) +static VALUE parse(int argc, VALUE *argv, VALUE self) { + VALUE yaml, path; yaml_parser_t * parser; yaml_event_t event; int done = 0; int tainted = 0; + int state = 0; + int parser_encoding = YAML_ANY_ENCODING; #ifdef HAVE_RUBY_ENCODING_H int encoding = rb_utf8_encindex(); rb_encoding * internal_enc = rb_default_internal_encoding(); #endif VALUE handler = rb_iv_get(self, "@handler"); + if (rb_scan_args(argc, argv, "11", &yaml, &path) == 1) { + if(rb_respond_to(yaml, id_path)) + path = rb_funcall(yaml, id_path, 0); + else + path = rb_str_new2(""); + } + Data_Get_Struct(self, yaml_parser_t, parser); + yaml_parser_delete(parser); + yaml_parser_initialize(parser); + if (OBJ_TAINTED(yaml)) tainted = 1; - if(rb_respond_to(yaml, id_read)) { + if (rb_respond_to(yaml, id_read)) { +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_io(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input(parser, io_reader, (void *)yaml); if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1; } else { StringValue(yaml); +#ifdef HAVE_RUBY_ENCODING_H + yaml = transcode_string(yaml, &parser_encoding); + yaml_parser_set_encoding(parser, parser_encoding); +#endif yaml_parser_set_input_string( parser, (const unsigned char *)RSTRING_PTR(yaml), @@ -98,32 +276,28 
@@ static VALUE parse(VALUE self, VALUE yaml) while(!done) { if(!yaml_parser_parse(parser, &event)) { - VALUE path; - size_t line = parser->mark.line; - size_t column = parser->mark.column; - - if(rb_respond_to(yaml, id_path)) - path = rb_funcall(yaml, id_path, 0); - else - path = rb_str_new2(""); + VALUE exception; + exception = make_exception(parser, path); yaml_parser_delete(parser); yaml_parser_initialize(parser); - rb_raise(ePsychSyntaxError, "(%s): couldn't parse YAML at line %d column %d", - StringValuePtr(path), - (int)line, (int)column); + rb_exc_raise(exception); } switch(event.type) { - case YAML_STREAM_START_EVENT: - - rb_funcall(handler, id_start_stream, 1, - INT2NUM((long)event.data.stream_start.encoding) - ); - break; + case YAML_STREAM_START_EVENT: + { + VALUE args[2]; + + args[0] = handler; + args[1] = INT2NUM((long)event.data.stream_start.encoding); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; case YAML_DOCUMENT_START_EVENT: { + VALUE args[4]; /* Get a list of tag directives (if any) */ VALUE tag_directives = rb_ary_new(); /* Grab the document version */ @@ -161,19 +335,25 @@ static VALUE parse(VALUE self, VALUE yaml) rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); } } - rb_funcall(handler, id_start_document, 3, - version, tag_directives, - event.data.document_start.implicit == 1 ? Qtrue : Qfalse - ); + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event.data.document_start.implicit == 1 ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); } break; case YAML_DOCUMENT_END_EVENT: - rb_funcall(handler, id_end_document, 1, - event.data.document_end.implicit == 1 ? Qtrue : Qfalse - ); + { + VALUE args[2]; + + args[0] = handler; + args[1] = event.data.document_end.implicit == 1 ? 
Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } break; case YAML_ALIAS_EVENT: { + VALUE args[2]; VALUE alias = Qnil; if(event.data.alias.anchor) { alias = rb_str_new2((const char *)event.data.alias.anchor); @@ -183,11 +363,14 @@ static VALUE parse(VALUE self, VALUE yaml) #endif } - rb_funcall(handler, id_alias, 1, alias); + args[0] = handler; + args[1] = alias; + rb_protect(protected_alias, (VALUE)args, &state); } break; case YAML_SCALAR_EVENT: { + VALUE args[7]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE plain_implicit, quoted_implicit, style; @@ -225,12 +408,19 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.scalar.style); - rb_funcall(handler, id_scalar, 6, - val, anchor, tag, plain_implicit, quoted_implicit, style); + args[0] = handler; + args[1] = val; + args[2] = anchor; + args[3] = tag; + args[4] = plain_implicit; + args[5] = quoted_implicit; + args[6] = style; + rb_protect(protected_scalar, (VALUE)args, &state); } break; case YAML_SEQUENCE_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -256,15 +446,21 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.sequence_start.style); - rb_funcall(handler, id_start_sequence, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] = implicit; + args[4] = style; + + rb_protect(protected_start_sequence, (VALUE)args, &state); } break; case YAML_SEQUENCE_END_EVENT: - rb_funcall(handler, id_end_sequence, 0); + rb_protect(protected_end_sequence, handler, &state); break; case YAML_MAPPING_START_EVENT: { + VALUE args[5]; VALUE anchor = Qnil; VALUE tag = Qnil; VALUE implicit, style; @@ -289,22 +485,28 @@ static VALUE parse(VALUE self, VALUE yaml) style = INT2NUM((long)event.data.mapping_start.style); - rb_funcall(handler, id_start_mapping, 4, - anchor, tag, implicit, style); + args[0] = handler; + args[1] = anchor; + args[2] = tag; + args[3] 
= implicit; + args[4] = style; + + rb_protect(protected_start_mapping, (VALUE)args, &state); } break; case YAML_MAPPING_END_EVENT: - rb_funcall(handler, id_end_mapping, 0); + rb_protect(protected_end_mapping, handler, &state); break; case YAML_NO_EVENT: - rb_funcall(handler, id_empty, 0); + rb_protect(protected_empty, handler, &state); break; case YAML_STREAM_END_EVENT: - rb_funcall(handler, id_end_stream, 0); + rb_protect(protected_end_stream, handler, &state); done = 1; break; } yaml_event_delete(&event); + if (state) rb_jump_tag(state); } return self; @@ -312,29 +514,6 @@ static VALUE parse(VALUE self, VALUE yaml) /* * call-seq: - * parser.external_encoding=(encoding) - * - * Set the encoding for this parser to +encoding+ - */ -static VALUE set_external_encoding(VALUE self, VALUE encoding) -{ - yaml_parser_t * parser; - VALUE exception; - - Data_Get_Struct(self, yaml_parser_t, parser); - - if(parser->encoding) { - exception = rb_const_get_at(mPsych, rb_intern("Exception")); - rb_raise(exception, "don't set the encoding twice!"); - } - - yaml_parser_set_encoding(parser, NUM2INT(encoding)); - - return encoding; -} - -/* - * call-seq: * parser.mark # => # * * Returns a Psych::Parser::Mark object that contains line, column, and index @@ -376,11 +555,11 @@ void Init_psych_parser() /* UTF-16-BE Encoding with BOM */ rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING)); + rb_require("psych/syntax_error"); ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError); - rb_define_method(cPsychParser, "parse", parse, 1); + rb_define_method(cPsychParser, "parse", parse, -1); rb_define_method(cPsychParser, "mark", mark, 0); - rb_define_method(cPsychParser, "external_encoding=", set_external_encoding, 1); id_read = rb_intern("read"); id_path = rb_intern("path"); diff --git a/test/psych/test_array.rb b/test/psych/test_array.rb index ec6a1aa..9eedbb4 100644 --- a/test/psych/test_array.rb +++ b/test/psych/test_array.rb @@ -2,11 +2,39 @@ 
require 'psych/helper' module Psych class TestArray < TestCase + class X < Array + end + + class Y < Array + attr_accessor :val + end + def setup super @list = [{ :a => 'b' }, 'foo'] end + def test_subclass + yaml = Psych.dump X.new + assert_match X.name, yaml + + list = X.new + list << 1 + assert_equal X, list.class + assert_equal 1, list.first + end + + def test_subclass_with_attributes + y = Psych.load Psych.dump Y.new.tap {|y| y.val = 1} + assert_equal Y, y.class + assert_equal 1, y.val + end + + def test_backwards_with_syck + x = Psych.load "--- !seq:#{X.name} []\n\n" + assert_equal X, x.class + end + def test_self_referential @list << @list assert_cycle(@list) diff --git a/test/psych/test_encoding.rb b/test/psych/test_encoding.rb index a341c47..8efb676 100644 --- a/test/psych/test_encoding.rb +++ b/test/psych/test_encoding.rb @@ -31,6 +31,79 @@ module Psych @emitter = Psych::Emitter.new @buffer end + def test_transcode_shiftjis + str = "こんにちは!" + loaded = Psych.load("--- こんにちは!".encode('SHIFT_JIS')) + assert_equal str, loaded + end + + def test_transcode_utf16le + str = "こんにちは!" + loaded = Psych.load("--- こんにちは!".encode('UTF-16LE')) + assert_equal str, loaded + end + + def test_transcode_utf16be + str = "こんにちは!" + loaded = Psych.load("--- こんにちは!".encode('UTF-16BE')) + assert_equal str, loaded + end + + def test_io_shiftjis + t = Tempfile.new(['shiftjis', 'yml'], :encoding => 'SHIFT_JIS') + t.write '--- こんにちは!' + t.close + + # If the external encoding isn't utf8, utf16le, or utf16be, we cannot + # process the file. 
+ File.open(t.path, 'r', :encoding => 'SHIFT_JIS') do |f| + assert_raises ArgumentError do + Psych.load(f) + end + end + + t.close(true) + end + + def test_io_utf16le + t = Tempfile.new(['utf16le', 'yml']) + t.binmode + t.write '--- こんにちは!'.encode('UTF-16LE') + t.close + + File.open(t.path, 'rb', :encoding => 'UTF-16LE') do |f| + assert_equal "こんにちは!", Psych.load(f) + end + + t.close(true) + end + + def test_io_utf16be + t = Tempfile.new(['utf16be', 'yml']) + t.binmode + t.write '--- こんにちは!'.encode('UTF-16BE') + t.close + + File.open(t.path, 'rb', :encoding => 'UTF-16BE') do |f| + assert_equal "こんにちは!", Psych.load(f) + end + + t.close(true) + end + + def test_io_utf8 + t = Tempfile.new(['utf8', 'yml']) + t.binmode + t.write '--- こんにちは!'.encode('UTF-8') + t.close + + File.open(t.path, 'rb', :encoding => 'UTF-8') do |f| + assert_equal "こんにちは!", Psych.load(f) + end + + t.close(true) + end + def test_emit_alias @emitter.start_stream Psych::Parser::UTF8 @emitter.start_document [], [], true diff --git a/test/psych/test_exception.rb b/test/psych/test_exception.rb index 806c5e2..c6d98d7 100644 --- a/test/psych/test_exception.rb +++ b/test/psych/test_exception.rb @@ -16,6 +16,97 @@ module Psych @wups = Wups.new end + def test_load_takes_file + ex = assert_raises(Psych::SyntaxError) do + Psych.load '--- `' + end + assert_nil ex.file + + ex = assert_raises(Psych::SyntaxError) do + Psych.load '--- `', 'meow' + end + assert_equal 'meow', ex.file + end + + def test_psych_parse_stream_takes_file + ex = assert_raises(Psych::SyntaxError) do + Psych.parse_stream '--- `' + end + assert_nil ex.file + assert_match '()', ex.message + + ex = assert_raises(Psych::SyntaxError) do + Psych.parse_stream '--- `', 'omg!' 
+ end + assert_equal 'omg!', ex.file + assert_match 'omg!', ex.message + end + + def test_load_stream_takes_file + ex = assert_raises(Psych::SyntaxError) do + Psych.load_stream '--- `' + end + assert_nil ex.file + assert_match '()', ex.message + + ex = assert_raises(Psych::SyntaxError) do + Psych.load_stream '--- `', 'omg!' + end + assert_equal 'omg!', ex.file + end + + def test_parse_file_exception + t = Tempfile.new(['parsefile', 'yml']) + t.binmode + t.write '--- `' + t.close + ex = assert_raises(Psych::SyntaxError) do + Psych.parse_file t.path + end + assert_equal t.path, ex.file + t.close(true) + end + + def test_load_file_exception + t = Tempfile.new(['loadfile', 'yml']) + t.binmode + t.write '--- `' + t.close + ex = assert_raises(Psych::SyntaxError) do + Psych.load_file t.path + end + assert_equal t.path, ex.file + t.close(true) + end + + def test_psych_parse_takes_file + ex = assert_raises(Psych::SyntaxError) do + Psych.parse '--- `' + end + assert_match '()', ex.message + assert_nil ex.file + + ex = assert_raises(Psych::SyntaxError) do + Psych.parse '--- `', 'omg!' 
+ end + assert_match 'omg!', ex.message + end + + def test_attributes + e = assert_raises(Psych::SyntaxError) { + Psych.load '--- `foo' + } + + assert_nil e.file + assert_equal 1, e.line + assert_equal 5, e.column + # FIXME: offset isn't being set correctly by libyaml + # assert_equal 5, e.offset + + assert e.problem + assert e.context + end + def test_convert w = Psych.load(Psych.dump(@wups)) assert_equal @wups, w diff --git a/test/psych/test_merge_keys.rb b/test/psych/test_merge_keys.rb index b3ebe9b..bf5968f 100644 --- a/test/psych/test_merge_keys.rb +++ b/test/psych/test_merge_keys.rb @@ -2,6 +2,15 @@ require 'psych/helper' module Psych class TestMergeKeys < TestCase + def test_missing_merge_key + yaml = <<-eoyml +bar: + << : *foo + eoyml + exp = assert_raises(Psych::BadAlias) { Psych.load yaml } + assert_match 'foo', exp.message + end + # [ruby-core:34679] def test_merge_key yaml = <<-eoyml diff --git a/test/psych/test_numeric.rb b/test/psych/test_numeric.rb index 9adb058..bae723a 100644 --- a/test/psych/test_numeric.rb +++ b/test/psych/test_numeric.rb @@ -1,4 +1,5 @@ require 'psych/helper' +require 'bigdecimal' module Psych ### @@ -10,5 +11,15 @@ module Psych str = Psych.load('--- 090') assert_equal '090', str end + + def test_big_decimal_tag + decimal = BigDecimal("12.34") + assert_match "!ruby/object:BigDecimal", Psych.dump(decimal) + end + + def test_big_decimal_round_trip + decimal = BigDecimal("12.34") + assert_cycle decimal + end end end diff --git a/test/psych/test_object_references.rb b/test/psych/test_object_references.rb new file mode 100644 index 0000000..77cc96e --- /dev/null +++ b/test/psych/test_object_references.rb @@ -0,0 +1,67 @@ +require 'psych/helper' + +module Psych + class TestObjectReferences < TestCase + def test_range_has_references + assert_reference_trip 1..2 + end + + def test_module_has_references + assert_reference_trip Psych + end + + def test_class_has_references + assert_reference_trip TestObjectReferences + end + + def 
test_rational_has_references + assert_reference_trip Rational('1.2') + end + + def test_complex_has_references + assert_reference_trip Complex(1, 2) + end + + def test_datetime_has_references + assert_reference_trip DateTime.now + end + + def assert_reference_trip obj + yml = Psych.dump([obj, obj]) + assert_match(/\*\d+/, yml) + data = Psych.load yml + assert_equal data.first.object_id, data.last.object_id + end + + def test_float_references + data = Psych.load <<-eoyml +--- +- &name 1.2 +- *name + eoyml + assert_equal data.first, data.last + assert_equal data.first.object_id, data.last.object_id + end + + def test_binary_references + data = Psych.load <<-eoyml +--- +- &name !binary |- + aGVsbG8gd29ybGQh +- *name + eoyml + assert_equal data.first, data.last + assert_equal data.first.object_id, data.last.object_id + end + + def test_regexp_references + data = Psych.load <<-eoyml +--- +- &name !ruby/regexp /pattern/i +- *name + eoyml + assert_equal data.first, data.last + assert_equal data.first.object_id, data.last.object_id + end + end +end diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb index decb241..d8c53f2 100644 --- a/test/psych/test_parser.rb +++ b/test/psych/test_parser.rb @@ -32,6 +32,49 @@ module Psych @handler.parser = @parser end + def test_exception_memory_leak + yaml = <<-eoyaml +%YAML 1.1 +%TAG ! tag:tenderlovemaking.com,2009: +--- &ponies +- first element +- *ponies +- foo: bar +... 
+ eoyaml + + [:start_stream, :start_document, :end_document, :alias, :scalar, + :start_sequence, :end_sequence, :start_mapping, :end_mapping, + :end_stream].each do |method| + + klass = Class.new(Psych::Handler) do + define_method(method) do |*args| + raise + end + end + + parser = Psych::Parser.new klass.new + 2.times { + assert_raises(RuntimeError, method.to_s) do + parser.parse yaml + end + } + end + end + + def test_multiparse + 3.times do + @parser.parse '--- foo' + end + end + + def test_filename + ex = assert_raises(Psych::SyntaxError) do + @parser.parse '--- `', 'omg!' + end + assert_match 'omg!', ex.message + end + def test_line_numbers assert_equal 0, @parser.mark.line @parser.parse "---\n- hello\n- world" @@ -80,15 +123,6 @@ module Psych assert_equal 19, @parser.mark.index end - def test_set_encoding_twice - @parser.external_encoding = Psych::Parser::UTF16LE - - e = assert_raises(Psych::Exception) do - @parser.external_encoding = Psych::Parser::UTF16LE - end - assert_equal "don't set the encoding twice!", e.message - end - def test_bom tadpole = 'おたまじゃくし' @@ -108,6 +142,7 @@ module Psych def test_bogus_io o = Object.new + def o.external_encoding; nil end def o.read len; self end assert_raises(TypeError) do diff --git a/test/psych/test_scalar_scanner.rb b/test/psych/test_scalar_scanner.rb index 6599099..cf0dfff 100644 --- a/test/psych/test_scalar_scanner.rb +++ b/test/psych/test_scalar_scanner.rb @@ -1,4 +1,5 @@ require 'psych/helper' +require 'date' module Psych class TestScalarScanner < TestCase @@ -20,6 +21,27 @@ module Psych end end + def test_scan_bad_dates + x = '2000-15-01' + assert_equal x, @ss.tokenize(x) + + x = '2000-10-51' + assert_equal x, @ss.tokenize(x) + + x = '2000-10-32' + assert_equal x, @ss.tokenize(x) + end + + def test_scan_good_edge_date + x = '2000-1-31' + assert_equal Date.strptime(x, '%Y-%m-%d'), @ss.tokenize(x) + end + + def test_scan_bad_edge_date + x = '2000-11-31' + assert_equal x, @ss.tokenize(x) + end + def test_scan_date 
date = '1980-12-16' token = @ss.tokenize date diff --git a/test/psych/test_stream.rb b/test/psych/test_stream.rb index 4d8f137..beca365 100644 --- a/test/psych/test_stream.rb +++ b/test/psych/test_stream.rb @@ -2,6 +2,50 @@ require 'psych/helper' module Psych class TestStream < TestCase + def test_parse_partial + rb = Psych.parse("--- foo\n...\n--- `").to_ruby + assert_equal 'foo', rb + end + + def test_load_partial + rb = Psych.load("--- foo\n...\n--- `") + assert_equal 'foo', rb + end + + def test_parse_stream_yields_documents + list = [] + Psych.parse_stream("--- foo\n...\n--- bar") do |doc| + list << doc.to_ruby + end + assert_equal %w{ foo bar }, list + end + + def test_parse_stream_break + list = [] + Psych.parse_stream("--- foo\n...\n--- `") do |doc| + list << doc.to_ruby + break + end + assert_equal %w{ foo }, list + end + + def test_load_stream_yields_documents + list = [] + Psych.load_stream("--- foo\n...\n--- bar") do |ruby| + list << ruby + end + assert_equal %w{ foo bar }, list + end + + def test_load_stream_break + list = [] + Psych.load_stream("--- foo\n...\n--- `") do |ruby| + list << ruby + break + end + assert_equal %w{ foo }, list + end + def test_explicit_documents io = StringIO.new stream = Psych::Stream.new(io) diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index 51f1280..c7d5c60 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -2,6 +2,37 @@ require 'psych/helper' module Psych class TestString < TestCase + class X < String + end + + class Y < String + attr_accessor :val + end + + def test_backwards_with_syck + x = Psych.load "--- !str:#{X.name} foo\n\n" + assert_equal X, x.class + assert_equal 'foo', x + end + + def test_empty_subclass + assert_match "!ruby/string:#{X}", Psych.dump(X.new) + x = Psych.load Psych.dump X.new + assert_equal X, x.class + end + + def test_subclass_with_attributes + y = Psych.load Psych.dump Y.new.tap {|y| y.val = 1} + assert_equal Y, y.class + assert_equal 1, y.val + end 
+ + def test_string_with_base_60 + yaml = Psych.dump '01:03:05' + assert_match "'01:03:05'", yaml + assert_equal '01:03:05', Psych.load(yaml) + end + def test_tagged_binary_should_be_dumped_as_binary string = "hello world!" string.force_encoding 'ascii-8bit' diff --git a/test/psych/test_tainted.rb b/test/psych/test_tainted.rb index bf55d3b..fdcced4 100644 --- a/test/psych/test_tainted.rb +++ b/test/psych/test_tainted.rb @@ -121,7 +121,9 @@ module Psych t.binmode t.write string t.close - File.open(t.path) { |f| @parser.parse f } + File.open(t.path, 'r:bom|utf-8') { |f| + @parser.parse f + } t.close(true) end end