Index: lib/uri/common.rb =================================================================== --- lib/uri/common.rb (revision 24196) +++ lib/uri/common.rb (working copy) @@ -20,12 +20,15 @@ # alpha = lowalpha | upalpha ALPHA = "a-zA-Z" + # digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | + # "8" | "9" + DIGIT = "0-9" # alphanum = alpha | digit - ALNUM = "#{ALPHA}\\d" + ALNUM = "#{ALPHA}#{DIGIT}" # hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | # "a" | "b" | "c" | "d" | "e" | "f" - HEX = "a-fA-F\\d" + HEX = "#{DIGIT}a-fA-F" # escaped = "%" hex hex ESCAPED = "%[#{HEX}]{2}" # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | @@ -100,114 +103,114 @@ def split(uri) case uri when '' - # null uri + # null uri when @regexp[:ABS_URI] - scheme, opaque, userinfo, host, port, - registry, path, query, fragment = $~[1..-1] + scheme, opaque, userinfo, host, port, + registry, path, query, fragment = $~[1..-1] - # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] - # absoluteURI = scheme ":" ( hier_part | opaque_part ) - # hier_part = ( net_path | abs_path ) [ "?" query ] - # opaque_part = uric_no_slash *uric - - # abs_path = "/" path_segments - # net_path = "//" authority [ abs_path ] - - # authority = server | reg_name - # server = [ [ userinfo "@" ] hostport ] - - if !scheme - raise InvalidURIError, - "bad URI(absolute but no scheme): #{uri}" - end - if !opaque && (!path && (!host && !registry)) - raise InvalidURIError, - "bad URI(absolute but no path): #{uri}" - end + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" 
query ] + # opaque_part = uric_no_slash *uric + + # abs_path = "/" path_segments + # net_path = "//" authority [ abs_path ] + + # authority = server | reg_name + # server = [ [ userinfo "@" ] hostport ] + + if !scheme + raise InvalidURIError, + "bad URI(absolute but no scheme): #{uri}" + end + if !opaque && (!path && (!host && !registry)) + raise InvalidURIError, + "bad URI(absolute but no path): #{uri}" + end when @regexp[:REL_URI] - scheme = nil - opaque = nil + scheme = nil + opaque = nil + + userinfo, host, port, registry, + rel_segment, abs_path, query, fragment = $~[1..-1] + if rel_segment && abs_path + path = rel_segment + abs_path + elsif rel_segment + path = rel_segment + elsif abs_path + path = abs_path + end + + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + + # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] - userinfo, host, port, registry, - rel_segment, abs_path, query, fragment = $~[1..-1] - if rel_segment && abs_path - path = rel_segment + abs_path - elsif rel_segment - path = rel_segment - elsif abs_path - path = abs_path - end - - # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] - - # relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] - - # net_path = "//" authority [ abs_path ] - # abs_path = "/" path_segments - # rel_path = rel_segment [ abs_path ] + # net_path = "//" authority [ abs_path ] + # abs_path = "/" path_segments + # rel_path = rel_segment [ abs_path ] - # authority = server | reg_name - # server = [ [ userinfo "@" ] hostport ] + # authority = server | reg_name + # server = [ [ userinfo "@" ] hostport ] else - raise InvalidURIError, "bad URI(is not URI?): #{uri}" + raise InvalidURIError, "bad URI(is not URI?): #{uri}" end path = '' if !path && !opaque # (see RFC2396 Section 5.2) ret = [ - scheme, - userinfo, host, port, # X - registry, # X - path, # Y - opaque, # Y - query, - fragment + scheme, + userinfo, host, port, # X + registry, # X + path, # Y + opaque, # Y + query, + fragment ] return ret end def parse(uri) scheme, userinfo, host, port, - registry, path, opaque, query, fragment = self.split(uri) + registry, path, opaque, query, fragment = self.split(uri) if scheme && URI.scheme_list.include?(scheme.upcase) - URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, + URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, registry, path, opaque, query, fragment, self) else - Generic.new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) + Generic.new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment, self) end end def join(*str) u = self.parse(str[0]) str[1 .. -1].each do |x| - u = u.merge(x) + u = u.merge(x) end u end def extract(str, schemes = nil, &block) if block_given? 
- str.scan(make_regexp(schemes)) { yield $& } - nil + str.scan(make_regexp(schemes)) { yield $& } + nil else - result = [] - str.scan(make_regexp(schemes)) { result.push $& } - result + result = [] + str.scan(make_regexp(schemes)) { result.push $& } + result end end def make_regexp(schemes = nil) unless schemes - @regexp[:ABS_URI_REF] + @regexp[:ABS_URI_REF] else - /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x + /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x end end @@ -262,7 +265,7 @@ # hostname = *( domainlabel "." ) toplabel [ "." ] unless hostname - ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?" + ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?" end # RFC 2373, APPENDIX B: @@ -276,7 +279,9 @@ # allowed too. Here is a replacement. # # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT - ret[:IPV4ADDR] = ipv4addr = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" + v4digit = "(?:25[0-5]|2[0-4][#{PATTERN::DIGIT}]|[01]?[#{PATTERN::DIGIT}]{1,2})" + ipv4addr = "#{v4digit}\\." * 3 + v4digit + ret[:IPV4ADDR] = ipv4addr # hex4 = 1*4HEXDIG hex4 = "[#{PATTERN::HEX}]{1,4}" # lastpart = hex4 | IPv4address @@ -298,7 +303,7 @@ # host = hostname | IPv4address | IPv6reference (RFC 2732) ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})" # port = *digit - port = '\d*' + port = "[#{PATTERN::DIGIT}]*" # hostport = host [ ":" port ] ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?" @@ -329,7 +334,7 @@ ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+" # scheme = alpha *( alpha | digit | "+" | "-" | "." ) - ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}\\d]*" + ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}#{PATTERN::DIGIT}]*" # abs_path = "/" path_segments ret[:ABS_PATH] = abs_path = "/#{path_segments}" @@ -352,23 +357,24 @@ ret[:URI_REF] = uri_ref = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?" 
ret[:X_ABS_URI] = " - (#{scheme}): (?# 1: scheme) + (#{scheme}): (?# 1: scheme) (?: - (#{opaque_part}) (?# 2: opaque) + (#{opaque_part}) (?# 2: opaque) | (?:(?: //(?: - (?:(?:(#{userinfo})@)? (?# 3: userinfo) - (?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port) + (?:(?:(#{userinfo})@)? (?# 3: userinfo) + (?:(#{host}) (?# 4: host) + (?::([#{PATTERN::DIGIT}]*))?))? (?# 5: port) | - (#{reg_name}) (?# 6: registry) + (#{reg_name}) (?# 6: registry) ) | - (?!//)) (?# XXX: '//' is the mark for hostport) - (#{abs_path})? (?# 7: path) - )(?:\\?(#{query}))? (?# 8: query) + (?!//)) (?# XXX: '//' is the mark for hostport) + (#{abs_path})? (?# 7: path) + )(?:\\?(#{query}))? (?# 8: query) ) - (?:\\#(#{fragment}))? (?# 9: fragment) + (?:\\#(#{fragment}))? (?# 9: fragment) " ret[:X_REL_URI] = " @@ -376,18 +382,19 @@ (?: // (?: - (?:(#{userinfo})@)? (?# 1: userinfo) - (#{host})?(?::(\\d*))? (?# 2: host, 3: port) + (?:(#{userinfo})@)? (?# 1: userinfo) + (#{host})? (?# 2: host) + (?::([#{PATTERN::DIGIT}]*))? (?# 3: port) | - (#{reg_name}) (?# 4: registry) + (#{reg_name}) (?# 4: registry) ) ) | - (#{rel_segment}) (?# 5: rel_segment) + (#{rel_segment}) (?# 5: rel_segment) )? - (#{abs_path})? (?# 6: abs_path) - (?:\\?(#{query}))? (?# 7: query) - (?:\\#(#{fragment}))? (?# 8: fragment) + (#{abs_path})? (?# 6: abs_path) + (?:\\?(#{query}))? (?# 7: query) + (?:\\#(#{fragment}))? (?# 8: fragment) " ret