5588_negative_lookahead.patch - Ruby - Ruby Issue Tracking System

     /*
      * Regexp option flags.  ONIG_OPTION_IGNORECASE is bit 0 (1U); every flag
      * after it is written as a one-bit left shift of another flag, so the
      * numeric value of each flag depends on its position in this chain.
      *
      * NOTE(review): ONIG_OPTION_MULTILINE is defined twice below -- once as
      * ONIG_OPTION_EXTEND << 1 (value 4) and once as ONIG_OPTION_NEGATE << 1
      * (value 8).  Presumably these are the old and new lines of a diff whose
      * +/- markers were lost; only one definition can remain in compilable
      * code.  With ONIG_OPTION_NEGATE taking value 4, MULTILINE and every flag
      * derived from it move up by one bit -- confirm that nothing depends on
      * the previous numeric values.
      */
     #define ONIG_OPTION_NONE                 0U
     #define ONIG_OPTION_IGNORECASE           1U
     #define ONIG_OPTION_EXTEND               (ONIG_OPTION_IGNORECASE         << 1)
     #define ONIG_OPTION_MULTILINE            (ONIG_OPTION_EXTEND             << 1)
     #define ONIG_OPTION_NEGATE               (ONIG_OPTION_EXTEND             << 1)
     #define ONIG_OPTION_MULTILINE            (ONIG_OPTION_NEGATE             << 1)
     #define ONIG_OPTION_SINGLELINE           (ONIG_OPTION_MULTILINE          << 1)
     #define ONIG_OPTION_FIND_LONGEST         (ONIG_OPTION_SINGLELINE         << 1)
     #define ONIG_OPTION_FIND_NOT_EMPTY       (ONIG_OPTION_FIND_LONGEST       << 1)

     #define KCODE_FIXED FL_USER4
     /*
      * ARG_REG_OPTION_MASK is the OR of the regexp option bits listed in its
      * body.
      *
      * NOTE(review): two bodies follow the continuation line -- one without and
      * one with ONIG_OPTION_NEGATE.  Presumably these are the old and new lines
      * of a diff whose +/- markers were lost; only one may remain in compilable
      * code.
      */
     #define ARG_REG_OPTION_MASK \
         (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
         (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND|ONIG_OPTION_NEGATE)
     #define ARG_ENCODING_FIXED    16
     #define ARG_ENCODING_NONE     32
-...
           case 'm':
     	val = ONIG_OPTION_MULTILINE;
     	break;
           case 'v':
     	val = ONIG_OPTION_NEGATE;
     	break;
           default:
     	val = 0;
     	break;
-...
+    }
     /*
      * Renders the regexp option bits in `options` as flag letters in `str`:
      * 'm' (ONIG_OPTION_MULTILINE), 'i' (ONIG_OPTION_IGNORECASE),
      * 'x' (ONIG_OPTION_EXTEND) and 'v' (ONIG_OPTION_NEGATE), always in that
      * order, followed by a terminating NUL.  Returns `str`.
      *
      * When all four bits are set, five bytes are written (four letters plus
      * the NUL), so the caller's buffer must hold at least 5 chars.
      *
      * NOTE(review): two signature lines appear below (str[4] and str[5]).
      * Presumably these are the old and new lines of a diff whose +/- markers
      * were lost; only one can remain in compilable code.
      */
     static char *
     option_to_str(char str[4], int options)
     option_to_str(char str[5], int options)
+    {
         char *p = str;
         /* Append one letter per set bit; the emission order is fixed. */
         if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
         if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
         if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
         if (options & ONIG_OPTION_NEGATE) *p++ = 'v';
         /* NUL-terminate; with no bits set the result is the empty string. */
         *p = 0;
         return str;
+    }
-...
         rb_reg_expr_str(str, s, len, enc, resenc);
         rb_str_buf_cat2(str, "/");
         if (re) {
     	char opts[4];
     	char opts[5];
     	rb_reg_check(re);
     	if (*option_to_str(opts, RREGEXP(re)->ptr->options))
     	    rb_str_buf_cat2(str, opts);
-...
      *  generally more readable version of <i>rxp</i>.
+     *
      *      r1 = /ab+c/ix           #=> /ab+c/ix
      *      s1 = r1.to_s            #=> "(?ix-m:ab+c)"
      *      r2 = Regexp.new(s1)     #=> /(?ix-m:ab+c)/
      *      s1 = r1.to_s            #=> "(?ix-mv:ab+c)"
      *      r2 = Regexp.new(s1)     #=> /(?ix-mv:ab+c)/
      *      r1 == r2                #=> false
      *      r1.source               #=> "ab+c"
      *      r2.source               #=> "(?ix-m:ab+c)"
      *      r2.source               #=> "(?ix-mv:ab+c)"
      */
     static VALUE
     rb_reg_to_s(VALUE re)
+    {
         int options, opt;
         const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
         const int embeddable = ARG_REG_OPTION_MASK;
         long len;
         const UChar* ptr;
         VALUE str = rb_str_buf_new2("(?");
         char optbuf[5];
         char optbuf[6];
         rb_encoding *enc = rb_enc_get(re);
         rb_reg_check(re);
-...
     static VALUE
     rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
+    {
         char opts[6];
         char opts[7];
         VALUE desc = rb_str_buf_new2(err);
         rb_encoding *resenc = rb_default_internal_encoding();
         if (resenc == NULL) resenc = rb_default_external_encoding();
-...
      *  options are propagated, and new options may not be specified (a change as of
      *  Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or
      *  more of the constants <code>Regexp::EXTENDED</code>,
      *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
      *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not
      *  <code>nil</code>, the regexp will be case insensitive.
      *  When the <i>lang</i> parameter is `n' or `N' sets the regexp no encoding.
      *  <code>Regexp::IGNORECASE</code>, <code>Regexp::MULTILINE</code>, and
      *  <code>Regexp::NEGATED</code>, <em>or</em>-ed together. Otherwise, if
      *  <i>options</i> is not <code>nil</code>, the regexp will be case
      *  insensitive.  When the <i>lang</i> parameter is `n' or `N', the regexp
      *  is set to no encoding.
+     *
      *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/
      *     r2 = Regexp.new('cat', true)               #=> /cat/i
-...
      *     Regexp.union("a+b*c")                #=> /a\+b\*c/
      *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/
      *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
      *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/
      *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mixv:dogs)|(?i-mxv:cats)/
      */
     static VALUE
     rb_reg_s_union_m(VALUE self, VALUE args)
-...
         /* see Regexp.options and Regexp.new */
         rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
         /* see Regexp.options and Regexp.new */
         rb_define_const(rb_cRegexp, "NEGATED", INT2FIX(ONIG_OPTION_NEGATE));
         /* see Regexp.options and Regexp.new */
         rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
         /* see Regexp.options and Regexp.new */
         rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));

         fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
         switch (NENCLOSE(node)->type) {
         case ENCLOSE_OPTION:
           fprintf(f, "option:%d\n", NENCLOSE(node)->option);
           print_indent_tree(f, NENCLOSE(node)->target, indent + add);
           fprintf(f, "option:%d", NENCLOSE(node)->option);
           break;
         case ENCLOSE_MEMORY:
           fprintf(f, "memory:%d", NENCLOSE(node)->regnum);

       if (r != ONIG_MISMATCH)
         fprintf(stderr, "onig_search: error %d\n", r);
     #endif
       return r;
       goto negate;
      mismatch_no_msa:
       r = ONIG_MISMATCH;
-...
       if (r != ONIG_MISMATCH)
         fprintf(stderr, "onig_search: error %d\n", r);
     #endif
       return r;
       goto negate;
      match:
       ONIG_STATE_DEC_THREAD(reg);
       MATCH_ARG_FREE(msa);
       return s - str;
       r = s - str;
       /* fall */
      negate:
       if (r >= ONIG_MISMATCH && IS_NEGATE(reg->options))
         return r == ONIG_MISMATCH ? ONIG_NORMAL : ONIG_MISMATCH;
       return r;
+    }
     extern OnigEncoding

regint.h
		/*
		 * Option-bit predicates.  Each macro masks `option` with a single
		 * ONIG_OPTION_* flag, so the result is zero when the flag is clear and
		 * the flag's own bit value (not necessarily 1) when it is set.
		 */
		#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
		#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
		#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
		#define IS_NEGATE(option) ((option) & ONIG_OPTION_NEGATE)
		#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
		#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
		#define IS_FIND_CONDITION(option) ((option) & \

     #ifdef USE_POSIXLINE_OPTION
         case 'p':
     #endif
         case '-': case 'i': case 'm': case 's': case 'x':
         case '-': case 'i': case 'm': case 's': case 'x': case 'v':
+          {
     	int neg = 0;
-...
     	  case '-':  neg = 1; break;
     	  case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
     	  case 'v':  ONOFF(option, ONIG_OPTION_NEGATE,     neg); break;
     	  case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
     	  case 's':
     	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
-...
     	    if (r < 0) return r;
     	    *np = node_new_option(option);
     	    CHECK_NULL_RETURN_MEMERR(*np);
                 /* expand "(?v:r)" into "(?:(?!r).)" */
                 if (IS_NEGATE(option)) {
                   Node *seq, *nla, *any;
                   /* build "(?!r)" */
                   nla = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
                   CHECK_NULL_RETURN_MEMERR(nla);
                   NANCHOR(nla)->target = target;
                   /* build "." */
                   any = node_new_anychar();
                   if (IS_NULL(any)) {
                     onig_node_free(nla);
                     return ONIGERR_MEMORY;
+                  }
                   /* put "(?!r)" and "." in sequence: "(?!r)." */
                   seq = node_new_list(nla, NULL);
                   if (IS_NULL(seq)) {
                     onig_node_free(nla);
                     onig_node_free(any);
                     return ONIGERR_MEMORY;
+                  }
                   NCDR(seq) = node_new_list(any, NULL);
                   if (IS_NULL(NCDR(seq))) {
                     onig_node_free(nla);
                     onig_node_free(any);
                     onig_node_free(seq);
                     return ONIGERR_MEMORY;
+                  }
                   target = seq;
+                }
     	    NENCLOSE(*np)->target = target;
     	    *src = p;
     	    return 0;

       end
       # Checks the string form returned by Regexp#to_s for a regexp built from
       # a single NUL character.
       # NOTE(review): both a '(?-mix:...)' and a '(?-mixv:...)' expectation are
       # listed for the same expression; presumably they are the old and new
       # lines of a diff whose +/- markers were lost -- only one can hold.
       def test_to_s
         assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s
         assert_equal '(?-mixv:\x00)', Regexp.new("\0").to_s
       end
       def test_union
-...
       end
       # Checks how Regexp#to_s spells enabled and disabled option letters, both
       # for options given as literal flags and for options written inline as
       # (?flags:...) groups.
       # NOTE(review): the first group of assertions expects option strings
       # without 'v' and the second group expects them with 'v', for the same
       # source expressions.  Presumably these are the old and new lines of a
       # diff whose +/- markers were lost -- only one group can hold.
       def test_to_s2
         # Expectations spelled with the three letters m, i, x.
         assert_equal('(?-mix:foo)', /(?:foo)/.to_s)
         assert_equal('(?m-ix:foo)', /(?:foo)/m.to_s)
         assert_equal('(?mi-x:foo)', /(?:foo)/mi.to_s)
         assert_equal('(?mix:foo)', /(?:foo)/mix.to_s)
         assert_equal('(?m-ix:foo)', /(?m-ix:foo)/.to_s)
         assert_equal('(?mi-x:foo)', /(?mi-x:foo)/.to_s)
         assert_equal('(?mix:foo)', /(?mix:foo)/.to_s)
         assert_equal('(?mix:)', /(?mix)/.to_s)
         assert_equal('(?-mix:(?mix:foo) )', /(?mix:foo) /.to_s)
         # Expectations spelled with the four letters m, i, x, v.
         assert_equal('(?-mixv:foo)', /(?:foo)/.to_s)
         assert_equal('(?m-ixv:foo)', /(?:foo)/m.to_s)
         assert_equal('(?mi-xv:foo)', /(?:foo)/mi.to_s)
         assert_equal('(?mix-v:foo)', /(?:foo)/mix.to_s)
         assert_equal('(?mixv:foo)', /(?:foo)/mixv.to_s)
         assert_equal('(?m-ixv:foo)', /(?m-ixv:foo)/.to_s)
         assert_equal('(?mi-xv:foo)', /(?mi-xv:foo)/.to_s)
         assert_equal('(?mix-v:foo)', /(?mix-v:foo)/.to_s)
         assert_equal('(?mixv:foo)', /(?mixv:foo)/.to_s)
         assert_equal('(?mixv:)', /(?mixv)/.to_s)
         assert_equal('(?-mixv:(?mixv:foo) )', /(?mixv:foo) /.to_s)
       end
       def test_casefold_p
-...
       # Each single-letter literal flag (i, x, v, m) is expected to make
       # Regexp#options report exactly the matching Regexp constant and nothing
       # else.
       def test_options
         assert_equal(Regexp::IGNORECASE, /a/i.options)
         assert_equal(Regexp::EXTENDED, /a/x.options)
         assert_equal(Regexp::NEGATED, /a/v.options)
         assert_equal(Regexp::MULTILINE, /a/m.options)
       end
-...
         assert_equal(/foo/, Regexp.union(/foo/))
         assert_equal(/foo/, Regexp.union([/foo/]))
         assert_equal(/\t/, Regexp.union("\t"))
         assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/))
         assert_equal(/(?-mixv:\u3042)|(?-mixv:\u3042)/, Regexp.union(/\u3042/, /\u3042/))
         assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")])
       end
-...
         assert_match(/invalid hex escape/, error.message)
         assert_equal(1, error.message.scan(/.*invalid .*escape.*/i).size, bug3539)
       end
       # Covers the ways a negated regexp can be created: the literal 'v' flag,
       # the inline (?v:...) / (?-v:...) groups, and Regexp.new with
       # Regexp::NEGATED.
       def test_negated_regexp_creation
         # The literal flag and both inline forms must be accepted; eval is used
         # so that a rejected literal raises inside the assertion block.
         assert_nothing_raised { eval("/ruby/v") }
         assert_nothing_raised { eval("/(?v:ruby)/") }
         assert_nothing_raised { eval("/(?-v:ruby)/") }
         # Building from the constant is expected to equal the literal form, and
         # the option is expected to survive re-wrapping with Regexp.new.
         negated = Regexp.new("ruby", Regexp::NEGATED)
         assert_equal(/ruby/v, negated)
         assert_equal(/ruby/v, Regexp.new(negated))
         assert_equal(/ruby/v, Regexp.new(Regexp.new(negated)))
         # Going through to_s is expected to yield the inline-option spelling.
         assert_equal(/(?v-mix:ruby)/, Regexp.new(negated.to_s))
       end
       # Covers matching with the negate option, both as a whole-regexp flag
       # (/.../v) and as an inline group ((?v:...)).
       def test_negated_regexp_matching
         # Baseline: an ordinary regexp without the option.
         assert_match(/ruby/, "ruby")
         assert_match(/ruby/, "rubyperl")
         assert_match(/ruby/, "perlruby")
         assert_no_match(/ruby/, "perl")
         # Explicitly switching the option off inline is expected to behave like
         # the baseline.
         assert_match(/(?-v:ruby)/, "ruby")
         assert_match(/(?-v:ruby)/, "rubyperl")
         assert_match(/(?-v:ruby)/, "perlruby")
         assert_no_match(/(?-v:ruby)/, "perl")
         # As a whole-regexp flag the outcome is inverted: the regexp matches
         # exactly the strings the baseline rejects.
         assert_no_match(/ruby/v, "ruby")
         assert_no_match(/ruby/v, "rubyperl")
         assert_no_match(/ruby/v, "perlruby")
         assert_match(/ruby/v, "perl")
         # As an inline group the parser expands (?v:r) into (?:(?!r).): one
         # character taken at a position where r does not match.  Unanchored, the
         # group can therefore succeed at a later position even when the string
         # contains "ruby"; anchored with ^ it fails when the string starts with
         # "ruby".
         assert_match(/(?v:ruby)/, "ruby")
         assert_no_match(/^(?v:ruby)/, "ruby")
         assert_match(/(?v:ruby)/, "rubyperl")
         assert_no_match(/^(?v:ruby)/, "rubyperl")
         assert_match(/(?v:ruby)/, "perlruby")
         assert_match(/^(?v:ruby)/, "perlruby")
         assert_match(/(?v:ruby)/, "perl")
         assert_match(/^(?v:ruby)/, "perl")
         # Inside a larger pattern the group must consume exactly one character,
         # and that character must not begin a match of the inner pattern.
         assert_no_match(/a(?v:b)c/, "abc")
         assert_no_match(/a(?v:b)c/, "ac")
         assert_match(/a(?v:b)?c/, "ac")
         assert_match(/a(?v:b)c/, "axc")
         assert_no_match(/a(?v:b)c/, "axbc")
         assert_no_match(/a(?v:b)+c/, "axbc")
         assert_match(/a(?v:b)bc/, "axbc")
         assert_no_match(/a(?v:b)c/, "ab_c")
         assert_no_match(/a(?v:b)c/, "a_bc")
         assert_match(/a(?v:b)bc/, "a_bc")
         # A negated group used in place of a negated character class: all three
         # spellings are expected to reject the same input.
         assert_no_match(/"[^<&"]*"/, '"aa<&a"')
         assert_no_match(/"(?v:[<&"])*"/, '"aa<&a"')
         assert_no_match(/"(?v:<|&|")*"/, '"aa<&a"')
         # Filtering with grep: /l/ keeps words containing "l", /l/v keeps the
         # rest, and /^(?v:l)/ keeps words whose first character is not "l".
         languages = %w[ruby perl python lisp smalltalk]
         assert_equal %w[perl lisp smalltalk], languages.grep(/l/)
         assert_equal %w[ruby python], languages.grep(/l/v)
         assert_equal %w[ruby perl python smalltalk], languages.grep(/^(?v:l)/)
       end
     end

Project

General

Profile

Ruby

Feature #5588 » 5588_negative_lookahead.patch