Feature #5588 » 0001-http-redmine.ruby-lang.org-issues-5588.patch
include/ruby/oniguruma.h | ||
---|---|---|
#define ONIG_OPTION_NONE 0U
|
||
#define ONIG_OPTION_IGNORECASE 1U
|
||
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
|
||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
|
||
#define ONIG_OPTION_NEGATE (ONIG_OPTION_EXTEND << 1)
|
||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_NEGATE << 1)
|
||
#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
|
||
#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
|
||
#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
|
||
... | ... | |
#define ONIG_NORMAL 0
|
||
#define ONIG_MISMATCH -1
|
||
#define ONIG_NO_SUPPORT_CONFIG -2
|
||
#define ONIG_MISMATCH_FROM_NEGATE -3
|
||
/* internal error */
|
||
#define ONIGERR_MEMORY -5
|
re.c | ||
---|---|---|
#define KCODE_FIXED FL_USER4
|
||
#define ARG_REG_OPTION_MASK \
|
||
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
|
||
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND|ONIG_OPTION_NEGATE)
|
||
#define ARG_ENCODING_FIXED 16
|
||
#define ARG_ENCODING_NONE 32
|
||
... | ... | |
case 'm':
|
||
val = ONIG_OPTION_MULTILINE;
|
||
break;
|
||
case 'v':
|
||
val = ONIG_OPTION_NEGATE;
|
||
break;
|
||
default:
|
||
val = 0;
|
||
break;
|
||
... | ... | |
}
|
||
static char *
|
||
option_to_str(char str[4], int options)
|
||
option_to_str(char str[5], int options)
|
||
{
|
||
char *p = str;
|
||
if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
|
||
if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
|
||
if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
|
||
if (options & ONIG_OPTION_NEGATE) *p++ = 'v';
|
||
*p = 0;
|
||
return str;
|
||
}
|
||
... | ... | |
rb_reg_expr_str(str, s, len, enc, resenc);
|
||
rb_str_buf_cat2(str, "/");
|
||
if (re) {
|
||
char opts[4];
|
||
char opts[5];
|
||
rb_reg_check(re);
|
||
if (*option_to_str(opts, RREGEXP(re)->ptr->options))
|
||
rb_str_buf_cat2(str, opts);
|
||
... | ... | |
* generally more readable version of <i>rxp</i>.
|
||
*
|
||
* r1 = /ab+c/ix #=> /ab+c/ix
|
||
* s1 = r1.to_s #=> "(?ix-m:ab+c)"
|
||
* r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
|
||
* s1 = r1.to_s #=> "(?ix-mv:ab+c)"
|
||
* r2 = Regexp.new(s1) #=> /(?ix-mv:ab+c)/
|
||
* r1 == r2 #=> false
|
||
* r1.source #=> "ab+c"
|
||
* r2.source #=> "(?ix-m:ab+c)"
|
||
* r2.source #=> "(?ix-mv:ab+c)"
|
||
*/
|
||
static VALUE
|
||
rb_reg_to_s(VALUE re)
|
||
{
|
||
int options, opt;
|
||
const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
|
||
const int embeddable = ARG_REG_OPTION_MASK;
|
||
long len;
|
||
const UChar* ptr;
|
||
VALUE str = rb_str_buf_new2("(?");
|
||
char optbuf[5];
|
||
char optbuf[6];
|
||
rb_encoding *enc = rb_enc_get(re);
|
||
rb_reg_check(re);
|
||
... | ... | |
static VALUE
|
||
rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
|
||
{
|
||
char opts[6];
|
||
char opts[7];
|
||
VALUE desc = rb_str_buf_new2(err);
|
||
rb_encoding *resenc = rb_default_internal_encoding();
|
||
if (resenc == NULL) resenc = rb_default_external_encoding();
|
||
... | ... | |
* options are propagated, and new options may not be specified (a change as of
|
||
* Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or
|
||
* more of the constants <code>Regexp::EXTENDED</code>,
|
||
* <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
|
||
* <em>or</em>-ed together. Otherwise, if <i>options</i> is not
|
||
* <code>nil</code>, the regexp will be case insensitive.
|
||
* When the <i>lang</i> parameter is `n' or `N' sets the regexp no encoding.
|
||
* <code>Regexp::IGNORECASE</code>, <code>Regexp::MULTILINE</code>, and
|
||
* <code>Regexp::NEGATED</code>, <em>or</em>-ed together. Otherwise, if
|
||
* <i>options</i> is not <code>nil</code>, the regexp will be case
|
||
* insensitive. When the <i>lang</i> parameter is `n' or `N', the regexp is
|
||
* set to no encoding.
|
||
*
|
||
* r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
|
||
* r2 = Regexp.new('cat', true) #=> /cat/i
|
||
... | ... | |
* Regexp.union("a+b*c") #=> /a\+b\*c/
|
||
* Regexp.union("skiing", "sledding") #=> /skiing|sledding/
|
||
* Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
|
||
* Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
|
||
* Regexp.union(/dogs/, /cats/i) #=> /(?-mixv:dogs)|(?i-mxv:cats)/
|
||
*/
|
||
static VALUE
|
||
rb_reg_s_union_m(VALUE self, VALUE args)
|
||
... | ... | |
/* see Regexp.options and Regexp.new */
|
||
rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
|
||
/* see Regexp.options and Regexp.new */
|
||
rb_define_const(rb_cRegexp, "NEGATED", INT2FIX(ONIG_OPTION_NEGATE));
|
||
/* see Regexp.options and Regexp.new */
|
||
rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
|
||
/* see Regexp.options and Regexp.new */
|
||
rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
|
regcomp.c | ||
---|---|---|
if (r) return r;
|
||
for (i = 0; i < n; i++) {
|
||
if (IS_NEGATE(reg->options)) tlen++; /* for OP_NEGATE added below */
|
||
r = add_opcode_rel_addr(reg, OP_PUSH,
|
||
(n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
||
if (r) return r;
|
||
r = compile_tree(qn->target, reg);
|
||
if (r) return r;
|
||
if (IS_NEGATE(reg->options)) {
|
||
r = add_opcode(reg, OP_NEGATE);
|
||
if (r) return r;
|
||
}
|
||
}
|
||
}
|
||
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
||
... | ... | |
fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
|
||
switch (NENCLOSE(node)->type) {
|
||
case ENCLOSE_OPTION:
|
||
fprintf(f, "option:%d\n", NENCLOSE(node)->option);
|
||
print_indent_tree(f, NENCLOSE(node)->target, indent + add);
|
||
fprintf(f, "option:%d", NENCLOSE(node)->option);
|
||
break;
|
||
case ENCLOSE_MEMORY:
|
||
fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
|
regexec.c | ||
---|---|---|
goto finish;
|
||
break;
|
||
case OP_NEGATE:
|
||
STACK_POP_ONE;
|
||
best_len = ONIG_MISMATCH_FROM_NEGATE;
|
||
/* fall */
|
||
fail:
|
||
MOP_OUT;
|
||
/* fall */
|
||
... | ... | |
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||
#define MATCH_AND_RETURN_CHECK(upper_range) \
|
||
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
|
||
if (r != ONIG_MISMATCH) {\
|
||
if (r == ONIG_MISMATCH_FROM_NEGATE)\
|
||
goto mismatch;\
|
||
else if (r != ONIG_MISMATCH) {\
|
||
if (r >= 0) {\
|
||
if (! IS_FIND_LONGEST(reg->options)) {\
|
||
goto match;\
|
||
... | ... | |
#else
|
||
#define MATCH_AND_RETURN_CHECK(upper_range) \
|
||
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
|
||
if (r != ONIG_MISMATCH) {\
|
||
if (r == ONIG_MISMATCH_FROM_NEGATE)\
|
||
goto mismatch_no_msa;\
|
||
else if (r != ONIG_MISMATCH) {\
|
||
if (r >= 0) {\
|
||
goto match;\
|
||
}\
|
||
... | ... | |
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||
#define MATCH_AND_RETURN_CHECK(none) \
|
||
r = match_at(reg, str, end, s, prev, &msa);\
|
||
if (r != ONIG_MISMATCH) {\
|
||
if (r == ONIG_MISMATCH_FROM_NEGATE)\
|
||
goto mismatch;\
|
||
else if (r != ONIG_MISMATCH) {\
|
||
if (r >= 0) {\
|
||
if (! IS_FIND_LONGEST(reg->options)) {\
|
||
goto match;\
|
||
... | ... | |
#else
|
||
#define MATCH_AND_RETURN_CHECK(none) \
|
||
r = match_at(reg, str, end, s, prev, &msa);\
|
||
if (r != ONIG_MISMATCH) {\
|
||
if (r == ONIG_MISMATCH_FROM_NEGATE)\
|
||
goto mismatch_no_msa;\
|
||
else if (r != ONIG_MISMATCH) {\
|
||
if (r >= 0) {\
|
||
goto match;\
|
||
}\
|
||
... | ... | |
if (r != ONIG_MISMATCH)
|
||
fprintf(stderr, "onig_search: error %d\n", r);
|
||
#endif
|
||
return r;
|
||
goto negate;
|
||
mismatch_no_msa:
|
||
r = ONIG_MISMATCH;
|
||
... | ... | |
if (r != ONIG_MISMATCH)
|
||
fprintf(stderr, "onig_search: error %d\n", r);
|
||
#endif
|
||
return r;
|
||
goto negate;
|
||
match:
|
||
ONIG_STATE_DEC_THREAD(reg);
|
||
MATCH_ARG_FREE(msa);
|
||
return s - str;
|
||
r = s - str;
|
||
/* fall */
|
||
negate:
|
||
if (r >= ONIG_MISMATCH && IS_NEGATE(reg->options))
|
||
return r == ONIG_MISMATCH ? ONIG_NORMAL : ONIG_MISMATCH;
|
||
return r;
|
||
}
|
||
extern OnigEncoding
|
regint.h | ||
---|---|---|
#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
|
||
#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
|
||
#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
|
||
#define IS_NEGATE(option) ((option) & ONIG_OPTION_NEGATE)
|
||
#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
|
||
#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
|
||
#define IS_FIND_CONDITION(option) ((option) & \
|
||
... | ... | |
OP_MEMORY_END,
|
||
OP_MEMORY_END_REC, /* push marker to stack */
|
||
OP_NEGATE, /* pop stack twice and move */
|
||
OP_FAIL, /* pop stack and move */
|
||
OP_JUMP,
|
||
OP_PUSH,
|
regparse.c | ||
---|---|---|
UChar** src, UChar* end, ScanEnv* env);
|
||
static int
|
||
set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env);
|
||
static int
|
||
parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
|
||
ScanEnv* env)
|
||
{
|
||
... | ... | |
#ifdef USE_POSIXLINE_OPTION
|
||
case 'p':
|
||
#endif
|
||
case '-': case 'i': case 'm': case 's': case 'x':
|
||
case '-': case 'i': case 'm': case 's': case 'x': case 'v':
|
||
{
|
||
int neg = 0;
|
||
... | ... | |
case '-': neg = 1; break;
|
||
case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
|
||
case 'v': ONOFF(option, ONIG_OPTION_NEGATE, neg); break;
|
||
case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
|
||
case 's':
|
||
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
|
||
... | ... | |
if (r < 0) return r;
|
||
*np = node_new_option(option);
|
||
CHECK_NULL_RETURN_MEMERR(*np);
|
||
/* expand "(?v:r)" into "(?:rN)?" where "N" is OP_NEGATE */
|
||
/* NOTE: OP_NEGATE is emitted in compile_quantifier_node() */
|
||
if (IS_NEGATE(option)) {
|
||
Node *tmp;
|
||
int tmpr;
|
||
tmp = node_new_quantifier(0, 1, 0);
|
||
CHECK_NULL_RETURN_MEMERR(tmp);
|
||
tmpr = set_quantifier(tmp, target, 1, env);
|
||
if (tmpr < 0) {
|
||
onig_node_free(tmp);
|
||
return tmpr;
|
||
}
|
||
target = tmp;
|
||
}
|
||
NENCLOSE(*np)->target = target;
|
||
/* append ".*?" to "(?:rN)?" where "N" is OP_NEGATE */
|
||
if (IS_NEGATE(option)) {
|
||
Node *tmp, *tmp_qtfr, *tmp_any;
|
||
int tmpr;
|
||
/* build "." */
|
||
tmp_any = node_new_anychar();
|
||
CHECK_NULL_RETURN_MEMERR(tmp_any);
|
||
/* build "*?" */
|
||
tmp_qtfr = node_new_quantifier(0, REPEAT_INFINITE, 0);
|
||
CHECK_NULL_RETURN_MEMERR(tmp_qtfr);
|
||
NQTFR(tmp_qtfr)->greedy = 0;
|
||
/* join "." to "*?" */
|
||
tmpr = set_quantifier(tmp_qtfr, tmp_any, 1, env);
|
||
if (tmpr < 0) {
|
||
onig_node_free(tmp_any);
|
||
onig_node_free(tmp_qtfr);
|
||
return tmpr;
|
||
}
|
||
/* append ".*?" */
|
||
*np = node_new_list(*np, NULL);
|
||
if (IS_NULL(*np)) {
|
||
onig_node_free(tmp_any);
|
||
onig_node_free(tmp_qtfr);
|
||
return ONIGERR_MEMORY;
|
||
}
|
||
tmp = NCDR(*np) = node_new_list(tmp_qtfr, NULL);
|
||
if (IS_NULL(tmp)) {
|
||
onig_node_free(tmp_any);
|
||
onig_node_free(tmp_qtfr);
|
||
return ONIGERR_MEMORY;
|
||
}
|
||
np = &(NCAR(tmp));
|
||
}
|
||
*src = p;
|
||
return 0;
|
||
}
|
test/ruby/test_regexp.rb | ||
---|---|---|
end
|
||
def test_to_s
|
||
assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s
|
||
assert_equal '(?-mixv:\x00)', Regexp.new("\0").to_s
|
||
end
|
||
def test_union
|
||
... | ... | |
end
|
||
def test_to_s2
|
||
assert_equal('(?-mix:foo)', /(?:foo)/.to_s)
|
||
assert_equal('(?m-ix:foo)', /(?:foo)/m.to_s)
|
||
assert_equal('(?mi-x:foo)', /(?:foo)/mi.to_s)
|
||
assert_equal('(?mix:foo)', /(?:foo)/mix.to_s)
|
||
assert_equal('(?m-ix:foo)', /(?m-ix:foo)/.to_s)
|
||
assert_equal('(?mi-x:foo)', /(?mi-x:foo)/.to_s)
|
||
assert_equal('(?mix:foo)', /(?mix:foo)/.to_s)
|
||
assert_equal('(?mix:)', /(?mix)/.to_s)
|
||
assert_equal('(?-mix:(?mix:foo) )', /(?mix:foo) /.to_s)
|
||
assert_equal('(?-mixv:foo)', /(?:foo)/.to_s)
|
||
assert_equal('(?m-ixv:foo)', /(?:foo)/m.to_s)
|
||
assert_equal('(?mi-xv:foo)', /(?:foo)/mi.to_s)
|
||
assert_equal('(?mix-v:foo)', /(?:foo)/mix.to_s)
|
||
assert_equal('(?mixv:foo)', /(?:foo)/mixv.to_s)
|
||
assert_equal('(?m-ixv:foo)', /(?m-ixv:foo)/.to_s)
|
||
assert_equal('(?mi-xv:foo)', /(?mi-xv:foo)/.to_s)
|
||
assert_equal('(?mix-v:foo)', /(?mix-v:foo)/.to_s)
|
||
assert_equal('(?mixv:foo)', /(?mixv:foo)/.to_s)
|
||
assert_equal('(?mixv:)', /(?mixv)/.to_s)
|
||
assert_equal('(?-mixv:(?mixv:foo) )', /(?mixv:foo) /.to_s)
|
||
end
|
||
def test_casefold_p
|
||
... | ... | |
def test_options
|
||
assert_equal(Regexp::IGNORECASE, /a/i.options)
|
||
assert_equal(Regexp::EXTENDED, /a/x.options)
|
||
assert_equal(Regexp::NEGATED, /a/v.options)
|
||
assert_equal(Regexp::MULTILINE, /a/m.options)
|
||
end
|
||
... | ... | |
assert_equal(/foo/, Regexp.union(/foo/))
|
||
assert_equal(/foo/, Regexp.union([/foo/]))
|
||
assert_equal(/\t/, Regexp.union("\t"))
|
||
assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/))
|
||
assert_equal(/(?-mixv:\u3042)|(?-mixv:\u3042)/, Regexp.union(/\u3042/, /\u3042/))
|
||
assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")])
|
||
end
|
||
... | ... | |
assert_match(/invalid hex escape/, error.message)
|
||
assert_equal(1, error.message.scan(/.*invalid .*escape.*/i).size, bug3539)
|
||
end
|
||
def test_negated_regexp_creation
|
||
assert_nothing_raised { eval("/ruby/v") }
|
||
assert_nothing_raised { eval("/(?v:ruby)/") }
|
||
assert_nothing_raised { eval("/(?-v:ruby)/") }
|
||
negated = Regexp.new("ruby", Regexp::NEGATED)
|
||
assert_equal(/ruby/v, negated)
|
||
assert_equal(/ruby/v, Regexp.new(negated))
|
||
assert_equal(/ruby/v, Regexp.new(Regexp.new(negated)))
|
||
assert_equal(/(?v-mix:ruby)/, Regexp.new(negated.to_s))
|
||
end
|
||
def test_negated_regexp_matching
|
||
assert_match(/ruby/, "ruby")
|
||
assert_match(/ruby/, "rubyperl")
|
||
assert_match(/ruby/, "perlruby")
|
||
assert_no_match(/ruby/, "perl")
|
||
assert_match(/(?-v:ruby)/, "ruby")
|
||
assert_match(/(?-v:ruby)/, "rubyperl")
|
||
assert_match(/(?-v:ruby)/, "perlruby")
|
||
assert_no_match(/(?-v:ruby)/, "perl")
|
||
assert_no_match(/ruby/v, "ruby")
|
||
assert_no_match(/ruby/v, "rubyperl")
|
||
assert_no_match(/ruby/v, "perlruby")
|
||
assert_match(/ruby/v, "perl")
|
||
assert_no_match(/(?v:ruby)/, "ruby")
|
||
assert_no_match(/(?v:ruby)/, "rubyperl")
|
||
assert_match(/(?v:ruby)/, "perlruby")
|
||
assert_match(/(?v:ruby)/, "perl")
|
||
assert_no_match(/a(?v:b)c/, "abc")
|
||
assert_match(/a(?v:b)c/, "ac")
|
||
assert_match(/a(?v:b)c/, "axc")
|
||
assert_match(/a(?v:b)c/, "axbc")
|
||
assert_match(/a(?v:b)c/, "axbcbc")
|
||
languages = %w[ruby perl python lisp smalltalk]
|
||
assert_equal %w[perl lisp smalltalk], languages.grep(/l/)
|
||
assert_equal %w[ruby python], languages.grep(/l/v)
|
||
assert_equal %w[ruby perl python smalltalk], languages.grep(/(?v:l)/)
|
||
end
|
||
end
|