Project

General

Profile

Feature #1831 » ruby-core-24593.patch

naruse (Yui NARUSE), 07/31/2009 08:00 AM

View differences:

include/ruby/oniguruma.h
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
/* meta character specifiers (onig_set_meta_char()) */
#define ONIG_META_CHAR_ESCAPE 0
regparse.c
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_CC_DUP |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
,
......
onig_verb_warn = f;
}
static void CC_DUP_WARN(ScanEnv *env);
static void
bbuf_free(BBuf* bbuf)
{
......
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
......
} while (0)
/* Set bit `pos` in bitset `bs`; if that bit is already set, CC_DUP_WARN(env)
 * is invoked first.
 * NOTE: `env` is not a macro parameter -- it is picked up from the scope of
 * the expansion site, so every user of this macro must have a variable named
 * `env` (as accepted by CC_DUP_WARN) in scope.
 * `bs` and `pos` are each expanded more than once; avoid arguments with side
 * effects. */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)
#define BITSET_IS_EMPTY(bs,empty) do {\
int i;\
empty = 1;\
......
} while (0)
/*
 * bitset_set_range: set every bit from `from` through `to` (inclusive) in
 * `bs`; the loop also stops once the index reaches SINGLE_BYTE_SIZE.
 *
 * NOTE(review): this listing appears to show the pre-patch and post-patch
 * lines back to back (two signatures; BITSET_SET_BIT followed by
 * BITSET_SET_BIT_CHKDUP). The variant taking `ScanEnv *env` is the one that
 * pairs with BITSET_SET_BIT_CHKDUP, which reads `env` from the enclosing
 * scope -- confirm against the raw patch before applying.
 */
static void
bitset_set_range(BitSetRef bs, int from, int to)
bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
{
int i;
for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
BITSET_SET_BIT(bs, i);
BITSET_SET_BIT_CHKDUP(bs, i);
}
}
......
}
static int
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
{
int r, inc_n, pos;
int low, high, bound, x;
......
if (inc_n != 1) {
if (from > data[low*2])
from = data[low*2];
else CC_DUP_WARN(env);
if (to < data[(high - 1)*2 + 1])
to = data[(high - 1)*2 + 1];
else CC_DUP_WARN(env);
}
if (inc_n != 0 && (OnigCodePoint )high < n) {
......
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
return add_code_range_to_buf(pbuf, from, to);
return add_code_range_to_buf(pbuf, env, from, to);
}
static int
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
{
int r, i, n;
OnigCodePoint pre, from, *data, to = 0;
......
from = data[i*2];
to = data[i*2+1];
if (pre <= from - 1) {
r = add_code_range_to_buf(pbuf, pre, from - 1);
r = add_code_range_to_buf(pbuf, env, pre, from - 1);
if (r != 0) return r;
}
if (to == ~((OnigCodePoint )0)) break;
pre = to + 1;
}
if (to < ~((OnigCodePoint )0)) {
r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
}
return r;
}
......
static int
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
BBuf* bbuf2, int not2, BBuf** pbuf)
BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
{
int r;
OnigCodePoint i, n1, *data1;
......
return bbuf_clone(pbuf, bbuf2);
}
else {
return not_code_range_buf(enc, bbuf2, pbuf);
return not_code_range_buf(enc, bbuf2, pbuf, env);
}
}
}
......
r = bbuf_clone(pbuf, bbuf2);
}
else if (not1 == 0) { /* 1 OR (not 2) */
r = not_code_range_buf(enc, bbuf2, pbuf);
r = not_code_range_buf(enc, bbuf2, pbuf, env);
}
if (r != 0) return r;
for (i = 0; i < n1; i++) {
from = data1[i*2];
to = data1[i*2+1];
r = add_code_range_to_buf(pbuf, from, to);
r = add_code_range_to_buf(pbuf, env, from, to);
if (r != 0) return r;
}
return 0;
}
static int
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
OnigCodePoint* data, int n)
{
int i, r;
......
else if (from2 <= to1) {
if (to2 < to1) {
if (from1 <= from2 - 1) {
r = add_code_range_to_buf(pbuf, from1, from2-1);
r = add_code_range_to_buf(pbuf, env, from1, from2-1);
if (r != 0) return r;
}
from1 = to2 + 1;
......
if (from1 > to1) break;
}
if (from1 <= to1) {
r = add_code_range_to_buf(pbuf, from1, to1);
r = add_code_range_to_buf(pbuf, env, from1, to1);
if (r != 0) return r;
}
return 0;
}
static int
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
{
int r;
OnigCodePoint i, j, n1, n2, *data1, *data2;
......
if (to2 < from1) continue;
from = MAX(from1, from2);
to = MIN(to1, to2);
r = add_code_range_to_buf(pbuf, from, to);
r = add_code_range_to_buf(pbuf, env, from, to);
if (r != 0) return r;
}
}
......
for (i = 0; i < n1; i++) {
from1 = data1[i*2];
to1 = data1[i*2+1];
r = and_code_range1(pbuf, from1, to1, data2, n2);
r = and_code_range1(pbuf, env, from1, to1, data2, n2);
if (r != 0) return r;
}
}
......
}
static int
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
{
OnigEncoding enc = env->enc;
int r, not1, not2;
BBuf *buf1, *buf2, *pbuf;
BitSetRef bsr1, bsr2;
......
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
if (not1 != 0 && not2 != 0) {
r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
}
else {
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
if (r == 0 && not1 != 0) {
BBuf *tbuf;
r = not_code_range_buf(enc, pbuf, &tbuf);
r = not_code_range_buf(enc, pbuf, &tbuf, env);
if (r != 0) {
bbuf_free(pbuf);
return r;
......
}
static int
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
{
OnigEncoding enc = env->enc;
int r, not1, not2;
BBuf *buf1, *buf2, *pbuf;
BitSetRef bsr1, bsr2;
......
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
if (not1 != 0 && not2 != 0) {
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
}
else {
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
if (r == 0 && not1 != 0) {
BBuf *tbuf;
r = not_code_range_buf(enc, pbuf, &tbuf);
r = not_code_range_buf(enc, pbuf, &tbuf, env);
if (r != 0) {
bbuf_free(pbuf);
return r;
......
}
}
/*
 * CC_DUP_WARN: report "character class has duplicated range" for the pattern
 * held in `env`, through the installed onig_warn callback.
 *
 * Nothing is reported when:
 *   - onig_warn is the null handler (onig_null_warn),
 *   - RTEST(ruby_verbose) is false,
 *   - the syntax in `env` does not have ONIG_SYN_WARN_CC_DUP set, or
 *   - ONIG_SYN_WARN_CC_DUP is already recorded in env->warnings_flag.
 *
 * On the reporting path the bit is recorded in env->warnings_flag before the
 * callback runs, so a given ScanEnv triggers this warning only once.
 */
static void
CC_DUP_WARN(ScanEnv *env)
{
  UChar msg[WARN_BUFSIZE];

  /* Warning output is switched off entirely. */
  if (onig_warn == onig_null_warn) return;
  if (!RTEST(ruby_verbose)) return;

  /* This syntax does not ask for duplicate-range warnings. */
  if (!IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP)) return;

  /* Already warned for this ScanEnv. */
  if (env->warnings_flag & ONIG_SYN_WARN_CC_DUP) return;

  env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;

  onig_snprintf_with_pattern(msg, WARN_BUFSIZE, env->enc,
                             env->pattern, env->pattern_end,
                             (UChar* )"character class has duplicated range");
  (*onig_warn)((char* )msg);
}
static UChar*
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
UChar **next, OnigEncoding enc)
......
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
OnigEncoding enc ARG_UNUSED,
ScanEnv* env,
OnigCodePoint sb_out, const OnigCodePoint mbr[])
{
int i, r;
......
if (j >= sb_out) {
if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), j,
r = add_code_range_to_buf(&(cc->mbuf), env, j,
ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r;
i++;
......
goto sb_end;
}
BITSET_SET_BIT(cc->bs, j);
BITSET_SET_BIT_CHKDUP(cc->bs, j);
}
}
sb_end:
for ( ; i < n; i++) {
r = add_code_range_to_buf(&(cc->mbuf),
r = add_code_range_to_buf(&(cc->mbuf), env,
ONIGENC_CODE_RANGE_FROM(mbr, i),
ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r;
......
if (j >= sb_out) {
goto sb_end2;
}
BITSET_SET_BIT(cc->bs, j);
BITSET_SET_BIT_CHKDUP(cc->bs, j);
}
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
for (j = prev; j < sb_out; j++) {
BITSET_SET_BIT(cc->bs, j);
BITSET_SET_BIT_CHKDUP(cc->bs, j);
}
sb_end2:
......
for (i = 0; i < n; i++) {
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), prev,
r = add_code_range_to_buf(&(cc->mbuf), env, prev,
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
if (r != 0) return r;
}
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff) {
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
if (r != 0) return r;
}
}
......
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
......
if (not != 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
}
break;
......
if (not != 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
......
case ONIGENC_CTYPE_WORD:
if (not == 0) {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
......
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
&& ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c);
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
}
break;
......
if (*state == CCS_VALUE && *type != CCV_CLASS) {
if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs));
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
......
switch (*state) {
case CCS_VALUE:
if (*type == CCV_SB)
BITSET_SET_BIT(cc->bs, (int )(*vs));
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
......
else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
bitset_set_range(cc->bs, (int )*vs, (int )v);
bitset_set_range(env, cc->bs, (int )*vs, (int )v);
}
else {
r = add_code_range(&(cc->mbuf), env, *vs, v);
......
else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r;
#if 0
......
r = parse_char_class(&anode, tok, &p, end, env);
if (r != 0) goto cc_open_err;
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc);
r = or_cclass(cc, acc, env);
onig_node_free(anode);
cc_open_err:
......
state = CCS_START;
if (IS_NOT_NULL(prev_cc)) {
r = and_cclass(prev_cc, cc, env->enc);
r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err;
bbuf_free(cc->mbuf);
}
......
}
if (IS_NOT_NULL(prev_cc)) {
r = and_cclass(prev_cc, cc, env->enc);
r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err;
bbuf_free(cc->mbuf);
cc = prev_cc;
......
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
else
add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
}
......
add_code_range(&(cc->mbuf), env, *to, *to);
}
else {
BITSET_SET_BIT(bs, *to);
BITSET_SET_BIT_CHKDUP(bs, *to);
}
}
#else
......
BITSET_CLEAR_BIT(bs, *to);
}
else
BITSET_SET_BIT(bs, *to);
BITSET_SET_BIT_CHKDUP(bs, *to);
}
}
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
regparse.h
int curr_max_regnum;
int has_recursion;
#endif
int warnings_flag;
} ScanEnv;
    (1-1/1)