Project

General

Profile

Feature #1493 » IS_LEX.patch

daz (Dave B), 05/20/2009 02:18 AM

View differences:

parse.y (working copy)
((id)&ID_SCOPE_MASK) == ID_CLASS))
enum lex_state_e {
EXPR_BEG, /* ignore newline, +/- is a sign. */
EXPR_END, /* newline significant, +/- is an operator. */
EXPR_ENDARG, /* ditto, and unbound braces. */
EXPR_ARG, /* newline significant, +/- is an operator. */
EXPR_CMDARG, /* newline significant, +/- is an operator. */
EXPR_MID, /* newline significant, +/- is an operator. */
EXPR_FNAME, /* ignore newline, no reserved words. */
EXPR_DOT, /* right after `.' or `::', no reserved words. */
EXPR_CLASS, /* immediate after `class', no here document. */
EXPR_VALUE /* alike EXPR_BEG but label is disallowed. */
EXPR_BEG = (1 << 0 ), /* ignore newline, +/- is a sign */
EXPR_VALUE = (1 << 1 ), /* as EXPR_BEG but label is disallowed */
EXPR_MID = (1 << 2 ), /* newline significant, +/- is an operator */
EXPR_CLASS = (1 << 3 ), /* immediately after 'class', no here document */
EXPR_ARG = (1 << 4 ), /* newline significant, +/- is an operator */
EXPR_CMDARG = (1 << 5 ), /* newline significant, +/- is an operator */
EXPR_END = (1 << 6 ), /* newline significant, +/- is an operator */
EXPR_ENDARG = (1 << 7 ), /* as EXPR_END, unbound braces */
EXPR_FNAME = (1 << 8 ), /* ignore newline, no reserved words */
EXPR_DOT = (1 << 9 ), /* immediately after '.' or '::', no reserved words */
/* examine combinations */
EXPR_BEG_ANY = ( EXPR_BEG | EXPR_VALUE | EXPR_MID | EXPR_CLASS ),
EXPR_ARG_ANY = ( EXPR_ARG | EXPR_CMDARG ),
EXPR_END_ANY = ( EXPR_END | EXPR_ENDARG )
};
#define IS_lex_state(ls) (lex_state & ( ls ))
typedef VALUE stack_type;
# define BITSTACK_PUSH(stack, n) (stack = (stack<<1)|((n)&1))
......
parser->enc = rb_enc_get(lex_lastline);
}
#define IS_ARG() (lex_state == EXPR_ARG || lex_state == EXPR_CMDARG)
#define IS_BEG() (lex_state == EXPR_BEG || lex_state == EXPR_MID || lex_state == EXPR_VALUE || lex_state == EXPR_CLASS)
static int
parser_yylex(struct parser_params *parser)
......
#endif
/* fall through */
case '\n':
switch (lex_state) {
case EXPR_BEG:
case EXPR_FNAME:
case EXPR_DOT:
case EXPR_CLASS:
case EXPR_VALUE:
if (IS_lex_state( EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT )) {
#ifdef RIPPER
if (!fallthru) {
ripper_dispatch_scan_event(parser, tIGNORED_NL);
......
fallthru = Qfalse;
#endif
goto retry;
default:
break;
}
while ((c = nextc())) {
switch (c) {
......
return tOP_ASGN;
}
pushback(c);
if (IS_ARG() && space_seen && !ISSPACE(c)) {
if (IS_lex_state( EXPR_ARG_ANY ) && space_seen && !ISSPACE(c)) {
rb_warning0("`*' interpreted as argument prefix");
c = tSTAR;
}
else if (IS_BEG()) {
else if (IS_lex_state( EXPR_BEG_ANY )) {
c = tSTAR;
}
else {
c = '*';
}
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
return c;
case '!':
c = nextc();
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
if (c == '@') {
return '!';
......
}
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
if ((c = nextc()) == '=') {
if ((c = nextc()) == '=') {
return tEQQ;
......
case '<':
c = nextc();
if (c == '<' &&
lex_state != EXPR_END &&
lex_state != EXPR_DOT &&
lex_state != EXPR_ENDARG &&
lex_state != EXPR_CLASS &&
(!IS_ARG() || space_seen)) {
!IS_lex_state( EXPR_END_ANY | EXPR_CLASS | EXPR_DOT ) &&
(!IS_lex_state( EXPR_ARG_ANY ) || space_seen)) {
int token = heredoc_identifier();
if (token) return token;
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
if (c == '=') {
if ((c = nextc()) == '>') {
return tCMP;
......
return '<';
case '>':
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
if ((c = nextc()) == '=') {
return tGEQ;
}
......
return tSTRING_BEG;
case '`':
if (lex_state == EXPR_FNAME) {
if (IS_lex_state( EXPR_FNAME )) {
lex_state = EXPR_END;
return c;
}
if (lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_DOT )) {
if (cmd_state)
lex_state = EXPR_CMDARG;
else
......
return tSTRING_BEG;
case '?':
if (lex_state == EXPR_END || lex_state == EXPR_ENDARG) {
if (IS_lex_state( EXPR_END_ANY )) {
lex_state = EXPR_VALUE;
return '?';
}
......
return 0;
}
if (rb_enc_isspace(c, parser->enc)) {
if (!IS_ARG()) {
if (!IS_lex_state( EXPR_ARG_ANY )) {
int c2 = 0;
switch (c) {
case ' ':
......
return tOP_ASGN;
}
pushback(c);
if (IS_ARG() && space_seen && !ISSPACE(c)) {
if (IS_lex_state( EXPR_ARG_ANY ) && space_seen && !ISSPACE(c)) {
rb_warning0("`&' interpreted as argument prefix");
c = tAMPER;
}
else if (IS_BEG()) {
else if (IS_lex_state( EXPR_BEG_ANY )) {
c = tAMPER;
}
else {
c = '&';
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
return c;
......
lex_state = EXPR_BEG;
return tOP_ASGN;
}
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
......
case '+':
c = nextc();
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
if (c == '@') {
return tUPLUS;
......
lex_state = EXPR_BEG;
return tOP_ASGN;
}
if (IS_BEG() ||
(IS_ARG() && space_seen && !ISSPACE(c))) {
if (IS_ARG()) arg_ambiguous();
if (IS_lex_state( EXPR_BEG_ANY ) ||
(IS_lex_state( EXPR_ARG_ANY ) && space_seen && !ISSPACE(c))) {
if (IS_lex_state( EXPR_ARG_ANY )) arg_ambiguous();
lex_state = EXPR_BEG;
pushback(c);
if (c != -1 && ISDIGIT(c)) {
......
case '-':
c = nextc();
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
if (c == '@') {
return tUMINUS;
......
lex_state = EXPR_ARG;
return tLAMBDA;
}
if (IS_BEG() ||
(IS_ARG() && space_seen && !ISSPACE(c))) {
if (IS_ARG()) arg_ambiguous();
if (IS_lex_state( EXPR_BEG_ANY ) ||
(IS_lex_state( EXPR_ARG_ANY ) && space_seen && !ISSPACE(c))) {
if (IS_lex_state( EXPR_ARG_ANY )) arg_ambiguous();
lex_state = EXPR_BEG;
pushback(c);
if (c != -1 && ISDIGIT(c)) {
......
case ':':
c = nextc();
if (c == ':') {
if (IS_BEG() ||
lex_state == EXPR_CLASS || (IS_ARG() && space_seen)) {
if (IS_lex_state( EXPR_BEG_ANY ) || (IS_lex_state( EXPR_ARG_ANY ) && space_seen)) {
lex_state = EXPR_BEG;
return tCOLON3;
}
lex_state = EXPR_DOT;
return tCOLON2;
}
if (lex_state == EXPR_END || lex_state == EXPR_ENDARG || (c != -1 && ISSPACE(c))) {
if (IS_lex_state( EXPR_END_ANY ) || (c != -1 && ISSPACE(c))) {
pushback(c);
lex_state = EXPR_BEG;
return ':';
......
return tSYMBEG;
case '/':
if (IS_BEG()) {
if (IS_lex_state( EXPR_BEG_ANY )) {
lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
return tREGEXP_BEG;
}
......
return tOP_ASGN;
}
pushback(c);
if (IS_ARG() && space_seen) {
if (IS_lex_state( EXPR_ARG_ANY ) && space_seen) {
if (!ISSPACE(c)) {
arg_ambiguous();
lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
return tREGEXP_BEG;
}
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
return '/';
case '^':
......
lex_state = EXPR_BEG;
return tOP_ASGN;
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
pushback(c);
return '^';
......
return ',';
case '~':
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
if ((c = nextc()) != '@') {
pushback(c);
}
......
return '~';
case '(':
if (IS_BEG()) {
if (IS_lex_state( EXPR_BEG_ANY )) {
c = tLPAREN;
}
else if (space_seen) {
if (IS_ARG()) {
if (IS_lex_state( EXPR_ARG_ANY )) {
c = tLPAREN_ARG;
}
}
......
case '[':
paren_nest++;
if (lex_state == EXPR_FNAME || lex_state == EXPR_DOT) {
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
if ((c = nextc()) == ']') {
if ((c = nextc()) == '=') {
......
pushback(c);
return '[';
}
else if (IS_BEG()) {
else if (IS_lex_state( EXPR_BEG_ANY )) {
c = tLBRACK;
}
else if (IS_ARG() && space_seen) {
else if (IS_lex_state( EXPR_ARG_ANY ) && space_seen) {
c = tLBRACK;
}
lex_state = EXPR_BEG;
......
--paren_nest;
return tLAMBEG;
}
if (IS_ARG() || lex_state == EXPR_END)
if (IS_lex_state( EXPR_ARG_ANY | EXPR_END ))
c = '{'; /* block (primary) */
else if (lex_state == EXPR_ENDARG)
else if (IS_lex_state( EXPR_ENDARG ))
c = tLBRACE_ARG; /* block (expr) */
else
c = tLBRACE; /* hash */
......
return '\\';
case '%':
if (IS_BEG()) {
if (IS_lex_state( EXPR_BEG_ANY )) {
int term;
int paren;
......
lex_state = EXPR_BEG;
return tOP_ASGN;
}
if (IS_ARG() && space_seen && !ISSPACE(c)) {
if (IS_lex_state( EXPR_ARG_ANY ) && space_seen && !ISSPACE(c)) {
goto quotation;
}
switch (lex_state) {
case EXPR_FNAME: case EXPR_DOT:
lex_state = EXPR_ARG; break;
default:
lex_state = EXPR_BEG; break;
if (IS_lex_state( EXPR_FNAME | EXPR_DOT )) {
lex_state = EXPR_ARG;
}
else {
lex_state = EXPR_BEG;
}
pushback(c);
return '%';
......
result = tFID;
}
else {
if (lex_state == EXPR_FNAME) {
if (IS_lex_state( EXPR_FNAME )) {
if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
(!peek('=') || (lex_p + 1 < lex_pend && lex_p[1] == '>'))) {
result = tIDENTIFIER;
......
}
}
if ((lex_state == EXPR_BEG && !cmd_state) ||
lex_state == EXPR_ARG ||
lex_state == EXPR_CMDARG) {
if ((IS_lex_state( EXPR_BEG ) && !cmd_state) ||
IS_lex_state( EXPR_ARG_ANY )) {
if (peek(':') && !(lex_p + 1 < lex_pend && lex_p[1] == ':')) {
lex_state = EXPR_BEG;
nextc();
......
return tLABEL;
}
}
if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
if (mb == ENC_CODERANGE_7BIT && !IS_lex_state( EXPR_DOT )) {
const struct kwtable *kw;
/* See if it is a reserved word. */
......
if (COND_P()) return keyword_do_cond;
if (CMDARG_P() && state != EXPR_CMDARG)
return keyword_do_block;
if (state == EXPR_ENDARG || state == EXPR_BEG)
if (state & ( EXPR_BEG | EXPR_ENDARG ))
return keyword_do_block;
return keyword_do;
}
if (state == EXPR_BEG || state == EXPR_VALUE)
if (state & ( EXPR_BEG | EXPR_VALUE ))
return kw->id[0];
else {
if (kw->id[0] != kw->id[1])
......
}
}
if (IS_BEG() ||
lex_state == EXPR_DOT ||
IS_ARG()) {
if (IS_lex_state( EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT )) {
if (cmd_state) {
lex_state = EXPR_CMDARG;
}
    (1-1/1)