Project

General

Profile

Feature #9098 » dedent_heredoc.patch

bjmllr (Ben Miller), 11/28/2015 01:26 AM

View differences:

doc/syntax/literals.rdoc
always treated as if it is flush left. If you indent the content those spaces
will appear in the output.
To have indented content as well as an indented closing identifier, you can use
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
  expected_result = <<~SQUIGGLY_HEREDOC
    This would contain specially formatted text.
    That might span many lines
  SQUIGGLY_HEREDOC
The indentation of the least-indented line will be removed from each line of
the content. Note that empty lines and lines consisting solely of literal tabs
and spaces will be ignored for the purposes of determining indentation, but
escaped tabs and spaces are considered non-indentation characters.
If both tabs and spaces are used as indentation in the same heredoc,
tabs are considered as equal to 8 spaces. If the indentation of the
least-indented line falls in the middle of a leading tab, only
indentation to the left of that tab will be removed.
A heredoc allows interpolation and escaped characters. You may disable
interpolation and escaping by surrounding the opening identifier with single
quotes:
parse.y
int toksiz;
int tokline;
int heredoc_end;
int heredoc_indent;
int heredoc_line_indent;
char *tokenbuf;
NODE *deferred_nodes;
struct local_vars *lvtbl;
......
#define lex_p (parser->lex.pcur)
#define lex_pend (parser->lex.pend)
#define heredoc_end (parser->heredoc_end)
#define heredoc_indent (parser->heredoc_indent)
#define heredoc_line_indent (parser->heredoc_line_indent)
#define command_start (parser->command_start)
#define deferred_nodes (parser->deferred_nodes)
#define lex_gets_ptr (parser->lex.gets_ptr)
......
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
static NODE *parser_heredoc_dedent(struct parser_params*,NODE*);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
#define get_id(id) (id)
#define get_value(val) (val)
#else
......
#define new_defined(expr) dispatch1(defined, (expr))
static VALUE parser_heredoc_dedent_ripper(struct parser_params*,VALUE);
# define heredoc_dedent_ripper(str) parser_heredoc_dedent_ripper(parser, (str))
#define FIXME 0
#endif /* RIPPER */
......
else {
node = evstr2dstr(node);
}
heredoc_indent = 0;
$$ = node;
/*%
$$ = $1;
......
string1 : tSTRING_BEG string_contents tSTRING_END
{
/*%%%*/
$$ = $2;
$$ = heredoc_dedent($2);
/*%
$$ = dispatch1(string_literal, $2);
$$ = dispatch1(string_literal,
heredoc_dedent_ripper($2));
%*/
}
;
......
break;
}
}
$$ = node;
$$ = heredoc_dedent(node);
/*%
$$ = dispatch1(xstring_literal, $2);
$$ = dispatch1(xstring_literal,
heredoc_dedent_ripper($2));
%*/
}
;
......
$<num>$ = brace_nest;
brace_nest = 0;
}
{
$<num>$ = heredoc_indent;
heredoc_indent = 0;
}
compstmt tSTRING_DEND
{
cond_stack = $<val>1;
......
lex_strterm = $<node>3;
lex_state = $<num>4;
brace_nest = $<num>5;
heredoc_indent = $<num>6;
/*%%%*/
if ($6) $6->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($6);
if ($7) $7->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($7);
/*%
$$ = dispatch1(string_embexpr, $6);
$$ = dispatch1(string_embexpr, $7);
%*/
}
;
......
#define STR_FUNC_SYMBOL 0x10
#define STR_FUNC_INDENT 0x20
#define STR_FUNC_LABEL 0x40
#define STR_FUNC_DEDENT 0x80
enum string_type {
str_label = STR_FUNC_LABEL,
......
} while (0)
while ((c = nextc()) != -1) {
if (heredoc_indent > 0) {
if (heredoc_line_indent == -1) {
if (c == '\n') heredoc_line_indent = 0;
} else {
if (c == ' ') {
heredoc_line_indent++;
} else if (c == '\t') {
heredoc_line_indent += 8;
} else if (c != '\n') {
if (heredoc_indent > heredoc_line_indent) {
heredoc_indent = heredoc_line_indent;
}
heredoc_line_indent = -1;
}
}
}
if (paren && c == paren) {
++*nest;
}
......
if (c == '-') {
c = nextc();
func = STR_FUNC_INDENT;
} else if (c == '~') {
c = nextc();
func = STR_FUNC_INDENT;
heredoc_indent = INT_MAX;
heredoc_line_indent = 0;
}
switch (c) {
case '\'':
......
default:
if (!parser_is_identchar()) {
pushback(c);
if (func & STR_FUNC_INDENT) {
if (heredoc_indent > 0) {
pushback('~');
} else if (func & STR_FUNC_INDENT) {
pushback('-');
}
return 0;
......
ripper_flush(parser);
}
/*
 * Build a new string from `input` with leading indentation stripped from
 * each line.
 *
 * `*count_indent` and `*copy_indent` are in/out counters holding how many
 * columns of indentation may still be removed from the current line. They
 * are reset to `heredoc_indent` (i.e. parser->heredoc_indent) after every
 * '\n', and their final values are left in place for the caller, which
 * passes the same pointers again for the next string piece.
 *
 * The work is done in two passes over `input`: the first (driven by
 * `*count_indent`) only measures the output length, the second (driven by
 * `*copy_indent`) copies the surviving bytes into a freshly allocated
 * string.
 *
 * A space consumes one column; a tab consumes eight and is only removed
 * when at least eight columns remain.
 */
static VALUE
parser_heredoc_dedent_string(struct parser_params *parser, VALUE input,
long *count_indent, long *copy_indent)
{
long len, out_len;
char *str, *p, *out_p, *end;
VALUE output;
len = RSTRING_LEN(input);
out_len = 0;
str = RSTRING_PTR(input);
end = &str[len];
p = str;
/* Pass 1: compute the length of the dedented output. */
while (p < end) {
/* Skip removable indentation at the current position. */
while (p < end && *count_indent > 0) {
if (*p == ' ') {
p++;
(*count_indent)--;
} else if (*p == '\t' && *count_indent >= 8) {
p++;
*count_indent -= 8;
} else if (*p == '\t' && heredoc_indent % 8) {
/* Inconsistent indentation requires us to back up to the
previous tab stop */
/* Round heredoc_indent down to a multiple of 8, reset both
counters to it, and redo the whole string from the start. */
heredoc_indent = heredoc_indent - (heredoc_indent % 8);
*count_indent = *copy_indent = heredoc_indent;
return parser_heredoc_dedent_string(parser, input,
count_indent, copy_indent);
} else {
break;
}
}
/* Everything up to the end of the line is kept. */
for (; p < end && *p != '\n'; p++) out_len++;
if (p < end && *p == '\n') {
/* A new line starts: restore the full indentation budget. */
*count_indent = heredoc_indent;
out_len++;
p++;
}
}
output = rb_str_new(0, out_len);
out_p = RSTRING_PTR(output);
p = str;
/* Pass 2: same walk as above, this time copying the kept bytes. */
while (p < end) {
while (p < end && *copy_indent > 0) {
if (*p == ' ') {
p++;
(*copy_indent)--;
} else if (*p == '\t' && *copy_indent >= 8) {
p++;
*copy_indent -= 8;
} else {
break;
}
}
while (p < end && *p != '\n') *out_p++ = *p++;
if (p < end && *p == '\n') {
*copy_indent = heredoc_indent;
*out_p++ = *p++;
}
}
return output;
}
#ifndef RIPPER
/*
 * Dedent the string pieces of a heredoc node tree in place.
 *
 * Does nothing unless `heredoc_indent` (parser->heredoc_indent) is positive.
 * Otherwise the literal of `root` is replaced by its dedented copy, and the
 * nd_next chain is then walked: every NODE_ARRAY element whose head is a
 * NODE_STR or NODE_DSTR gets the same treatment. The walk stops at the
 * first non-NODE_ARRAY link or at the end of the chain.
 *
 * The remaining-indentation counters are shared across all pieces so that
 * a line split over several pieces is only dedented once.
 *
 * Returns `root` (the same node that was passed in).
 */
static NODE *
parser_heredoc_dedent(struct parser_params *parser, NODE *root)
{
    long count_indent, copy_indent;
    VALUE output;
    NODE *node, *str_node;

    if (heredoc_indent <= 0) return root;
    /*
     * heredoc_indent starts at INT_MAX for a `<<~` heredoc, so the check
     * above does not cover a missing node. Presumably an empty heredoc body
     * yields a null `root` (confirm against the string_contents rule);
     * guard it instead of dereferencing.
     */
    if (!root) return root;

    node = str_node = root;
    count_indent = copy_indent = heredoc_indent;
    while (str_node) {
        output = parser_heredoc_dedent_string(parser, str_node->nd_lit,
                                              &count_indent, &copy_indent);
        dispose_string(str_node->nd_lit);
        str_node->nd_lit = output;

        /* Find the next string piece further down the list, if any. */
        str_node = 0;
        while ((node = node->nd_next) != 0) {
            if (nd_type(node) != NODE_ARRAY) break;
            if (nd_type(node->nd_head) == NODE_STR ||
                nd_type(node->nd_head) == NODE_DSTR) {
                str_node = node->nd_head;
                break;
            }
        }
    }
    return root;
}
#else /* RIPPER */
/*
 * Ripper counterpart of the heredoc dedent: strips indentation from the
 * string pieces found in `array`, the value produced by the string-content
 * event handlers.
 *
 * Two element shapes are rewritten:
 *   - an Array whose first entry is the Symbol :string_content or
 *     :@tstring_content and whose second entry is a String; the second
 *     entry is replaced by its dedented copy;
 *   - a bare String, which is replaced in `array` by its dedented copy.
 * Every other element is left untouched.
 *
 * Does nothing unless `heredoc_indent` (parser->heredoc_indent) is positive.
 * Returns `array` itself.
 */
static VALUE
parser_heredoc_dedent_ripper(struct parser_params *parser, VALUE array)
{
    long count_indent, copy_indent, array_len, i;
    VALUE e, sym, str, ret;
    ID sym_id, id_string_content, id_tstring_content;

    if (heredoc_indent <= 0) return array;
    /*
     * Event handlers are user-overridable, so the value is not guaranteed
     * to be an Array (it may be nil, for instance). Leave such values alone
     * rather than reading them as an Array.
     */
    if (TYPE(array) != T_ARRAY) return array;

    /* Loop-invariant lookups, hoisted out of the element loop. */
    id_string_content = rb_intern("string_content");
    id_tstring_content = rb_intern("@tstring_content");

    count_indent = copy_indent = heredoc_indent;
    array_len = RARRAY_LEN(array);
    for (i = 0; i < array_len; i++) {
        e = rb_ary_entry(array, i);
        if (TYPE(e) == T_ARRAY && TYPE(sym = rb_ary_entry(e, 0)) == T_SYMBOL) {
            sym_id = rb_to_id(sym);
            if (sym_id != id_string_content &&
                sym_id != id_tstring_content) continue;
            /* e.g. a bare [:string_content] has no second entry. */
            str = rb_ary_entry(e, 1);
            if (TYPE(str) != T_STRING) continue;
            ret = parser_heredoc_dedent_string(parser, str,
                                               &count_indent, &copy_indent);
            rb_ary_store(e, 1, ret);
        } else if (TYPE(e) == T_STRING) {
            ret = parser_heredoc_dedent_string(parser, e,
                                               &count_indent, &copy_indent);
            rb_ary_store(array, i, ret);
        }
    }
    return array;
}
#endif
static int
parser_whole_match_p(struct parser_params *parser,
const char *eos, long len, int indent)
test/ripper/test_ripper.rb
assert_predicate @ripper, :yydebug
end
# Checks that Ripper.sexp produces the same S-expression for a `<<-eot`
# heredoc and for the corresponding `<<~eot` heredoc.
# NOTE(review): leading whitespace inside the heredoc bodies is not visible
# in this view; confirm the intended indentation against the original patch.
def test_squiggly_heredoc
assert_equal(Ripper.sexp(<<-eos), Ripper.sexp(<<-eos))
<<-eot
asdf
eot
eos
<<~eot
asdf
eot
eos
end
# Parses a `<<-eot` and a `<<~eot` heredoc that both contain `a#{1}z` and
# compares the resulting S-expressions. The element selected by `pos` is
# expected to differ between the two; once it is cleared in both trees the
# rest must be equal.
# NOTE(review): `pos` presumably selects a source-position array inside the
# interpolated expression -- verify. Heredoc body indentation is not visible
# in this view.
def test_squiggly_heredoc_with_interpolated_expression
sexp1 = Ripper.sexp(<<-eos)
<<-eot
a\#{1}z
eot
eos
sexp2 = Ripper.sexp(<<-eos)
<<~eot
a\#{1}z
eot
eos
pos = lambda do |s|
s.fetch(1).fetch(0).fetch(1).fetch(2).fetch(1).fetch(0).fetch(2)
end
assert_not_equal pos[sexp1], pos[sexp2]
pos[sexp1].clear
pos[sexp2].clear
assert_equal sexp1, sexp2
end
end if ripper_test
test/ruby/test_syntax.rb
assert_equal(expected, actual, "#{Bug7559}: ")
end
# Asserts the value of a `<<~eos` heredoc whose least-indented line has no
# removable indentation.
# NOTE(review): the expected string starts with a space but the body shown
# here has none; the body's leading whitespace appears to be lost in this
# view -- confirm against the original patch.
def test_dedented_heredoc_without_indentation
assert_equal(" y\nz\n", <<~eos)
y
z
eos
end
# Asserts the value of a `<<~eos` heredoc whose lines are indented by
# different amounts.
# NOTE(review): the expected string keeps one space before "a"; the body's
# leading whitespace is not visible in this view -- confirm against the
# original patch.
def test_dedented_heredoc_with_indentation
assert_equal(" a\nb\n", <<~eos)
a
b
eos
end
# Asserts that a whitespace-only line with less indentation than the other
# lines does not determine the amount of indentation removed.
# NOTE(review): the blank line and all leading whitespace of the body are
# not visible in this view -- confirm against the original patch.
def test_dedented_heredoc_with_blank_less_indented_line
# the blank line has two leading spaces
assert_equal("a\n\nb\n", <<~eos)
a
b
eos
end
# Same layout as the blank-line case, but the short line consists of
# backslash-escaped spaces.
# NOTE(review): whitespace in both the expected string and the body may have
# been collapsed in this view -- confirm against the original patch.
def test_dedented_heredoc_with_blank_less_indented_line_escaped
assert_equal(" a\n \n b\n", <<~eos)
a
\ \
b
eos
end
# Asserts the result when a whitespace-only line is indented further than
# the surrounding lines.
# NOTE(review): the blank line and all leading whitespace of the body are
# not visible in this view -- confirm against the original patch.
def test_dedented_heredoc_with_blank_more_indented_line
# the blank line has six leading spaces
assert_equal("a\n \nb\n", <<~eos)
a
b
eos
end
# Same layout as the more-indented blank-line case, but the middle line
# consists of six backslash-escaped spaces.
# NOTE(review): whitespace in both the expected string and the body may have
# been collapsed in this view -- confirm against the original patch.
def test_dedented_heredoc_with_blank_more_indented_line_escaped
assert_equal(" a\n \n b\n", <<~eos)
a
\ \ \ \ \ \
b
eos
end
# Asserts that a `<<~eos` heredoc equals a `<<-eos` heredoc with the same
# two text lines.
# NOTE(review): the method name refers to an empty line, but no empty line
# or indentation is visible in this view -- confirm against the original
# patch.
def test_dedented_heredoc_with_empty_line
assert_equal(<<-eos, <<~eos)
This would contain specially formatted text.
That might span many lines
eos
This would contain specially formatted text.
That might span many lines
eos
end
# Asserts that a `<<~eos` heredoc containing the interpolation `#{1}`
# equals the corresponding `<<-eos` heredoc.
# NOTE(review): heredoc body indentation is not visible in this view --
# confirm against the original patch.
def test_dedented_heredoc_with_interpolated_expression
assert_equal(<<-eos, <<~eos)
#{1}a
zy
eos
#{1}a
zy
eos
end
# Asserts that a `<<~eos` heredoc whose first line starts with an
# interpolated empty string (`w`) equals the corresponding `<<-eos` heredoc.
# NOTE(review): heredoc body indentation is not visible in this view --
# confirm against the original patch.
def test_dedented_heredoc_with_interpolated_string
w = ""
assert_equal(<<-eos, <<~eos)
#{w} a
zy
eos
#{w} a
zy
eos
end
# Compares a `<<-eos` heredoc with a `<<~eos` heredoc whose lines mix tab
# and space indentation.
# NOTE(review): presumably the lines are indented by 16 spaces and 2 tabs
# as their text says; the actual whitespace is not visible in this view --
# confirm against the original patch.
def test_dedented_heredoc_mixing_tab_with_space
assert_equal(<<-eos, <<~eos)
16 spaces
2 tabs
eos
16 spaces
2 tabs
eos
end
# Compares a `<<-eos` heredoc with a `<<~eos` heredoc whose lines use
# inconsistent tab/space indentation; per the method name, a tab that the
# dedent boundary falls inside is expected to be preserved.
# NOTE(review): presumably the lines are indented by 2 tabs and 10 spaces
# as their text says; the actual whitespace is not visible in this view --
# confirm against the original patch.
def test_dedented_heredoc_with_inconsistent_indentation_preserves_tab
assert_equal(<<-eos, <<~eos)
2 tabs
10 spaces
eos
2 tabs
10 spaces
eos
end
def test_lineno_after_heredoc
bug7559 = '[ruby-dev:46737]'
expected, _, actual = __LINE__, <<eom, __LINE__
    (1-1/1)