Feature #9098 » dedent_heredoc.patch
doc/syntax/literals.rdoc | ||
---|---|---|
196 | 196 |
always treated as if it is flush left. If you indent the content those spaces |
197 | 197 |
will appear in the output. |
198 | 198 | |
199 |
To have indented content as well as an indented closing identifier, you can use |
|
200 |
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>: |
|
201 | ||
202 |
expected_result = <<~SQUIGGLY_HEREDOC |
|
203 |
This would contain specially formatted text. |
|
204 | ||
205 |
That might span many lines |
|
206 |
SQUIGGLY_HEREDOC |
|
207 | ||
208 |
The indentation of the least-indented line will be removed from each line of |
|
209 |
the content. Note that empty lines and lines consisting solely of literal tabs |
|
210 |
and spaces will be ignored for the purposes of determining indentation, but |
|
211 |
escaped tabs and spaces are considered non-indentation characters. |
|
212 | ||
213 |
If both tabs and spaces are used as indentation in the same heredoc, |
|
214 |
tabs are considered as equal to 8 spaces. If the indentation of the |
|
215 |
least-indented line falls in the middle of a leading tab, only |
|
216 |
indentation to the left of that tab will be removed. |
|
217 | ||
199 | 218 |
A heredoc allows interpolation and escaped characters. You may disable |
200 | 219 |
interpolation and escaping by surrounding the opening identifier with single |
201 | 220 |
quotes: |
parse.y | ||
---|---|---|
255 | 255 |
int toksiz; |
256 | 256 |
int tokline; |
257 | 257 |
int heredoc_end; |
258 |
int heredoc_indent; |
|
259 |
int heredoc_line_indent; |
|
258 | 260 |
char *tokenbuf; |
259 | 261 |
NODE *deferred_nodes; |
260 | 262 |
struct local_vars *lvtbl; |
... | ... | |
345 | 347 |
#define lex_p (parser->lex.pcur) |
346 | 348 |
#define lex_pend (parser->lex.pend) |
347 | 349 |
#define heredoc_end (parser->heredoc_end) |
350 |
#define heredoc_indent (parser->heredoc_indent) |
|
351 |
#define heredoc_line_indent (parser->heredoc_line_indent) |
|
348 | 352 |
#define command_start (parser->command_start) |
349 | 353 |
#define deferred_nodes (parser->deferred_nodes) |
350 | 354 |
#define lex_gets_ptr (parser->lex.gets_ptr) |
... | ... | |
485 | 489 |
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); |
486 | 490 |
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match)) |
487 | 491 | |
492 |
static NODE *parser_heredoc_dedent(struct parser_params*,NODE*); |
|
493 |
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str)) |
|
494 | ||
495 | ||
488 | 496 |
#define get_id(id) (id) |
489 | 497 |
#define get_value(val) (val) |
490 | 498 |
#else |
... | ... | |
668 | 676 | |
669 | 677 |
#define new_defined(expr) dispatch1(defined, (expr)) |
670 | 678 | |
679 |
static VALUE parser_heredoc_dedent_ripper(struct parser_params*,VALUE); |
|
680 |
# define heredoc_dedent_ripper(str) parser_heredoc_dedent_ripper(parser, (str)) |
|
681 | ||
671 | 682 |
#define FIXME 0 |
672 | 683 | |
673 | 684 |
#endif /* RIPPER */ |
... | ... | |
3885 | 3896 |
else { |
3886 | 3897 |
node = evstr2dstr(node); |
3887 | 3898 |
} |
3899 |
heredoc_indent = 0; |
|
3888 | 3900 |
$$ = node; |
3889 | 3901 |
/*% |
3890 | 3902 |
$$ = $1; |
... | ... | |
3907 | 3919 |
string1 : tSTRING_BEG string_contents tSTRING_END |
3908 | 3920 |
{ |
3909 | 3921 |
/*%%%*/ |
3910 |
$$ = $2;
|
|
3922 |
$$ = heredoc_dedent($2);
|
|
3911 | 3923 |
/*% |
3912 |
$$ = dispatch1(string_literal, $2); |
|
3924 |
$$ = dispatch1(string_literal, |
|
3925 |
heredoc_dedent_ripper($2)); |
|
3913 | 3926 |
%*/ |
3914 | 3927 |
} |
3915 | 3928 |
; |
... | ... | |
3934 | 3947 |
break; |
3935 | 3948 |
} |
3936 | 3949 |
} |
3937 |
$$ = node;
|
|
3950 |
$$ = heredoc_dedent(node);
|
|
3938 | 3951 |
/*% |
3939 |
$$ = dispatch1(xstring_literal, $2); |
|
3952 |
$$ = dispatch1(xstring_literal, |
|
3953 |
heredoc_dedent_ripper($2)); |
|
3940 | 3954 |
%*/ |
3941 | 3955 |
} |
3942 | 3956 |
; |
... | ... | |
4317 | 4331 |
$<num>$ = brace_nest; |
4318 | 4332 |
brace_nest = 0; |
4319 | 4333 |
} |
4334 |
{ |
|
4335 |
$<num>$ = heredoc_indent; |
|
4336 |
heredoc_indent = 0; |
|
4337 |
} |
|
4320 | 4338 |
compstmt tSTRING_DEND |
4321 | 4339 |
{ |
4322 | 4340 |
cond_stack = $<val>1; |
... | ... | |
4324 | 4342 |
lex_strterm = $<node>3; |
4325 | 4343 |
lex_state = $<num>4; |
4326 | 4344 |
brace_nest = $<num>5; |
4345 |
heredoc_indent = $<num>6; |
|
4327 | 4346 |
/*%%%*/ |
4328 |
if ($6) $6->flags &= ~NODE_FL_NEWLINE;
|
|
4329 |
$$ = new_evstr($6);
|
|
4347 |
if ($7) $7->flags &= ~NODE_FL_NEWLINE;
|
|
4348 |
$$ = new_evstr($7);
|
|
4330 | 4349 |
/*% |
4331 |
$$ = dispatch1(string_embexpr, $6);
|
|
4350 |
$$ = dispatch1(string_embexpr, $7);
|
|
4332 | 4351 |
%*/ |
4333 | 4352 |
} |
4334 | 4353 |
; |
... | ... | |
5693 | 5712 |
#define STR_FUNC_SYMBOL 0x10 |
5694 | 5713 |
#define STR_FUNC_INDENT 0x20 |
5695 | 5714 |
#define STR_FUNC_LABEL 0x40 |
5715 |
#define STR_FUNC_DEDENT 0x80 |
|
5696 | 5716 | |
5697 | 5717 |
enum string_type { |
5698 | 5718 |
str_label = STR_FUNC_LABEL, |
... | ... | |
6211 | 6231 |
} while (0) |
6212 | 6232 | |
6213 | 6233 |
while ((c = nextc()) != -1) { |
6234 |
if (heredoc_indent > 0) { |
|
6235 |
if (heredoc_line_indent == -1) { |
|
6236 |
if (c == '\n') heredoc_line_indent = 0; |
|
6237 |
} else { |
|
6238 |
if (c == ' ') { |
|
6239 |
heredoc_line_indent++; |
|
6240 |
} else if (c == '\t') { |
|
6241 |
heredoc_line_indent += 8; |
|
6242 |
} else if (c != '\n') { |
|
6243 |
if (heredoc_indent > heredoc_line_indent) { |
|
6244 |
heredoc_indent = heredoc_line_indent; |
|
6245 |
} |
|
6246 |
heredoc_line_indent = -1; |
|
6247 |
} |
|
6248 |
} |
|
6249 |
} |
|
6250 | ||
6214 | 6251 |
if (paren && c == paren) { |
6215 | 6252 |
++*nest; |
6216 | 6253 |
} |
... | ... | |
6471 | 6508 |
if (c == '-') { |
6472 | 6509 |
c = nextc(); |
6473 | 6510 |
func = STR_FUNC_INDENT; |
6511 |
} else if (c == '~') { |
|
6512 |
c = nextc(); |
|
6513 |
func = STR_FUNC_INDENT; |
|
6514 |
heredoc_indent = INT_MAX; |
|
6515 |
heredoc_line_indent = 0; |
|
6474 | 6516 |
} |
6475 | 6517 |
switch (c) { |
6476 | 6518 |
case '\'': |
... | ... | |
6495 | 6537 |
default: |
6496 | 6538 |
if (!parser_is_identchar()) { |
6497 | 6539 |
pushback(c); |
6498 |
if (func & STR_FUNC_INDENT) { |
|
6540 |
if (heredoc_indent > 0) { |
|
6541 |
pushback('~'); |
|
6542 |
} else if (func & STR_FUNC_INDENT) { |
|
6499 | 6543 |
pushback('-'); |
6500 | 6544 |
} |
6501 | 6545 |
return 0; |
... | ... | |
6541 | 6585 |
ripper_flush(parser); |
6542 | 6586 |
} |
6543 | 6587 | |
6588 |
static VALUE |
|
6589 |
parser_heredoc_dedent_string(struct parser_params *parser, VALUE input, |
|
6590 |
long *count_indent, long *copy_indent) |
|
6591 |
{ |
|
6592 |
long len, out_len; |
|
6593 |
char *str, *p, *out_p, *end; |
|
6594 |
VALUE output; |
|
6595 |
|
|
6596 |
len = RSTRING_LEN(input); |
|
6597 |
out_len = 0; |
|
6598 |
str = RSTRING_PTR(input); |
|
6599 |
end = &str[len]; |
|
6600 | ||
6601 |
p = str; |
|
6602 |
while (p < end) { |
|
6603 |
while (p < end && *count_indent > 0) { |
|
6604 |
if (*p == ' ') { |
|
6605 |
p++; |
|
6606 |
(*count_indent)--; |
|
6607 |
} else if (*p == '\t' && *count_indent >= 8) { |
|
6608 |
p++; |
|
6609 |
*count_indent -= 8; |
|
6610 |
} else if (*p == '\t' && heredoc_indent % 8) { |
|
6611 |
/* Inconsistent indentation requires us to back up to the |
|
6612 |
previous tab stop */ |
|
6613 |
heredoc_indent = heredoc_indent - (heredoc_indent % 8); |
|
6614 |
*count_indent = *copy_indent = heredoc_indent; |
|
6615 |
return parser_heredoc_dedent_string(parser, input, |
|
6616 |
count_indent, copy_indent); |
|
6617 |
} else { |
|
6618 |
break; |
|
6619 |
} |
|
6620 |
} |
|
6621 | ||
6622 |
for (; p < end && *p != '\n'; p++) out_len++; |
|
6623 |
if (p < end && *p == '\n') { |
|
6624 |
*count_indent = heredoc_indent; |
|
6625 |
out_len++; |
|
6626 |
p++; |
|
6627 |
} |
|
6628 |
} |
|
6629 | ||
6630 |
output = rb_str_new(0, out_len); |
|
6631 |
out_p = RSTRING_PTR(output); |
|
6632 | ||
6633 |
p = str; |
|
6634 |
while (p < end) { |
|
6635 |
while (p < end && *copy_indent > 0) { |
|
6636 |
if (*p == ' ') { |
|
6637 |
p++; |
|
6638 |
(*copy_indent)--; |
|
6639 |
} else if (*p == '\t' && *copy_indent >= 8) { |
|
6640 |
p++; |
|
6641 |
*copy_indent -= 8; |
|
6642 |
} else { |
|
6643 |
break; |
|
6644 |
} |
|
6645 |
} |
|
6646 | ||
6647 |
while (p < end && *p != '\n') *out_p++ = *p++; |
|
6648 |
if (p < end && *p == '\n') { |
|
6649 |
*copy_indent = heredoc_indent; |
|
6650 |
*out_p++ = *p++; |
|
6651 |
} |
|
6652 |
} |
|
6653 | ||
6654 |
return output; |
|
6655 |
} |
|
6656 | ||
6657 |
#ifndef RIPPER |
|
6658 |
static NODE * |
|
6659 |
parser_heredoc_dedent(struct parser_params *parser, NODE *root) |
|
6660 |
{ |
|
6661 |
long count_indent, copy_indent; |
|
6662 |
VALUE output; |
|
6663 |
NODE *node, *str_node; |
|
6664 | ||
6665 |
if (heredoc_indent <= 0) return root; |
|
6666 | ||
6667 |
node = str_node = root; |
|
6668 |
count_indent = copy_indent = heredoc_indent; |
|
6669 | ||
6670 |
while (str_node) { |
|
6671 |
output = parser_heredoc_dedent_string(parser, str_node->nd_lit, |
|
6672 |
&count_indent, &copy_indent); |
|
6673 | ||
6674 |
dispose_string(str_node->nd_lit); |
|
6675 |
str_node->nd_lit = output; |
|
6676 | ||
6677 |
str_node = 0; |
|
6678 |
while (node = node->nd_next) { |
|
6679 |
if (nd_type(node) != NODE_ARRAY) break; |
|
6680 |
if (nd_type(node->nd_head) == NODE_STR || |
|
6681 |
nd_type(node->nd_head) == NODE_DSTR) { |
|
6682 |
str_node = node->nd_head; |
|
6683 |
break; |
|
6684 |
} |
|
6685 |
} |
|
6686 |
} |
|
6687 | ||
6688 |
return root; |
|
6689 |
} |
|
6690 |
#else /* RIPPER */ |
|
6691 |
static VALUE |
|
6692 |
parser_heredoc_dedent_ripper(struct parser_params *parser, VALUE array) |
|
6693 |
{ |
|
6694 |
long count_indent, copy_indent, array_len, i; |
|
6695 |
VALUE e, sym, ret; |
|
6696 | ||
6697 |
if (heredoc_indent <= 0) return array; |
|
6698 | ||
6699 |
count_indent = copy_indent = heredoc_indent; |
|
6700 | ||
6701 |
array_len = RARRAY_LEN(array); |
|
6702 |
for (i = 0; i < array_len; i++) { |
|
6703 |
e = rb_ary_entry(array, i); |
|
6704 |
if (TYPE(e) == T_ARRAY && TYPE(sym = rb_ary_entry(e, 0)) == T_SYMBOL) { |
|
6705 |
if (rb_to_id(sym) != rb_intern("string_content") && |
|
6706 |
rb_to_id(sym) != rb_intern("@tstring_content")) continue; |
|
6707 |
ret = parser_heredoc_dedent_string(parser, rb_ary_entry(e, 1), |
|
6708 |
&count_indent, &copy_indent); |
|
6709 |
rb_ary_store(e, 1, ret); |
|
6710 |
} else if (TYPE(e) == T_STRING) { |
|
6711 |
ret = parser_heredoc_dedent_string(parser, e, |
|
6712 |
&count_indent, &copy_indent); |
|
6713 |
rb_ary_store(array, i, ret); |
|
6714 |
} |
|
6715 |
} |
|
6716 | ||
6717 |
return array; |
|
6718 |
} |
|
6719 |
#endif |
|
6720 | ||
6544 | 6721 |
static int |
6545 | 6722 |
parser_whole_match_p(struct parser_params *parser, |
6546 | 6723 |
const char *eos, long len, int indent) |
test/ripper/test_ripper.rb | ||
---|---|---|
60 | 60 |
assert_predicate @ripper, :yydebug |
61 | 61 |
end |
62 | 62 | |
63 |
def test_squiggly_heredoc |
|
64 |
assert_equal(Ripper.sexp(<<-eos), Ripper.sexp(<<-eos)) |
|
65 |
<<-eot |
|
66 |
asdf |
|
67 |
eot |
|
68 |
eos |
|
69 |
<<~eot |
|
70 |
asdf |
|
71 |
eot |
|
72 |
eos |
|
73 |
end |
|
74 | ||
75 |
def test_squiggly_heredoc_with_interpolated_expression |
|
76 |
sexp1 = Ripper.sexp(<<-eos) |
|
77 |
<<-eot |
|
78 |
a\#{1}z |
|
79 |
eot |
|
80 |
eos |
|
81 | ||
82 |
sexp2 = Ripper.sexp(<<-eos) |
|
83 |
<<~eot |
|
84 |
a\#{1}z |
|
85 |
eot |
|
86 |
eos |
|
87 | ||
88 |
pos = lambda do |s| |
|
89 |
s.fetch(1).fetch(0).fetch(1).fetch(2).fetch(1).fetch(0).fetch(2) |
|
90 |
end |
|
91 |
assert_not_equal pos[sexp1], pos[sexp2] |
|
92 |
pos[sexp1].clear |
|
93 |
pos[sexp2].clear |
|
94 |
assert_equal sexp1, sexp2 |
|
95 |
end |
|
63 | 96 |
end if ripper_test |
test/ruby/test_syntax.rb | ||
---|---|---|
475 | 475 |
assert_equal(expected, actual, "#{Bug7559}: ") |
476 | 476 |
end |
477 | 477 | |
478 |
def test_dedented_heredoc_without_indentation |
|
479 |
assert_equal(" y\nz\n", <<~eos) |
|
480 |
y |
|
481 |
z |
|
482 |
eos |
|
483 |
end |
|
484 | ||
485 |
def test_dedented_heredoc_with_indentation |
|
486 |
assert_equal(" a\nb\n", <<~eos) |
|
487 |
a |
|
488 |
b |
|
489 |
eos |
|
490 |
end |
|
491 | ||
492 |
def test_dedented_heredoc_with_blank_less_indented_line |
|
493 |
# the blank line has two leading spaces |
|
494 |
assert_equal("a\n\nb\n", <<~eos) |
|
495 |
a |
|
496 |
|
|
497 |
b |
|
498 |
eos |
|
499 |
end |
|
500 | ||
501 |
def test_dedented_heredoc_with_blank_less_indented_line_escaped |
|
502 |
assert_equal(" a\n \n b\n", <<~eos) |
|
503 |
a |
|
504 |
\ \ |
|
505 |
b |
|
506 |
eos |
|
507 |
end |
|
508 | ||
509 |
def test_dedented_heredoc_with_blank_more_indented_line |
|
510 |
# the blank line has six leading spaces |
|
511 |
assert_equal("a\n \nb\n", <<~eos) |
|
512 |
a |
|
513 |
|
|
514 |
b |
|
515 |
eos |
|
516 |
end |
|
517 | ||
518 |
def test_dedented_heredoc_with_blank_more_indented_line_escaped |
|
519 |
assert_equal(" a\n \n b\n", <<~eos) |
|
520 |
a |
|
521 |
\ \ \ \ \ \ |
|
522 |
b |
|
523 |
eos |
|
524 |
end |
|
525 | ||
526 |
def test_dedented_heredoc_with_empty_line |
|
527 |
assert_equal(<<-eos, <<~eos) |
|
528 |
This would contain specially formatted text. |
|
529 | ||
530 |
That might span many lines |
|
531 |
eos |
|
532 |
This would contain specially formatted text. |
|
533 | ||
534 |
That might span many lines |
|
535 |
eos |
|
536 |
end |
|
537 | ||
538 |
def test_dedented_heredoc_with_interpolated_expression |
|
539 |
assert_equal(<<-eos, <<~eos) |
|
540 |
#{1}a |
|
541 |
zy |
|
542 |
eos |
|
543 |
#{1}a |
|
544 |
zy |
|
545 |
eos |
|
546 |
end |
|
547 | ||
548 |
def test_dedented_heredoc_with_interpolated_string |
|
549 |
w = "" |
|
550 |
assert_equal(<<-eos, <<~eos) |
|
551 |
#{w} a |
|
552 |
zy |
|
553 |
eos |
|
554 |
#{w} a |
|
555 |
zy |
|
556 |
eos |
|
557 |
end |
|
558 | ||
559 |
def test_dedented_heredoc_mixing_tab_with_space |
|
560 |
assert_equal(<<-eos, <<~eos) |
|
561 |
16 spaces |
|
562 |
2 tabs |
|
563 |
eos |
|
564 |
16 spaces |
|
565 |
2 tabs |
|
566 |
eos |
|
567 |
end |
|
568 | ||
569 |
def test_dedented_heredoc_with_inconsistent_indentation_preserves_tab |
|
570 |
assert_equal(<<-eos, <<~eos) |
|
571 |
2 tabs |
|
572 |
10 spaces |
|
573 |
eos |
|
574 |
2 tabs |
|
575 |
10 spaces |
|
576 |
eos |
|
577 |
end |
|
578 | ||
478 | 579 |
def test_lineno_after_heredoc |
479 | 580 |
bug7559 = '[ruby-dev:46737]' |
480 | 581 |
expected, _, actual = __LINE__, <<eom, __LINE__ |