Project

General

Profile

Feature #9098 » dedent_heredoc.patch

bjmllr (Ben Miller), 11/28/2015 01:26 AM

View differences:

doc/syntax/literals.rdoc
196 196
always treated as if it is flush left.  If you indent the content those spaces
197 197
will appear in the output.
198 198

  
199
To have indented content as well as an indented closing identifier, you can use
200
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
201

  
202
    expected_result = <<~SQUIGGLY_HEREDOC
203
      This would contain specially formatted text.
204

  
205
      That might span many lines
206
    SQUIGGLY_HEREDOC
207

  
208
The indentation of the least-indented line will be removed from each line of
209
the content.  Note that empty lines and lines consisting solely of literal tabs
210
and spaces will be ignored for the purposes of determining indentation, but
211
escaped tabs and spaces are considered non-indentation characters.
212

  
213
If both tabs and spaces are used as indentation in the same heredoc,
214
tabs are considered as equal to 8 spaces.  If the indentation of the
215
least-indented line falls in the middle of a leading tab, only
216
indentation to the left of that tab will be removed.
217

  
199 218
A heredoc allows interpolation and escaped characters.  You may disable
200 219
interpolation and escaping by surrounding the opening identifier with single
201 220
quotes:
parse.y
255 255
    int toksiz;
256 256
    int tokline;
257 257
    int heredoc_end;
258
    int heredoc_indent;
259
    int heredoc_line_indent;
258 260
    char *tokenbuf;
259 261
    NODE *deferred_nodes;
260 262
    struct local_vars *lvtbl;
......
345 347
#define lex_p			(parser->lex.pcur)
346 348
#define lex_pend		(parser->lex.pend)
347 349
#define heredoc_end		(parser->heredoc_end)
350
#define heredoc_indent		(parser->heredoc_indent)
351
#define heredoc_line_indent	(parser->heredoc_line_indent)
348 352
#define command_start		(parser->command_start)
349 353
#define deferred_nodes		(parser->deferred_nodes)
350 354
#define lex_gets_ptr		(parser->lex.gets_ptr)
......
485 489
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
486 490
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
487 491

  
492
static NODE *parser_heredoc_dedent(struct parser_params*,NODE*);
493
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
494

  
495

  
488 496
#define get_id(id) (id)
489 497
#define get_value(val) (val)
490 498
#else
......
668 676

  
669 677
#define new_defined(expr) dispatch1(defined, (expr))
670 678

  
679
static VALUE parser_heredoc_dedent_ripper(struct parser_params*,VALUE);
680
# define heredoc_dedent_ripper(str) parser_heredoc_dedent_ripper(parser, (str))
681

  
671 682
#define FIXME 0
672 683

  
673 684
#endif /* RIPPER */
......
3885 3896
			else {
3886 3897
			    node = evstr2dstr(node);
3887 3898
			}
3899
			heredoc_indent = 0;
3888 3900
			$$ = node;
3889 3901
		    /*%
3890 3902
			$$ = $1;
......
3907 3919
string1		: tSTRING_BEG string_contents tSTRING_END
3908 3920
		    {
3909 3921
		    /*%%%*/
3910
			$$ = $2;
3922
			$$ = heredoc_dedent($2);
3911 3923
		    /*%
3912
			$$ = dispatch1(string_literal, $2);
3924
			$$ = dispatch1(string_literal, 
3925
                                       heredoc_dedent_ripper($2));
3913 3926
		    %*/
3914 3927
		    }
3915 3928
		;
......
3934 3947
				break;
3935 3948
			    }
3936 3949
			}
3937
			$$ = node;
3950
			$$ = heredoc_dedent(node);
3938 3951
		    /*%
3939
			$$ = dispatch1(xstring_literal, $2);
3952
			$$ = dispatch1(xstring_literal, 
3953
				       heredoc_dedent_ripper($2));
3940 3954
		    %*/
3941 3955
		    }
3942 3956
		;
......
4317 4331
			$<num>$ = brace_nest;
4318 4332
			brace_nest = 0;
4319 4333
		    }
4334
		    {
4335
			$<num>$ = heredoc_indent;
4336
			heredoc_indent = 0;
4337
		    }
4320 4338
		  compstmt tSTRING_DEND
4321 4339
		    {
4322 4340
			cond_stack = $<val>1;
......
4324 4342
			lex_strterm = $<node>3;
4325 4343
			lex_state = $<num>4;
4326 4344
			brace_nest = $<num>5;
4345
			heredoc_indent = $<num>6;
4327 4346
		    /*%%%*/
4328
			if ($6) $6->flags &= ~NODE_FL_NEWLINE;
4329
			$$ = new_evstr($6);
4347
			if ($7) $7->flags &= ~NODE_FL_NEWLINE;
4348
			$$ = new_evstr($7);
4330 4349
		    /*%
4331
			$$ = dispatch1(string_embexpr, $6);
4350
			$$ = dispatch1(string_embexpr, $7);
4332 4351
		    %*/
4333 4352
		    }
4334 4353
		;
......
5693 5712
#define STR_FUNC_SYMBOL 0x10
5694 5713
#define STR_FUNC_INDENT 0x20
5695 5714
#define STR_FUNC_LABEL  0x40
5715
#define STR_FUNC_DEDENT 0x80
5696 5716

  
5697 5717
enum string_type {
5698 5718
    str_label  = STR_FUNC_LABEL,
......
6211 6231
    } while (0)
6212 6232

  
6213 6233
    while ((c = nextc()) != -1) {
6234
	if (heredoc_indent > 0) {
6235
	    if (heredoc_line_indent == -1) {
6236
		if (c == '\n') heredoc_line_indent = 0;
6237
	    } else {
6238
		if (c == ' ') {
6239
		    heredoc_line_indent++;
6240
		} else if (c == '\t') {
6241
		    heredoc_line_indent += 8;
6242
		} else if (c != '\n') {
6243
		    if (heredoc_indent > heredoc_line_indent) {
6244
			heredoc_indent = heredoc_line_indent;
6245
		    }
6246
		    heredoc_line_indent = -1;
6247
		}
6248
	    }
6249
	}
6250

  
6214 6251
	if (paren && c == paren) {
6215 6252
	    ++*nest;
6216 6253
	}
......
6471 6508
    if (c == '-') {
6472 6509
	c = nextc();
6473 6510
	func = STR_FUNC_INDENT;
6511
    } else if (c == '~') {
6512
	c = nextc();
6513
	func = STR_FUNC_INDENT;
6514
	heredoc_indent = INT_MAX;
6515
	heredoc_line_indent = 0;
6474 6516
    }
6475 6517
    switch (c) {
6476 6518
      case '\'':
......
6495 6537
      default:
6496 6538
	if (!parser_is_identchar()) {
6497 6539
	    pushback(c);
6498
	    if (func & STR_FUNC_INDENT) {
6540
	    if (heredoc_indent > 0) {
6541
		pushback('~');
6542
	    } else if (func & STR_FUNC_INDENT) {
6499 6543
		pushback('-');
6500 6544
	    }
6501 6545
	    return 0;
......
6541 6585
    ripper_flush(parser);
6542 6586
}
6543 6587

  
6588
/*
 * Returns a newly allocated copy of +input+ with up to heredoc_indent
 * columns of leading spaces/tabs removed from each line.  A space counts
 * as 1 column and a tab as 8.  +input+ itself is not modified.
 *
 * *count_indent and *copy_indent hold the number of columns still to be
 * stripped from the current line, for the measuring pass and the copying
 * pass respectively.  Both are read and updated in place, so the values
 * left at the end of one call are the starting state of the next call made
 * with the same pointers.  Each is reset to heredoc_indent after a '\n'.
 */
static VALUE
6589
parser_heredoc_dedent_string(struct parser_params *parser, VALUE input,
6590
			     long *count_indent, long *copy_indent)
6591
{
6592
    long len, out_len;
6593
    char *str, *p, *out_p, *end;
6594
    VALUE output;
6595
    
6596
    len = RSTRING_LEN(input);
6597
    out_len = 0;
6598
    str = RSTRING_PTR(input);
6599
    end = &str[len];
6600

  
6601
    /* Pass 1: walk the input and compute the length of the dedented result. */
    p = str;
6602
    while (p < end) {
6603
	while (p < end && *count_indent > 0) {
6604
	    if (*p == ' ') {
6605
		p++;
6606
		(*count_indent)--;
6607
	    /* a tab is only consumed when a full 8 columns remain to strip */
	    } else if (*p == '\t' && *count_indent >= 8) {
6608
		p++;
6609
		*count_indent -= 8;
6610
	    /* here 1..7 columns remain, i.e. the cut falls inside this tab */
	    } else if (*p == '\t' && heredoc_indent % 8) {
6611
		/* Inconsistent indentation requires us to back up to the
6612
		   previous tab stop */
6613
		heredoc_indent = heredoc_indent - (heredoc_indent % 8);
6614
		*count_indent = *copy_indent = heredoc_indent;
6615
		/* NOTE(review): only this string is reprocessed with the
		   lowered heredoc_indent; strings handled by earlier calls
		   are not revisited -- confirm this is intended. */
		return parser_heredoc_dedent_string(parser, input,
6616
						    count_indent, copy_indent);
6617
	    } else {
6618
		break;
6619
	    }
6620
	}
6621

  
6622
	/* everything up to the end of the line is kept verbatim */
	for (; p < end && *p != '\n'; p++) out_len++;
6623
	if (p < end && *p == '\n') {
6624
	    /* the next line starts with the full indentation budget again */
	    *count_indent = heredoc_indent;
6625
	    out_len++;
6626
	    p++;
6627
	}
6628
    }
6629

  
6630
    output = rb_str_new(0, out_len);
6631
    out_p = RSTRING_PTR(output);
6632

  
6633
    /* Pass 2: copy into the new string, skipping the same leading whitespace.
       There is no tab-stop back-up branch here; pass 1 returns through the
       recursive call before this point whenever that case is hit (assuming
       both counters entered with the same value). */
    p = str;
6634
    while (p < end) {
6635
	while (p < end && *copy_indent > 0) {
6636
	    if (*p == ' ') {
6637
		p++;
6638
		(*copy_indent)--;
6639
	    } else if (*p == '\t' && *copy_indent >= 8) {
6640
		p++;
6641
		*copy_indent -= 8;
6642
	    } else {
6643
		break;
6644
	    }
6645
	}
6646

  
6647
	while (p < end && *p != '\n') *out_p++ = *p++;
6648
	if (p < end && *p == '\n') {
6649
	    *copy_indent = heredoc_indent;
6650
	    *out_p++ = *p++;
6651
	}
6652
    }
6653

  
6654
    return output;
6655
}
6656

  
6657
#ifndef RIPPER
6658
/*
 * Dedents the string literals of a heredoc node tree in place and returns
 * +root+.  Returns +root+ untouched when heredoc_indent is not positive.
 *
 * +root+ is treated as the first string node.  Its nd_next chain is then
 * followed, and each NODE_ARRAY element whose nd_head is a NODE_STR or
 * NODE_DSTR is processed in turn; the walk ends at the first node that is
 * not a NODE_ARRAY.  The same pair of counters is passed to every
 * parser_heredoc_dedent_string call, so the remaining-indentation state
 * carries over from one literal to the next.
 *
 * NOTE(review): +root+ is dereferenced without a NULL or node-type check --
 * confirm callers cannot pass an empty or non-string node while
 * heredoc_indent > 0.
 */
static NODE *
6659
parser_heredoc_dedent(struct parser_params *parser, NODE *root)
6660
{
6661
    long count_indent, copy_indent;
6662
    VALUE output;
6663
    NODE *node, *str_node;
6664

  
6665
    if (heredoc_indent <= 0) return root;
6666

  
6667
    node = str_node = root;
6668
    count_indent = copy_indent = heredoc_indent;
6669

  
6670
    while (str_node) {
6671
	output = parser_heredoc_dedent_string(parser, str_node->nd_lit,
6672
	    &count_indent, &copy_indent);
6673

  
6674
	/* swap the old literal for the dedented copy */
	dispose_string(str_node->nd_lit);
6675
	str_node->nd_lit = output;
6676

  
6677
	/* look for the next string element; stays 0 (ending the outer loop)
	   if the chain ends or a non-NODE_ARRAY node is reached */
	str_node = 0;
6678
	/* assignment is intentional: advance along the chain until it ends */
	while (node = node->nd_next) {
6679
	    if (nd_type(node) != NODE_ARRAY) break;
6680
	    if (nd_type(node->nd_head) == NODE_STR ||
6681
		nd_type(node->nd_head) == NODE_DSTR) {
6682
		str_node = node->nd_head;
6683
		break;
6684
	    }
6685
	}
6686
    }
6687

  
6688
    return root;
6689
}
6690
#else /* RIPPER */
6691
/*
 * Ripper counterpart of the heredoc dedent step: dedents the string parts
 * held in +array+ in place and returns +array+.  Returns it untouched when
 * heredoc_indent is not positive.
 *
 * Two kinds of element are rewritten:
 *   - an Array whose first entry is the Symbol :string_content or
 *     :@tstring_content -- its entry 1 is replaced by the dedented string;
 *   - a String -- the element itself is replaced in +array+.
 * Every other element is left as is.  The same pair of counters is passed
 * to each parser_heredoc_dedent_string call, so the remaining-indentation
 * state carries over between elements.
 *
 * NOTE(review): assumes +array+ is an Array and that entry 1 of a matching
 * element is a String; neither is checked here -- confirm against callers.
 */
static VALUE
6692
parser_heredoc_dedent_ripper(struct parser_params *parser, VALUE array)
6693
{
6694
    long count_indent, copy_indent, array_len, i;
6695
    VALUE e, sym, ret;
6696

  
6697
    if (heredoc_indent <= 0) return array;
6698

  
6699
    count_indent = copy_indent = heredoc_indent;
6700

  
6701
    array_len = RARRAY_LEN(array);
6702
    for (i = 0; i < array_len; i++) {
6703
	e = rb_ary_entry(array, i);
6704
	if (TYPE(e) == T_ARRAY && TYPE(sym = rb_ary_entry(e, 0)) == T_SYMBOL) {
6705
	    /* skip tagged elements that are not string content */
	    if (rb_to_id(sym) != rb_intern("string_content") &&
6706
	    	rb_to_id(sym) != rb_intern("@tstring_content")) continue;
6707
	    ret = parser_heredoc_dedent_string(parser, rb_ary_entry(e, 1),
6708
					       &count_indent, &copy_indent);
6709
	    rb_ary_store(e, 1, ret);
6710
	} else if (TYPE(e) == T_STRING) {
6711
	    ret = parser_heredoc_dedent_string(parser, e,
6712
					       &count_indent, &copy_indent);
6713
	    rb_ary_store(array, i, ret);
6714
	}
6715
    }
6716

  
6717
    return array;
6718
}
6719
#endif
6720

  
6544 6721
static int
6545 6722
parser_whole_match_p(struct parser_params *parser,
6546 6723
    const char *eos, long len, int indent)
test/ripper/test_ripper.rb
60 60
    assert_predicate @ripper, :yydebug
61 61
  end
62 62

  
63
  # A <<~ heredoc with an indented body must yield the same Ripper sexp as
  # the equivalent <<- heredoc whose body is written flush left.
  def test_squiggly_heredoc
64
    assert_equal(Ripper.sexp(<<-eos), Ripper.sexp(<<-eos))
65
    <<-eot
66
asdf
67
    eot
68
    eos
69
    <<~eot
70
      asdf
71
    eot
72
    eos
73
  end
74

  
75
  # With an interpolated expression in the body, the <<~ and <<- sexps differ
  # only in the nested element selected by +pos+ (presumably a source
  # position -- confirm); once that element is cleared they must be equal.
  def test_squiggly_heredoc_with_interpolated_expression
76
    sexp1 = Ripper.sexp(<<-eos)
77
<<-eot
78
a\#{1}z
79
eot
80
    eos
81

  
82
    sexp2 = Ripper.sexp(<<-eos)
83
<<~eot
84
  a\#{1}z
85
eot
86
    eos
87

  
88
    pos = lambda do |s|
89
      s.fetch(1).fetch(0).fetch(1).fetch(2).fetch(1).fetch(0).fetch(2)
90
    end
91
    assert_not_equal pos[sexp1], pos[sexp2]
92
    pos[sexp1].clear
93
    pos[sexp2].clear
94
    assert_equal sexp1, sexp2
95
  end
63 96
end if ripper_test
test/ruby/test_syntax.rb
475 475
    assert_equal(expected, actual, "#{Bug7559}: ")
476 476
  end
477 477

  
478
  # When the least-indented line ("z") has no indentation, nothing is
  # stripped and the leading space of " y" is kept.
  def test_dedented_heredoc_without_indentation
479
    assert_equal(" y\nz\n", <<~eos)
480
 y
481
z
482
    eos
483
  end
484

  
485
  # The indentation of the least-indented line ("b", four spaces) is removed
  # from every line; the extra space in front of "a" remains.
  def test_dedented_heredoc_with_indentation
486
    assert_equal(" a\nb\n", <<~eos)
487
     a
488
    b
489
    eos
490
  end
491

  
492
  # A whitespace-only line indented less than the text does not lower the
  # amount of indentation stripped; it comes out as an empty line.
  def test_dedented_heredoc_with_blank_less_indented_line
493
    # the blank line has two leading spaces
494
    assert_equal("a\n\nb\n", <<~eos)
495
    a
496
  
497
    b
498
    eos
499
  end
500

  
501
  # Escaped spaces count as content, so the "\ \ " line has zero indentation
  # and nothing is stripped from any line.
  def test_dedented_heredoc_with_blank_less_indented_line_escaped
502
    assert_equal("    a\n  \n    b\n", <<~eos)
503
    a
504
\ \ 
505
    b
506
    eos
507
  end
508

  
509
  # A whitespace-only line indented more than the text keeps the spaces that
  # lie beyond the four stripped columns.
  def test_dedented_heredoc_with_blank_more_indented_line
510
    # the blank line has six leading spaces
511
    assert_equal("a\n  \nb\n", <<~eos)
512
    a
513
      
514
    b
515
    eos
516
  end
517

  
518
  # Escaped spaces count as content: the line made of six escaped spaces has
  # zero indentation, so every line is left untouched.
  def test_dedented_heredoc_with_blank_more_indented_line_escaped
519
    assert_equal("    a\n      \n    b\n", <<~eos)
520
    a
521
\ \ \ \ \ \ 
522
    b
523
    eos
524
  end
525

  
526
  # An empty line inside the body does not affect how much indentation is
  # removed from the other lines.
  def test_dedented_heredoc_with_empty_line
527
    assert_equal(<<-eos, <<~eos)
528
This would contain specially formatted text.
529

  
530
That might span many lines
531
    eos
532
      This would contain specially formatted text.
533

  
534
      That might span many lines
535
    eos
536
  end
537

  
538
  # A line whose content starts with an interpolation is dedented like any
  # other line; here one column is removed from each line.
  def test_dedented_heredoc_with_interpolated_expression
539
      assert_equal(<<-eos, <<~eos)
540
 #{1}a
541
zy
542
      eos
543
  #{1}a
544
 zy
545
      eos
546
  end
547

  
548
  # Only whitespace before the interpolation counts as indentation; the
  # space that follows the (empty) interpolated string is preserved.
  def test_dedented_heredoc_with_interpolated_string
549
    w = ""
550
    assert_equal(<<-eos, <<~eos)
551
#{w} a
552
 zy
553
    eos
554
 #{w} a
555
  zy
556
    eos
557
  end
558

  
559
  # Sixteen spaces and two tabs are treated as the same indentation (a tab
  # counts as 8 columns), so both lines end up flush left.
  def test_dedented_heredoc_mixing_tab_with_space
560
    assert_equal(<<-eos, <<~eos)
561
16 spaces
562
2 tabs
563
    eos
564
                16 spaces
565
		2 tabs
566
    eos
567
  end
568

  
569
  # The least indentation (10 columns) falls inside the second tab of the
  # other line, so only 8 columns are removed: one tab stays on the tab
  # line and two spaces stay on the space-indented line.
  def test_dedented_heredoc_with_inconsistent_indentation_preserves_tab
570
    assert_equal(<<-eos, <<~eos)
571
	2 tabs
572
  10 spaces
573
    eos
574
		2 tabs
575
          10 spaces
576
    eos
577
  end
578

  
478 579
  def test_lineno_after_heredoc
479 580
    bug7559 = '[ruby-dev:46737]'
480 581
    expected, _, actual = __LINE__, <<eom, __LINE__
481
-