From b77abac663fb1568543dd2403a41cbc0f30e652d Mon Sep 17 00:00:00 2001
From: Jeremy Evans
Date: Wed, 15 May 2019 08:09:41 -0700
Subject: [PATCH] Fix mixed encoding in heredoc when UTF-8 occurs first

This checks that the string generated by the heredoc has a valid
encoding. If not, this raises a syntax error.
---
 parse.y                  |  3 +++
 test/ruby/test_syntax.rb | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/parse.y b/parse.y
index 72bf8bf74d..de49a8a0ed 100644
--- a/parse.y
+++ b/parse.y
@@ -7304,6 +7304,9 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
         } while (!whole_match_p(p, eos, len, indent));
         str = STR_NEW3(tok(p), toklen(p), enc, func);
     }
+    if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
+        parser_mixed_error(p, enc, p->enc);
+    }
     dispatch_heredoc_end(p);
 #ifdef RIPPER
     str = ripper_new_yylval(p, ripper_token2eventid(tSTRING_CONTENT),
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index 23b7a626fc..7bf1e0e43c 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -788,6 +788,19 @@ def test_heredoc_mixed_encoding
       \u1234
       TEXT
     HEREDOC
+    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+      #encoding: cp932
+      <<-TEXT
+      \u1234\xe9\x9d
+      TEXT
+    HEREDOC
+    assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+      #encoding: cp932
+      <<-TEXT
+      \u1234
+      \xe9\x9d
+      TEXT
+    HEREDOC
   end
 
   def test_lineno_operation_brace_block
-- 
2.21.0