From 9d7a3dd98f8092db60971d6cffe3857de1bd105b Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 19 Jun 2019 10:44:54 -0700 Subject: [PATCH] Check that String#scrub block does not modify receiver Similar to the check used for String#gsub. Can fix possible segfault. Fixes [Bug #15941] --- string.c | 19 ++++++++++++------- test/ruby/test_m17n.rb | 13 +++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/string.c b/string.c index 3feeb8c705..99363b1ef2 100644 --- a/string.c +++ b/string.c @@ -10251,9 +10251,10 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) { int encidx; VALUE buf = Qnil; - const char *rep; + const char *rep, *p, *e, *p1, *sp; long replen = -1; int tainted = 0; + long slen; if (rb_block_given_p()) { if (!NIL_P(repl)) @@ -10279,10 +10280,13 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) rep = replace; replen = (int)sizeof(replace); \ } while (0) + slen = RSTRING_LEN(str); + p = RSTRING_PTR(str); + e = RSTRING_END(str); + p1 = p; + sp = p; + if (rb_enc_asciicompat(enc)) { - const char *p = RSTRING_PTR(str); - const char *e = RSTRING_END(str); - const char *p1 = p; int rep7bit_p; if (!replen) { rep = NULL; @@ -10347,6 +10351,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10382,6 +10387,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10392,9 +10398,6 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { /* ASCII incompatible */ - const char *p = RSTRING_PTR(str); - const char *e = RSTRING_END(str); - const char *p1 = p; long mbminlen = rb_enc_mbminlen(enc); if (!replen) { rep = NULL; @@ -10451,6 +10454,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, clen, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); @@ -10478,6 +10482,7 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr) } else { repl = rb_yield(rb_enc_str_new(p, e-p, enc)); + str_mod_check(str, sp, slen); repl = str_compat_and_valid(repl, enc); tainted |= OBJ_TAINTED_RAW(repl); rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl)); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 75daf61376..19a00ec36c 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1602,6 +1602,19 @@ def test_scrub_valid_string assert_predicate(str.dup.taint.scrub, :tainted?) end + def test_scrub_modification_inside_block + str = ("abc\u3042".b << "\xE3\x80".b).force_encoding('UTF-8') + assert_raise(RuntimeError) {str.scrub{|_| str << "1234567890"; "?" }} + + str = "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE) + assert_raise(RuntimeError) do + str.scrub do |_| + str << "1\x002\x00".force_encoding('UTF-16LE') + "?\x00".force_encoding('UTF-16LE') + end + end + end + def test_scrub_replace_default assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub) assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub) -- 2.21.0