From a34a57a498acf377a01dd3db2f43dec9168e97b8 Mon Sep 17 00:00:00 2001 From: Prem Sichanugrist Date: Tue, 24 Nov 2015 14:14:31 -0500 Subject: [PATCH] Introduce String#squish and String#squish! This is a port of methods that are part of Active Support[1]. Both of these methods are useful when you are dealing with a heredoc and would like to strip the whitespace before and after the string and collapse every run of whitespace in between into a single space. Example: sql = <<-SQL.squish SELECT * FROM users WHERE users.username = 'sikachu' SQL #=> "SELECT * FROM users WHERE users.username = 'sikachu'" [1]: http://api.rubyonrails.org/v4.2.5/classes/String.html#method-i-squish --- string.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ test/ruby/test_string.rb | 18 +++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/string.c b/string.c index d834754..90b32a0 100644 --- a/string.c +++ b/string.c @@ -8803,6 +8803,62 @@ str_scrub_bang(int argc, VALUE *argv, VALUE str) return str; } +/* + * call-seq: + * str.squish! -> str + * + * Performs a destructive squish. See String#squish. + * + * str = " foo bar \n \t boo" + * str.squish! 
# => "foo bar boo" + * str # => "foo bar boo" + */ +static VALUE +rb_str_squish_bang(VALUE str) +{ + static const char before_regex_source[] = "\\A[[:space:]]+"; /* whitespace run at the very start */ + static const char after_regex_source[] = "[[:space:]]+\\z"; /* whitespace run at the very end */ + static const char between_regex_source[] = "[[:space:]]+"; /* any whitespace run */ + VALUE before_argv[] = { + rb_reg_new(before_regex_source, sizeof before_regex_source - 1, 0), + rb_str_new_cstr("") + }; + VALUE after_argv[] = { + rb_reg_new(after_regex_source, sizeof after_regex_source - 1, 0), + rb_str_new_cstr("") + }; + VALUE between_argv[] = { + rb_reg_new(between_regex_source, sizeof between_regex_source - 1, 0), + rb_str_new_cstr(" ") + }; + + rb_str_gsub_bang(2, before_argv, str); /* drop leading whitespace */ + rb_str_gsub_bang(2, after_argv, str); /* drop trailing whitespace */ + rb_str_gsub_bang(2, between_argv, str); /* collapse every remaining run to one space */ + return str; /* always the receiver, whether or not anything was replaced */ +} + +/* + * call-seq: + * str.squish -> new_str + * + * Returns a copy of the string, first removing all whitespace on both ends of the + * string, and then changing remaining consecutive whitespace groups into one + * space each. + * + * Note that it handles both ASCII and Unicode whitespace. 
+ * + * %{ Multi-line + * string }.squish # => "Multi-line string" + * " foo bar \n \t boo".squish # => "foo bar boo" + */ +static VALUE +rb_str_squish(VALUE str) +{ + str = rb_str_dup(str); /* squish a duplicate so the receiver is left as it was */ + return rb_str_squish_bang(str); +} + /********************************************************************** * Document-class: Symbol * @@ -9328,6 +9384,7 @@ Init_String(void) rb_define_method(rb_cString, "strip", rb_str_strip, 0); rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0); rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0); + rb_define_method(rb_cString, "squish", rb_str_squish, 0); rb_define_method(rb_cString, "sub!", rb_str_sub_bang, -1); rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); @@ -9336,6 +9393,7 @@ Init_String(void) rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0); rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0); + rb_define_method(rb_cString, "squish!", rb_str_squish_bang, 0); rb_define_method(rb_cString, "tr", rb_str_tr, 2); rb_define_method(rb_cString, "tr_s", rb_str_tr_s, 2); diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index ce21db9..e62ca67 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2304,6 +2304,24 @@ def test_LSHIFT_neary_long_max end; end if [0].pack("l!").bytesize < [nil].pack("p").bytesize # enable only when string size range is smaller than memory space + + def test_string_squish + original = %{\u205f\u3000 A string surrounded by various unicode spaces, + with tabs(\t\t), newlines(\n\n), unicode nextlines(\u0085\u0085) and many spaces( ). \u00a0\u2007} + + expected = "A string surrounded by various unicode spaces, " + + "with tabs( ), newlines( ), unicode nextlines( ) and many spaces( )." + + # Make sure squish returns what we expect: + assert_equal expected, original.squish + # But doesn't modify the original string: + assert_not_equal expected, original + + # Make sure squish! 
returns what we expect: + assert_equal expected, original.squish! + # And changes the original string: + assert_equal expected, original + end end class TestString2 < TestString -- 2.6.3