From 37be3e260c883abbbacedff4c53da8e40b3f12a0 Mon Sep 17 00:00:00 2001
From: Eric Wong <e@80x24.org>
Date: Tue, 27 Dec 2016 00:54:41 +0000
Subject: [PATCH] introduce String#fstring method

This exposes the rb_fstring internal function to return a
deduped and frozen string.  This is useful for all sorts of
record processing where arbitrary keys and values may be stored,
but certain keys and values are often duplicated at a high
frequency, so memory savings can be noticeable.

Use cases are many:

* email/NNTP header processing

  There are some standard header keys everybody uses
  (From/To/Cc/Date/Subject/Received/Message-ID/References/In-Reply-To),
  as well as common ones specific to certain lists
  (ruby-core has X-Redmine-* headers).
  It is also useful to dedupe values, as most inboxes have
  multiple messages from the same sender, or MUA.

* package management systems -
  things like RubyGems store identical strings for licenses,
  dependency names, author names/emails, etc.

* HTTP headers/trailers -
  standard headers (Host/Accept/Accept-Encoding/User-Agent/...)
  are common, but there are also uncommon ones.
  Values may be deduped, as well, as it is likely a user
  agent will make multiple/parallel requests to the same
  server.

* version control systems -
  this can be useful for deduplicating names of frequent
  committers (like "nobu" :)

  In linux.git and git.git, there are also common
  trailers such as Signed-off-by/Acked-by/Reviewed-by/Fixes/...
  as well as less common ones.

* audio metadata -

  There are commonly used tags (Artist/Album/Title/Tracknumber),
  but Vorbis comments allow arbitrary key values to be stored.
  Music collections contain songs by the same artist or multiple
  songs from the same album, so deduplicating values will be
  helpful there, too.

* JSON, YAML, XML, HTML processing

  certain fields, tags and attributes are commonly used
  across the same and multiple documents
---
 string.c                 | 11 +++++++++++
 test/ruby/test_string.rb |  8 ++++++++
 2 files changed, 19 insertions(+)

diff --git a/string.c b/string.c
index 494dc1d90e..ce44aa5454 100644
--- a/string.c
+++ b/string.c
@@ -296,6 +296,16 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existi
     }
 }
 
+/*
+ *  call-seq:
+ *     str.fstring -> str2
+ *     str2.fstring -> str2
+ *
+ *  Returns a deduplicated and frozen string identical in content
+ *  to <i>str</i>.  If <i>str</i> was already deduplicated and
+ *  frozen (e.g. a frozen string literal), then <i>str</i> itself
+ *  is returned.
+ */
 RUBY_FUNC_EXPORTED
 VALUE
 rb_fstring(VALUE str)
@@ -10098,6 +10108,7 @@ Init_String(void)
     rb_define_method(rb_cString, "b", rb_str_b, 0);
     rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
     rb_define_method(rb_cString, "ascii_only?", rb_str_is_ascii_only_p, 0);
+    rb_define_method(rb_cString, "fstring", rb_fstring, 0);
 
     rb_fs = Qnil;
     rb_define_hooked_variable("$;", &rb_fs, 0, rb_fs_setter);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index f2fcf3fb25..a2cd131c90 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2549,6 +2549,14 @@ def test_chr
     assert_equal("\u3042", "\u3042\u3043".chr)
     assert_equal('', ''.chr)
   end
+
+  def test_fstring # String#fstring returns the canonical frozen copy and leaves the receiver alone
+    ts = 'fstring test case'.freeze # frozen literal; presumably already the deduplicated instance
+    tmp = ts.dup # copy with the same content as ts
+    assert_same ts, tmp.fstring # must hand back ts itself, not merely an equal string
+    assert_not_predicate tmp, :frozen? # calling fstring must not freeze the receiver
+    assert_predicate tmp.fstring, :frozen? # the returned string is frozen
+  end
 end
 
 class TestString2 < TestString
-- 
EW

