From ff473adad4aebc3e306efbe5767c7da7904e0b28 Mon Sep 17 00:00:00 2001
From: Kenta Murata
Date: Thu, 7 Jan 2010 17:01:30 +0900
Subject: [PATCH] * string.c (rb_str_hamming_distance, Init_String),
 test/ruby/test_string.rb (test_hamming_distance): Added support for
 calculating hamming distance between two strings.

---
 ChangeLog                |    6 ++++
 string.c                 |   62 ++++++++++++++++++++++++++++++++++++++++++++++
 test/ruby/test_string.rb |   11 ++++++++
 3 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a2e1809..8138bab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Thu Jan  7 16:32:00 2010  Kenta Murata
+
+	* string.c (rb_str_hamming_distance, Init_String),
+	  test/ruby/test_string.rb (test_hamming_distance): Added support for
+	  calculating hamming distance between two strings.
+
 Thu Jan  7 07:56:09 2010  Ryan Davis
 
 	* lib/minitest/*.rb: Imported minitest 1.5.0 r5596.
diff --git a/string.c b/string.c
index 2dcc330..434a335 100644
--- a/string.c
+++ b/string.c
@@ -6903,6 +6903,65 @@ rb_str_setter(VALUE val, ID id, VALUE *var)
     *var = val;
 }
 
+/*
+ *  call-seq:
+ *     str.hamming_distance(other_str)   => integer or nil
+ *     str ^ other_str                   => integer or nil
+ *
+ *  Returns the Hamming distance between <i>str</i> and <i>other_str</i>,
+ *  that is, the number of character positions at which they differ.
+ *  Returns <code>nil</code> if <i>other_str</i> is not a String, if the
+ *  two strings have incompatible encodings, or if they do not have the
+ *  same length.
+ *
+ *     "abc" ^ "dbf"   #=> 2
+ *     "" ^ ""         #=> 0
+ *     "a" ^ "abc"     #=> nil
+ */
+VALUE
+rb_str_hamming_distance(VALUE str1, VALUE str2)
+{
+    long d = 0;
+    rb_encoding *enc;
+    char *p1, *p1end, *p2, *p2end;
+
+    if (TYPE(str2) != T_STRING) return Qnil;
+
+    enc = rb_enc_compatible(str1, str2);
+    if (!enc) return Qnil;
+    if (str_strlen(str1, enc) != str_strlen(str2, enc)) return Qnil;
+
+    p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
+    p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
+    if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
+        /* both strings are single-byte optimizable: compare byte by byte */
+        while (p1 < p1end) {
+            if (*p1 != *p2) ++d;
+            ++p1;
+            ++p2;
+        }
+    }
+    else {
+        while (p1 < p1end && p2 < p2end) {
+            int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
+            int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
+            if (0 <= c1 && 0 <= c2) {
+                if (c1 != c2) ++d;
+            }
+            else {
+                /* rb_enc_ascget gave a negative value for at least one
+                 * side: compare the raw bytes of the two characters */
+                l1 = rb_enc_mbclen(p1, p1end, enc);
+                l2 = rb_enc_mbclen(p2, p2end, enc);
+                if (l1 != l2 || memcmp(p1, p2, l1) != 0) ++d;
+            }
+            p1 += l1;
+            p2 += l2;
+        }
+    }
+    return LONG2NUM(d);
+}
+
 
 /*
  *  call-seq:
@@ -7468,6 +7527,9 @@ Init_String(void)
     rb_define_method(rb_cString, "partition", rb_str_partition, 1);
     rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
 
+    rb_define_method(rb_cString, "hamming_distance", rb_str_hamming_distance, 1);
+    rb_define_method(rb_cString, "^", rb_str_hamming_distance, 1);
+
     rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
     rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
     rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 407c6d2..9a68a5c 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -1850,4 +1850,15 @@ class TestString < Test::Unit::TestCase
     assert_equal(s, k, '[ruby-dev:39068]')
     assert_equal(Encoding::UTF_8, k.encoding, '[ruby-dev:39068]')
   end
+
+  def test_hamming_distance
+    assert_nil("a" ^ "abc")
+    assert_equal(0, "" ^ "")
+    assert_equal(2, "abc" ^ "dbf")
+    assert_equal(1, "aXb" ^ "a\u{ff38}b")
+    assert_equal(1, "\u3042\u3044\u3046" ^ "\u3042\u3044\u3048")
+    assert_equal(3, "\u3042a\u3044\u3046" ^ "\u3042\u3044b\u3048")
+    assert_equal(1, "aXb".force_encoding("EUC-JP") ^ "aYb".force_encoding("Shift_JIS"))
+    assert_nil("\u3042\u3044\u3046".force_encoding("EUC-JP") ^ "\u3042\u3044\u3046".force_encoding("Shift_JIS"))
+  end
 end
-- 
1.6.4.4