0001-string.c-rb_str_hamming_distance-Init_String.patch

mrkn (Kenta Murata), 01/07/2010 05:06 pm

Download (3.9 kB)

b/ChangeLog
1
Thu Jan  7 16:32:00 2010  Kenta Murata <mrkn@mrkn.jp>
2

  
3
	* string.c (rb_str_hamming_distance, Init_String),
4
	test/ruby/test_string.rb (test_hamming_distance): Added support for
5
	calculating hamming distance between two strings.
6

  
1 7
Thu Jan  7 07:56:09 2010  Ryan Davis  <ryand-ruby@zenspider.com>
2 8

  
3 9
	* lib/minitest/*.rb: Imported minitest 1.5.0 r5596.
b/string.c
6903 6903
    *var = val;
6904 6904
}
6905 6905

  
6906
/*
 *  call-seq:
 *     str.hamming_distance(other_str)   -> integer or nil
 *     str ^ other_str                   -> integer or nil
 *
 *  Counts the positions at which str1 and str2 hold different characters
 *  and returns that count converted with LONG2NUM().
 *
 *  Returns nil (Qnil) instead of raising when:
 *  - str2 is not a T_STRING,
 *  - rb_enc_compatible() finds no common encoding for the two strings, or
 *  - str_strlen() reports different lengths for the two strings.
 *
 *  When both strings are single_byte_optimizable() they are compared byte
 *  by byte.  Otherwise each step first tries rb_enc_ascget() on both
 *  sides; if either side does not yield a non-negative value, the current
 *  characters are measured with rb_enc_mbclen() and compared with
 *  memcmp(), counting as different when their bytes or byte lengths
 *  differ.
6907
 */
6908
VALUE
6909
rb_str_hamming_distance(VALUE str1, VALUE str2)
6910
{
6911
    long d = 0; /* number of differing positions found so far */
6912
    rb_encoding *enc;
6913
    char *p1, *p1end, *p2, *p2end;
6914

  
6915
    if (TYPE(str2) != T_STRING) return Qnil;
6916

  
6917
    enc = rb_enc_compatible(str1, str2);
6918
    if (!enc) return Qnil;
6919
    if (str_strlen(str1, enc) != str_strlen(str2, enc)) return Qnil;
6920

  
6921
    p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
6922
    p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
6923
    if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
6924
        /* NOTE(review): only p1 is bounds-checked; this presumably relies on
         * the equal-length check above plus both strings being single-byte,
         * so that the byte lengths match -- confirm. */
        while (p1 < p1end) {
6925
            if (*p1 != *p2) ++d;
6926
            ++p1;
6927
            ++p2;
6928
        }
6929
    }
6930
    else {
6931
        while (p1 < p1end && p2 < p2end) {
6932
            int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
6933
            int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
6934
            if (0 <= c1 && 0 <= c2) {
6935
                /* both sides yielded a value from rb_enc_ascget(): compare them directly */
                if (c1 != c2) ++d;
6936
            }
6937
            else {
6938
                int r, len;
6939
                /* l1/l2 are (re)computed here, so the advance below does not
                 * depend on what rb_enc_ascget() stored on failure */
                l1 = rb_enc_mbclen(p1, p1end, enc);
6940
                l2 = rb_enc_mbclen(p2, p2end, enc);
6941
                len = l1 < l2 ? l1 : l2; 
6942
                r = memcmp(p1, p2, len);
6943
                /* a shared prefix with different byte lengths still counts as a difference */
                if (r != 0 || l1 != l2) ++d;
6944
            }
6945
            p1 += l1;
6946
            p2 += l2;
6947
        }
6948
    }
6949
    return LONG2NUM(d);
6950
}
6951

  
6906 6952

  
6907 6953
/*
6908 6954
 *  call-seq:
......
7468 7514
    rb_define_method(rb_cString, "partition", rb_str_partition, 1);
7469 7515
    rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
7470 7516

  
7517
    rb_define_method(rb_cString, "hamming_distance", rb_str_hamming_distance, 1);
7518
    rb_define_method(rb_cString, "^", rb_str_hamming_distance, 1);
7519

  
7471 7520
    rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
7472 7521
    rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
7473 7522
    rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
b/test/ruby/test_string.rb
1850 1850
    assert_equal(s, k, '[ruby-dev:39068]')
1851 1851
    assert_equal(Encoding::UTF_8, k.encoding, '[ruby-dev:39068]')
1852 1852
  end
1853

  
1854
  # Exercises String#^ (registered in this patch alongside
  # String#hamming_distance): equal-length strings yield the number of
  # differing character positions, and the cases below that expect nil
  # cover unequal lengths and differently encoded non-ASCII strings.
  def test_hamming_distance
1855
    assert_nil("a" ^ "abc")  # lengths differ
1856
    assert_equal(0, "" ^ "")  # two empty strings
1857
    assert_equal(2, "abc" ^ "dbf")  # first and last characters differ
1858
    assert_equal(1, "aXb" ^ "a\u{ff38}b")  # ASCII "X" vs. non-ASCII U+FF38
1859
    assert_equal(1, "\u3042\u3044\u3046" ^ "\u3042\u3044\u3048")  # non-ASCII strings, last character differs
1860
    assert_equal(3, "\u3042a\u3044\u3046" ^ "\u3042\u3044b\u3048")  # mixed ASCII/non-ASCII, only the first position matches
1861
    # ASCII-only content tagged with two different encodings is still expected to be comparable
    assert_equal(1, "aXb".force_encoding("EUC-JP") ^ "aYb".force_encoding("Shift_JIS"))
1862
    # non-ASCII content tagged with two different encodings is expected to give nil
    # (presumably because the encodings are then incompatible -- confirm)
    assert_nil("\u3042\u3044\u3046".force_encoding("EUC-JP") ^ "\u3042\u3044\u3046".force_encoding("Shift_JIS"))
1863
  end
1853 1864
end
1854
-