Project

General

Profile

Feature #10552 » add_enum_frequencies.patch

brianhempel (Brian Hempel), 11/27/2014 07:55 AM

View differences:

ChangeLog
1
Thu Nov 27 15:29:06 2014  Brian Hempel  <plasticchicken@gmail.com>
2

  
3
	* enum.c: New methods Enumerable#{frequencies,relative_frequencies}.
4

  
5
	  Count the number of times each item appears in an Enumerable,
	  either as absolute counts or as fractions of the total.
6

  
7
	* test/ruby/test_enum.rb: Tests for new methods.
8

  
1 9
Thu Nov 27 13:11:00 2014  Koichi Sasada  <ko1@atdot.net>
2 10

  
3 11
	* gc.c (objspace_malloc_increase): enable lazy sweep on GC by malloc()
enum.c
759 759
}
760 760

  
761 761
static VALUE
762
frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, hash))
763
{
764
    VALUE freq;
765
	long freql;
766

  
767
    ENUM_WANT_SVALUE();
768

  
769
    freq = rb_hash_aref(hash, i);
770
    if (!RB_TYPE_P(freq, T_FIXNUM)) {
771
	freq = INT2FIX(0);
772
    }
773

  
774
	freql = FIX2LONG(freq);
775
	freq = LONG2FIX(freql+1);
776

  
777
	rb_hash_aset(hash, i, freq);
778

  
779
    return Qnil;
780
}
781

  
782
static VALUE
783
frequencies_sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, dummy))
784
{
785
    VALUE freq;
786

  
787
    freq = RARRAY_AREF(i, 1);
788

  
789
    return LONG2FIX(-FIX2LONG(freq));
790
}
791

  
792
/*
793
 *  call-seq:
794
 *     enum.frequencies -> a_hash
795
 *
796
 *  Counts the number of times each object appears in <i>enum</i>.
797
 *  Returns a hash where the keys are objects from <i>enum</i> and the
798
 *  values are the number of times the object occurs in <i>enum</i>.
799
 *
800
 *  The hash is sorted, with the most common objects first.
801
 *
802
 *     %w[cat bird bird horse].frequencies
803
 *       #=> {"bird" => 2, "horse" => 1, "cat" => 1}
804
 *
805
 */
806
static VALUE
807
enum_frequencies(VALUE obj)
808
{
809
    VALUE hash;
810
    VALUE sorted;
811

  
812
    hash = rb_hash_new();
813
    rb_block_call(obj, id_each, 0, 0, frequencies_i, hash);
814

  
815
    sorted = rb_block_call(hash, rb_intern("sort_by"), 0, 0, frequencies_sort_by_i, 0);
816
    hash = rb_funcall(sorted, rb_intern("to_h"), 0);
817

  
818
    OBJ_INFECT(hash, obj);
819

  
820
    return hash;
821
}
822

  
823
static VALUE
824
relative_frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, memop))
825
{
826
    NODE *memo = RNODE(memop);
827
    VALUE hash = memo->u1.value;
828
    VALUE freq;
829
	long freql;
830

  
831
    ENUM_WANT_SVALUE();
832

  
833
    freq = rb_hash_aref(hash, i);
834
    if (!RB_TYPE_P(freq, T_FIXNUM)) {
835
	freq = INT2FIX(0);
836
    }
837

  
838
	freql = FIX2LONG(freq);
839
	freq = LONG2FIX(freql+1);
840

  
841
    memo->u3.cnt += 1;
842

  
843
	rb_hash_aset(hash, i, freq);
844

  
845
    return Qnil;
846
}
847

  
848
static VALUE
849
relative_frequencies_norm_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, count))
850
{
851
    VALUE freq;
852
	long freql;
853

  
854
    freq = RARRAY_AREF(i, 1);
855
    freql = FIX2LONG(freq);
856
    freq = DBL2NUM((double)freql / count);
857

  
858
    RARRAY_ASET(i, 1, freq);
859

  
860
    return Qnil;
861
}
862

  
863
/*
864
 *  call-seq:
865
 *     enum.relative_frequencies -> a_hash
866
 *
867
 *  Calculates the relative frequency of each object appearing in <i>enum</i>.
868
 *  Returns a hash where the keys are objects from <i>enum</i> and the
869
 *  values are numbers between 0.0 and 1.0, indicating what proportion of
870
 *  the entries in <i>enum</i> contain the object.
871
 *
872
 *  For example, the pair <code>"crow" => 0.1</code> means that one out of
873
 *  every ten entries in <i>enum</i> contains <code>"crow"</code>.
874
 *
875
 *  The hash is sorted, with the most common objects first.
876
 *
877
 *     %w[cat bird bird horse].relative_frequencies
878
 *       #=> {"bird" => 0.5, "horse" => 0.25, "cat" => 0.25}
879
 *
880
 */
881
static VALUE
882
enum_relative_frequencies(VALUE obj)
883
{
884
    VALUE hash, sorted;
885
    NODE *memo;
886
    double count;
887

  
888
    hash = rb_hash_new();
889
    memo = NEW_MEMO(hash, 0, 0);
890
    rb_block_call(obj, id_each, 0, 0, relative_frequencies_i, (VALUE)memo);
891
    count = (double)memo->u3.cnt;
892

  
893
    sorted = rb_block_call(hash, rb_intern("sort_by"), 0, 0, frequencies_sort_by_i, 0);
894

  
895
    rb_block_call(sorted, id_each, 0, 0, relative_frequencies_norm_i, count);
896

  
897
    hash = rb_funcall(sorted, rb_intern("to_h"), 0);
898

  
899
    OBJ_INFECT(hash, obj);
900

  
901
    return hash;
902
}
903

  
904

  
905
static VALUE
762 906
first_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, params))
763 907
{
764 908
    NODE *memo = RNODE(params);
......
3403 3547
    rb_define_method(rb_mEnumerable, "reduce", enum_inject, -1);
3404 3548
    rb_define_method(rb_mEnumerable, "partition", enum_partition, 0);
3405 3549
    rb_define_method(rb_mEnumerable, "group_by", enum_group_by, 0);
3550
    rb_define_method(rb_mEnumerable, "frequencies", enum_frequencies, 0);
3551
    rb_define_method(rb_mEnumerable, "relative_frequencies", enum_relative_frequencies, 0);
3406 3552
    rb_define_method(rb_mEnumerable, "first", enum_first, -1);
3407 3553
    rb_define_method(rb_mEnumerable, "all?", enum_all, 0);
3408 3554
    rb_define_method(rb_mEnumerable, "any?", enum_any, 0);
test/ruby/test_enum.rb
169 169
    assert_equal(h, @obj.each_with_index.group_by(&cond))
170 170
  end
171 171

  
172
  # Enumerable#frequencies: checks the tally for the shared fixture and that
  # the most frequent element is the first entry of the result.
  def test_frequencies
    expected = { 1 => 2, 2 => 2, 3 => 1 }
    assert_equal(expected, @obj.frequencies)

    most_common = [:a, :b, :c, :c].frequencies.first
    assert_equal([:c, 2], most_common)
  end
178

  
179
  # Enumerable#relative_frequencies: checks the proportions for the shared
  # fixture and that the most frequent element is the first entry.
  def test_relative_frequencies
    expected = { 1 => 0.4, 2 => 0.4, 3 => 0.2 }
    assert_equal(expected, @obj.relative_frequencies)

    most_common = [:a, :b, :c, :c].relative_frequencies.first
    assert_equal([:c, 0.5], most_common)
  end
185

  
172 186
  def test_first
173 187
    assert_equal(1, @obj.first)
174 188
    assert_equal([1, 2, 3], @obj.first(3))