Project

General

Profile

Feature #10552 » add_enum_frequencies.patch

brianhempel (Brian Hempel), 11/27/2014 07:55 AM

View differences:

ChangeLog
Thu Nov 27 15:29:06 2014 Brian Hempel <plasticchicken@gmail.com>
* enum.c: New methods Enumerable#{frequencies,relative_frequencies}
Counts the number of times each item appears in an Enumerable.
* test/ruby/test_enum.rb: Tests for new methods.
Thu Nov 27 13:11:00 2014 Koichi Sasada <ko1@atdot.net>
* gc.c (objspace_malloc_increase): enable lazy sweep on GC by malloc()
enum.c
}
/*
 * Block callback for Enumerable#frequencies.
 *
 * Looks up the yielded element +i+ in +hash+ and stores its tally
 * incremented by one.  A missing entry (or any non-Fixnum value) is
 * treated as a tally of zero.
 *
 * Raises RangeError if the tally can no longer be represented as a Fixnum.
 * Always returns nil.
 */
static VALUE
frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, hash))
{
    VALUE freq;
    long freql;

    ENUM_WANT_SVALUE();

    freq = rb_hash_aref(hash, i);
    freql = FIXNUM_P(freq) ? FIX2LONG(freq) : 0;

    /*
     * LONG2FIX() performs no range check, so stepping past FIXNUM_MAX would
     * silently store a corrupted tally.  Fail loudly instead; this also keeps
     * every stored value a Fixnum, which the sorting callback relies on when
     * it reads the tally back with FIX2LONG().
     */
    if (freql >= FIXNUM_MAX) {
        rb_raise(rb_eRangeError, "frequency count exceeds Fixnum range");
    }

    rb_hash_aset(hash, i, LONG2FIX(freql + 1));
    return Qnil;
}
/*
 * sort_by key callback: reads the Fixnum stored at index 1 of the yielded
 * array +i+ and returns its negation, so that an ascending sort places the
 * largest counts first.  The second callback argument is unused.
 */
static VALUE
frequencies_sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, dummy))
{
    const long occurrences = FIX2LONG(RARRAY_AREF(i, 1));

    return LONG2FIX(-occurrences);
}
/*
 *  call-seq:
 *     enum.frequencies  -> a_hash
 *
 *  Tallies how often each object occurs in <i>enum</i>.  The result is a
 *  hash whose keys are the objects of <i>enum</i> and whose values are
 *  the number of times each object was yielded.
 *
 *  Entries are ordered from the most common object to the least common.
 *  Objects with the same count appear in whatever order sort_by leaves
 *  them.
 *
 *     %w[cat bird bird horse].frequencies
 *     #=> {"bird" => 2, "horse" => 1, "cat" => 1}
 *
 */
static VALUE
enum_frequencies(VALUE obj)
{
    VALUE tallies = rb_hash_new();
    VALUE ordered_pairs;

    /* Pass 1: count every element yielded by #each. */
    rb_block_call(obj, id_each, 0, 0, frequencies_i, tallies);

    /* Pass 2: order the pairs by descending count, then rebuild a hash. */
    ordered_pairs = rb_block_call(tallies, rb_intern("sort_by"), 0, 0,
                                  frequencies_sort_by_i, 0);
    tallies = rb_funcall(ordered_pairs, rb_intern("to_h"), 0);

    OBJ_INFECT(tallies, obj);
    return tallies;
}
/*
 * Block callback for Enumerable#relative_frequencies.
 *
 * +memop+ is a memo node whose u1.value is the tally hash and whose u3.cnt
 * is the running total of yielded elements.  Each call increments the tally
 * for the yielded element +i+ and bumps the running total by one.  A missing
 * entry (or any non-Fixnum value) is treated as a tally of zero.
 *
 * Raises RangeError if the tally can no longer be represented as a Fixnum.
 * Always returns nil.
 */
static VALUE
relative_frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, memop))
{
    NODE *memo = RNODE(memop);
    VALUE hash = memo->u1.value;
    VALUE freq;
    long freql;

    ENUM_WANT_SVALUE();

    freq = rb_hash_aref(hash, i);
    freql = FIXNUM_P(freq) ? FIX2LONG(freq) : 0;

    /*
     * LONG2FIX() performs no range check, so stepping past FIXNUM_MAX would
     * silently store a corrupted tally.  Fail loudly instead; this also keeps
     * every stored value a Fixnum, which the later sorting and normalising
     * callbacks rely on when they read the tally back with FIX2LONG().
     */
    if (freql >= FIXNUM_MAX) {
        rb_raise(rb_eRangeError, "frequency count exceeds Fixnum range");
    }

    memo->u3.cnt += 1;
    rb_hash_aset(hash, i, LONG2FIX(freql + 1));
    return Qnil;
}
/*
 * Block callback that turns an absolute count into a proportion.
 *
 * Reads the Fixnum at index 1 of the yielded array +i+, divides it by
 * +count+ and writes the quotient back into the same slot as a Float.
 * Note that +count+ is used directly as a C number in the division, not
 * unwrapped as a Ruby object.  Always returns nil.
 */
static VALUE
relative_frequencies_norm_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, count))
{
    const long occurrences = FIX2LONG(RARRAY_AREF(i, 1));
    VALUE proportion = DBL2NUM((double)occurrences / count);

    RARRAY_ASET(i, 1, proportion);
    return Qnil;
}
/*
 *  call-seq:
 *     enum.relative_frequencies  -> a_hash
 *
 *  Calculates the relative frequency of each object appearing in
 *  <i>enum</i>.  Returns a hash where the keys are objects from
 *  <i>enum</i> and the values are numbers between 0.0 and 1.0, indicating
 *  what proportion of the entries in <i>enum</i> are that object.
 *
 *  For example, the pair <code>"crow" => 0.1</code> means that one out of
 *  every ten entries in <i>enum</i> is <code>"crow"</code>.
 *
 *  Entries are ordered from the most common object to the least common.
 *  Objects with the same count appear in whatever order sort_by leaves
 *  them.
 *
 *     %w[cat bird bird horse].relative_frequencies
 *     #=> {"bird" => 0.5, "horse" => 0.25, "cat" => 0.25}
 *
 */
static VALUE
enum_relative_frequencies(VALUE obj)
{
    VALUE hash, sorted;
    NODE *memo;
    long count;

    /* Pass 1: tally each element; memo->u3.cnt accumulates the total. */
    hash = rb_hash_new();
    memo = NEW_MEMO(hash, 0, 0);
    rb_block_call(obj, id_each, 0, 0, relative_frequencies_i, (VALUE)memo);
    count = memo->u3.cnt;

    /* Pass 2: order the [object, tally] pairs by descending tally. */
    sorted = rb_block_call(hash, rb_intern("sort_by"), 0, 0, frequencies_sort_by_i, 0);

    /*
     * Pass 3: replace each tally with tally / total.  The total rides in the
     * callback-argument slot as a raw integer, so keep it integral and cast
     * explicitly instead of letting a double be implicitly converted to
     * VALUE.  For an empty enum there are no pairs, so no division happens.
     */
    rb_block_call(sorted, id_each, 0, 0, relative_frequencies_norm_i, (VALUE)count);

    hash = rb_funcall(sorted, rb_intern("to_h"), 0);
    OBJ_INFECT(hash, obj);
    return hash;
}
static VALUE
first_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, params))
{
NODE *memo = RNODE(params);
......
rb_define_method(rb_mEnumerable, "reduce", enum_inject, -1);
rb_define_method(rb_mEnumerable, "partition", enum_partition, 0);
rb_define_method(rb_mEnumerable, "group_by", enum_group_by, 0);
rb_define_method(rb_mEnumerable, "frequencies", enum_frequencies, 0);
rb_define_method(rb_mEnumerable, "relative_frequencies", enum_relative_frequencies, 0);
rb_define_method(rb_mEnumerable, "first", enum_first, -1);
rb_define_method(rb_mEnumerable, "all?", enum_all, 0);
rb_define_method(rb_mEnumerable, "any?", enum_any, 0);
test/ruby/test_enum.rb
assert_equal(h, @obj.each_with_index.group_by(&cond))
end
# Enumerable#frequencies maps each element to its occurrence count and
# lists the most common element first.
def test_frequencies
  # Expected tallies for the shared @obj fixture (defined outside this view).
  expected = { 1 => 2, 2 => 2, 3 => 1 }
  assert_equal(expected, @obj.frequencies)

  # :c occurs twice, so it must be the leading entry.
  leading_entry = [:a, :b, :c, :c].frequencies.first
  assert_equal([:c, 2], leading_entry)
end
# Enumerable#relative_frequencies maps each element to its share of all
# entries and lists the most common element first.
def test_relative_frequencies
  # Expected proportions for the shared @obj fixture (defined outside this view).
  expected = { 1 => 0.4, 2 => 0.4, 3 => 0.2 }
  assert_equal(expected, @obj.relative_frequencies)

  # :c is two of the four entries, so it must lead with a share of 0.5.
  leading_entry = [:a, :b, :c, :c].relative_frequencies.first
  assert_equal([:c, 0.5], leading_entry)
end
def test_first
assert_equal(1, @obj.first)
assert_equal([1, 2, 3], @obj.first(3))
    (1-1/1)