Feature #10552 » add_enum_frequencies.patch
ChangeLog | ||
---|---|---|
Thu Nov 27 15:29:06 2014 Brian Hempel <plasticchicken@gmail.com>
|
||
* enum.c: New methods Enumerable#{frequencies,relative_frequencies}
|
||
Counts the number of times each item appears in an Enumerable.
|
||
* test/ruby/test_enum.rb: Tests for new methods.
|
||
Thu Nov 27 13:11:00 2014 Koichi Sasada <ko1@atdot.net>
|
||
* gc.c (objspace_malloc_increase): enable lazy sweep on GC by malloc()
|
enum.c | ||
---|---|---|
}
|
||
static VALUE
|
||
frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, hash))
|
||
{
|
||
VALUE freq;
|
||
long freql;
|
||
ENUM_WANT_SVALUE();
|
||
freq = rb_hash_aref(hash, i);
|
||
if (!RB_TYPE_P(freq, T_FIXNUM)) {
|
||
freq = INT2FIX(0);
|
||
}
|
||
freql = FIX2LONG(freq);
|
||
freq = LONG2FIX(freql+1);
|
||
rb_hash_aset(hash, i, freq);
|
||
return Qnil;
|
||
}
|
||
static VALUE
|
||
frequencies_sort_by_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, dummy))
|
||
{
|
||
VALUE freq;
|
||
freq = RARRAY_AREF(i, 1);
|
||
return LONG2FIX(-FIX2LONG(freq));
|
||
}
|
||
/*
|
||
* call-seq:
|
||
* enum.frequencies -> a_hash
|
||
*
|
||
* Counts the number of times each object appears in <i>enum</i>.
|
||
* Returns a hash where the keys are objects from <i>enum</i> and the
|
||
* values are the number of times the object occurs in <i>enum</i>.
|
||
*
|
||
* The hash is sorted, with the most common objects first.
|
||
*
|
||
* %w[cat bird bird horse].frequencies
|
||
* #=> {"bird" => 2, "horse" => 1, "cat" => 1}
|
||
*
|
||
*/
|
||
static VALUE
|
||
enum_frequencies(VALUE obj)
|
||
{
|
||
VALUE hash;
|
||
VALUE sorted;
|
||
hash = rb_hash_new();
|
||
rb_block_call(obj, id_each, 0, 0, frequencies_i, hash);
|
||
sorted = rb_block_call(hash, rb_intern("sort_by"), 0, 0, frequencies_sort_by_i, 0);
|
||
hash = rb_funcall(sorted, rb_intern("to_h"), 0);
|
||
OBJ_INFECT(hash, obj);
|
||
return hash;
|
||
}
|
||
static VALUE
|
||
relative_frequencies_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, memop))
|
||
{
|
||
NODE *memo = RNODE(memop);
|
||
VALUE hash = memo->u1.value;
|
||
VALUE freq;
|
||
long freql;
|
||
ENUM_WANT_SVALUE();
|
||
freq = rb_hash_aref(hash, i);
|
||
if (!RB_TYPE_P(freq, T_FIXNUM)) {
|
||
freq = INT2FIX(0);
|
||
}
|
||
freql = FIX2LONG(freq);
|
||
freq = LONG2FIX(freql+1);
|
||
memo->u3.cnt += 1;
|
||
rb_hash_aset(hash, i, freq);
|
||
return Qnil;
|
||
}
|
||
static VALUE
|
||
relative_frequencies_norm_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, count))
|
||
{
|
||
VALUE freq;
|
||
long freql;
|
||
freq = RARRAY_AREF(i, 1);
|
||
freql = FIX2LONG(freq);
|
||
freq = DBL2NUM((double)freql / count);
|
||
RARRAY_ASET(i, 1, freq);
|
||
return Qnil;
|
||
}
|
||
/*
|
||
* call-seq:
|
||
* enum.relative_frequencies -> a_hash
|
||
*
|
||
* Calculates the relative frequency of each object appearing in <i>enum</i>.
|
||
* Returns a hash where the keys are objects from <i>enum</i> and the
|
||
* values are numbers between 0.0 and 1.0, indicating what proportion of
|
||
* the entries in <i>enum</i> contain the object.
|
||
*
|
||
* For example, the pair <code>"crow" => 0.1</code> means that one out of
|
||
* every ten entries in <i>enum</i> contains <code>"crow"</code>.
|
||
*
|
||
* The hash is sorted, with the most common objects first.
|
||
*
|
||
* %w[cat bird bird horse].relative_frequencies
|
||
* #=> {"bird" => 0.5, "horse" => 0.25, "cat" => 0.25}
|
||
*
|
||
*/
|
||
static VALUE
|
||
enum_relative_frequencies(VALUE obj)
|
||
{
|
||
VALUE hash, sorted;
|
||
NODE *memo;
|
||
double count;
|
||
hash = rb_hash_new();
|
||
memo = NEW_MEMO(hash, 0, 0);
|
||
rb_block_call(obj, id_each, 0, 0, relative_frequencies_i, (VALUE)memo);
|
||
count = (double)memo->u3.cnt;
|
||
sorted = rb_block_call(hash, rb_intern("sort_by"), 0, 0, frequencies_sort_by_i, 0);
|
||
rb_block_call(sorted, id_each, 0, 0, relative_frequencies_norm_i, count);
|
||
hash = rb_funcall(sorted, rb_intern("to_h"), 0);
|
||
OBJ_INFECT(hash, obj);
|
||
return hash;
|
||
}
|
||
static VALUE
|
||
first_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, params))
|
||
{
|
||
NODE *memo = RNODE(params);
|
||
... | ... | |
rb_define_method(rb_mEnumerable, "reduce", enum_inject, -1);
|
||
rb_define_method(rb_mEnumerable, "partition", enum_partition, 0);
|
||
rb_define_method(rb_mEnumerable, "group_by", enum_group_by, 0);
|
||
rb_define_method(rb_mEnumerable, "frequencies", enum_frequencies, 0);
|
||
rb_define_method(rb_mEnumerable, "relative_frequencies", enum_relative_frequencies, 0);
|
||
rb_define_method(rb_mEnumerable, "first", enum_first, -1);
|
||
rb_define_method(rb_mEnumerable, "all?", enum_all, 0);
|
||
rb_define_method(rb_mEnumerable, "any?", enum_any, 0);
|
test/ruby/test_enum.rb | ||
---|---|---|
assert_equal(h, @obj.each_with_index.group_by(&cond))
|
||
end
|
||
# Checks the counts for the shared fixture and that the most common
# element is the first entry of the result.
def test_frequencies
  expected = { 1 => 2, 2 => 2, 3 => 1}
  assert_equal(expected, @obj.frequencies)

  most_common = [:a, :b, :c, :c].frequencies.first
  assert_equal([:c, 2], most_common)
end
|
||
# Checks the proportions for the shared fixture and that the most common
# element is the first entry of the result.
def test_relative_frequencies
  expected = { 1 => 0.4, 2 => 0.4, 3 => 0.2}
  assert_equal(expected, @obj.relative_frequencies)

  most_common = [:a, :b, :c, :c].relative_frequencies.first
  assert_equal([:c, 0.5], most_common)
end
|
||
def test_first
|
||
assert_equal(1, @obj.first)
|
||
assert_equal([1, 2, 3], @obj.first(3))
|