Use this instead of a HashMap[E, Int]
var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
assert c["a"] == 2
assert c["b"] == 3
assert c["c"] == 1
assert c["d"] == 0
The counter class can also be used to gather statistical informations.
assert c.length == 3 # because 3 distinct values
assert c.max == "b" # because "b" has the most count (3)
assert c.avg == 2.0
counter :: Counter :: defaultinit
counter :: Counter :: element_to_s
The method used to display an elementcounter :: Counter :: print_content
Prints the content of the counter along with statisticscounter :: Counter :: print_elements
Display up tocount
most used elements and count
least used elements
serialization :: Serializable :: accept_json_serializer
Refinable service to customize the serialization of this class to JSONserialization :: Serializable :: accept_msgpack_attribute_counter
Hook to customize the behavior of theAttributeCounter
serialization :: Serializable :: accept_msgpack_serializer
Hook to customize the serialization of this class to MessagePackserialization :: Serializable :: add_to_bundle
Called by[]=
to dynamically choose the appropriate method according
core :: Object :: class_factory
Implementation used byget_class
to create the specific class.
serialization :: Serializable :: core_serialize_to
Actual serialization ofself
to serializer
counter :: Counter :: defaultinit
core :: Object :: defaultinit
core :: Map :: defaultinit
core :: MapRead :: defaultinit
counter :: Counter :: element_to_s
The method used to display an elementcore :: MapRead :: filter_keys
Return all elements ofkeys
that have a value.
serialization :: Serializable :: from_deserializer
Create an instance of this class from thedeserializer
core :: MapRead :: get_or_default
Get the item atkey
or return default
if not in map
core :: MapRead :: get_or_null
Get the item atkey
or null if key
is not in the map.
core :: Object :: is_same_instance
Return true ifself
and other
are the same instance (i.e. same identity).
core :: Object :: is_same_serialized
Isself
the same as other
in a serialization context?
core :: Object :: is_same_type
Return true ifself
and other
have the same dynamic type.
core :: MapRead :: keys_sorted_by_values
Return an array of all keys sorted with their values usingcomparator
.
core :: MapRead :: lookup_all_values
Search all the values inpe.greaters
.
core :: MapRead :: lookup_values
Combine the values inpe.greaters
from the most smaller elements that have a value.
serialization :: Serializable :: msgpack_extra_array_items
Hook to request a larger than usual metadata arraycore :: Object :: output_class_name
Display class name on stdout (debug only).counter :: Counter :: print_content
Prints the content of the counter along with statisticscounter :: Counter :: print_elements
Display up tocount
most used elements and count
least used elements
core :: MapRead :: provide_default_value
Called by the underling implementation of[]
to provide a default value when a key
has no value
serialization :: Serializable :: serialize_msgpack
Serializeself
to MessagePack bytes
serialization :: Serializable :: serialize_to
Serializeself
to serializer
serialization :: Serializable :: serialize_to_json
Serializeself
to JSON
core :: MapRead :: to_map_comparator
A comparator that compares things with their values in self.serialization :: Serializable :: to_pretty_json
Serializeself
to plain pretty JSON
core :: MapRead :: values_sorted_by_key
Return an array of all values sorted with their keys usingcomparator
.
Serializer::serialize
# A counter counts occurrences of things
# Use this instead of a `HashMap[E, Int]`
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c["a"] == 2
# assert c["b"] == 3
# assert c["c"] == 1
# assert c["d"] == 0
# ~~~
#
# The counter class can also be used to gather statistical informations.
#
# ~~~~
# assert c.length == 3 # because 3 distinct values
# assert c.max == "b" # because "b" has the most count (3)
# assert c.avg == 2.0 # because it is the mean of the counts
# ~~~~
class Counter[E]
super Map[E, Int]
# Total number of counted occurrences
#
# ~~~
# var c = new Counter[String]
# assert c.sum == 0
# c.inc_all(["a", "a", "b", "b", "b", "c"])
# assert c.sum == 6
# ~~~
var sum: Int = 0
private var map = new HashMap[E, Int]
redef fun iterator do return map.iterator
# The number of counted occurrences of `e`
redef fun [](e)
do
var map = self.map
if map.has_key(e) then return map[e]
return 0
end
redef fun []=(e, value)
do
sum -= self[e]
self.map[e] = value
sum += value
end
redef fun keys do return map.keys
redef fun values do return map.values
redef fun length do return map.length
redef fun is_empty do return map.is_empty
redef fun clear do
sum = 0
map.clear
end
redef fun add_all(other) do
for k, v in other do
self[k] += v
end
end
# Count one more occurrence of `e`
fun inc(e: E)
do
self.map[e] = self[e] + 1
sum += 1
end
# Count one more for each element of `es`
fun inc_all(es: Collection[E])
do
for e in es do inc(e)
end
# Decrement the value of `e` by 1
fun dec(e: E) do
if not has_key(e) then
self.map[e] = 0
else
self.map[e] = self[e] - 1
sum += - 1
end
end
# Decrement the value for each element of `es`
fun dec_all(es: Collection[E])
do
for e in es do dec(e)
end
# A new Counter initialized with `inc_all`.
init from(es: Collection[E])
do
inc_all(es)
end
# Return an array of elements sorted by occurrences
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c.sort == ["c", "a", "b"]
# ~~~
fun sort: Array[E]
do
var res = map.keys.to_a
var sorter = new CounterComparator[E](self)
sorter.sort(res)
return res
end
# The method used to display an element
# @toimplement by default just call `to_s` on the element
protected fun element_to_s(e: E): String
do
if e == null then return "null"
return e.to_s
end
# Display statistical information
fun print_summary
do
var list = self.sort
print " population: {list.length}"
if list.is_empty then return
print " minimum value: {self[list.first]}"
print " maximum value: {self[list.last]}"
print " total value: {self.sum}"
print " average value: {div(self.sum,list.length)}"
print " distribution:"
var count = 0
var sum = 0
var limit = self[list.first]
for t in list do
if self[t] > limit then
print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
count = 0
sum = 0
while self[t] > limit do
limit = limit * 2
if limit == 0 then limit = 1
end
end
count += 1
sum += self[t]
end
print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
end
# Display up to `count` most used elements and `count` least used elements
# Use `element_to_s` to display the element
fun print_elements(count: Int)
do
print " list:"
var list = self.sort
var min = count
if list.length <= count*2 then min = list.length
for i in [0..min[ do
var t = list[list.length-i-1]
print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
end
if list.length <= count*2 then return
print " ..."
for i in [0..min[ do
var t = list[min-i-1]
print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
end
end
# Return the element with the highest value (aka. the mode)
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c.max == "b"
# ~~~
#
# If more than one max exists, the first one is returned.
fun max: nullable E do
var max: nullable Int = null
var elem: nullable E = null
for e in map.keys do
var v = map[e]
if max == null or v > max then
max = v
elem = e
end
end
return elem
end
# Return the couple with the lowest value
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c.min == "c"
# ~~~
#
# If more than one min exists, the first one is returned.
fun min: nullable E do
var min: nullable Int = null
var elem: nullable E = null
for e in map.keys do
var v = map[e]
if min == null or v < min then
min = v
elem = e
end
end
return elem
end
# Values average (aka. arithmetic mean)
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c.avg == 2.0
# ~~~
fun avg: Float do
if values.is_empty then return 0.0
return (sum / values.length).to_f
end
# The standard derivation of the counter values
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c.std_dev > 0.81
# assert c.std_dev < 0.82
# ~~~
fun std_dev: Float do
var avg = self.avg
var sum = 0.0
for value in map.values do
sum += (value.to_f - avg).pow(2.0)
end
return (sum / map.length.to_f).sqrt
end
# The information entropy (Shannon entropy) of the elements in the counter (in bits).
fun entropy: Float
do
var res = 0.0
var sum = self.sum.to_f
for k, v in self do
var f = v.to_f / sum
res = res - f * f.log_base(2.0)
end
return res
end
# Prints the content of the counter along with statistics
#
# Content is printed in order (if available) from lowest to highest on the keys.
# Else, it is printed as-is
fun print_content do
var a = keys.to_a
if a isa Array[Comparable] then default_comparator.sort(a)
var subtotal = 0
for i in a do
subtotal += self[i]
printn("* ", i or else "null", " = ", self[i], " => occurences ", self[i].to_f / sum.to_f * 100.0, "%, cumulative ", subtotal.to_f / sum.to_f * 100.0, "% \n")
end
end
# Packs elements into separate arrays based on their occurences
#
# ~~~nit
# var x = "aaaabbbeeecccddhhgjt"
# var c = new Counter[Char]
# for i in x do c.inc i
# var ret = c.pack
# assert ret.join(", ") == """[t,g,j], [d,h], [c,b,e], [a]"""
# ~~~
fun pack: Array[Array[E]] do
var ret = new Array[Array[E]]
var base = self.sort
if base.is_empty then return ret
var curr = new Array[E]
curr.push base.first
var curr_score = self[base.first]
base.shift
for i in base do
if self[i] == curr_score then
curr.push i
continue
end
curr_score = self[i]
ret.push curr
curr = new Array[E]
curr.push i
end
if not curr.is_empty then ret.push curr
return ret
end
end
lib/counter/counter.nit:20,1--321,3