X-Git-Url: http://nitlanguage.org diff --git a/lib/counter.nit b/lib/counter.nit index df4beca..e539bc5 100644 --- a/lib/counter.nit +++ b/lib/counter.nit @@ -19,10 +19,33 @@ import poset # A counter counts occurrences of things # Use this instead of a `HashMap[E, Int]` -class Counter[E: Object] +# +# ~~~ +# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) +# assert c["a"] == 2 +# assert c["b"] == 3 +# assert c["c"] == 1 +# assert c["d"] == 0 +# ~~~ +# +# The counter class can also be used to gather statistical informations. +# +# ~~~~ +# assert c.length == 3 # because 3 distinct values +# assert c.max == "b" # because "b" has the most count (3) +# assert c.avg == 2.0 # because it is the mean of the counts +# ~~~~ +class Counter[E] super Map[E, Int] # Total number of counted occurrences + # + # ~~~ + # var c = new Counter[String] + # assert c.sum == 0 + # c.inc_all(["a", "a", "b", "b", "b", "c"]) + # assert c.sum == 6 + # ~~~ var sum: Int = 0 private var map = new HashMap[E, Int] @@ -30,14 +53,14 @@ class Counter[E: Object] redef fun iterator do return map.iterator # The number of counted occurrences of `e` - redef fun [](e: E): Int + redef fun [](e) do var map = self.map if map.has_key(e) then return map[e] return 0 end - redef fun []=(e: E, value: Int) + redef fun []=(e, value) do sum -= self[e] self.map[e] = value @@ -52,7 +75,16 @@ class Counter[E: Object] redef fun is_empty do return map.is_empty - redef fun clear do map.clear + redef fun clear do + sum = 0 + map.clear + end + + redef fun add_all(other) do + for k, v in other do + self[k] += v + end + end # Count one more occurrence of `e` fun inc(e: E) @@ -61,11 +93,44 @@ class Counter[E: Object] sum += 1 end + # Count one more for each element of `es` + fun inc_all(es: Collection[E]) + do + for e in es do inc(e) + end + + # Decrement the value of `e` by 1 + fun dec(e: E) do + if not has_key(e) then + self.map[e] = 0 + else + self.map[e] = self[e] - 1 + sum += - 1 + end + end + + # Decrement the value for each element of `es` + fun dec_all(es: Collection[E]) + do + for e in es do dec(e) + end + + # A new Counter initialized with `inc_all`. + init from(es: Collection[E]) + do + inc_all(es) + end + # Return an array of elements sorted by occurrences + # + # ~~~ + # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) + # assert c.sort == ["c", "a", "b"] + # ~~~ fun sort: Array[E] do var res = map.keys.to_a - var sorter = new CounterSorter[E](self) + var sorter = new CounterComparator[E](self) sorter.sort(res) return res end @@ -74,7 +139,8 @@ class Counter[E: Object] # @toimplement by default just call `to_s` on the element protected fun element_to_s(e: E): String do - do return e.to_s + if e == null then return "null" + return e.to_s end # Display statistical information @@ -127,11 +193,19 @@ class Counter[E: Object] end end - # Return the element with the highest value + # Return the element with the highest value (aka. the mode) + # + # ~~~ + # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) + # assert c.max == "b" + # ~~~ + # + # If more than one max exists, the first one is returned. fun max: nullable E do var max: nullable Int = null var elem: nullable E = null - for e, v in map do + for e in map.keys do + var v = map[e] if max == null or v > max then max = v elem = e @@ -141,10 +215,18 @@ class Counter[E: Object] end # Return the couple with the lowest value + # + # ~~~ + # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) + # assert c.min == "c" + # ~~~ + # + # If more than one min exists, the first one is returned. fun min: nullable E do var min: nullable Int = null var elem: nullable E = null - for e, v in map do + for e in map.keys do + var v = map[e] if min == null or v < min then min = v elem = e @@ -153,13 +235,24 @@ class Counter[E: Object] return elem end - # Values average + # Values average (aka. arithmetic mean) + # + # ~~~ + # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) + # assert c.avg == 2.0 + # ~~~ fun avg: Float do if values.is_empty then return 0.0 return (sum / values.length).to_f end # The standard derivation of the counter values + # + # ~~~ + # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"]) + # assert c.std_dev > 0.81 + # assert c.std_dev < 0.82 + # ~~~ fun std_dev: Float do var avg = self.avg var sum = 0.0 @@ -168,10 +261,86 @@ class Counter[E: Object] end return (sum / map.length.to_f).sqrt end + + # The information entropy (Shannon entropy) of the elements in the counter (in bits). + fun entropy: Float + do + var res = 0.0 + var sum = self.sum.to_f + for k, v in self do + var f = v.to_f / sum + res = res - f * f.log_base(2.0) + end + return res + end + + # Prints the content of the counter along with statistics + # + # Content is printed in order (if available) from lowest to highest on the keys. + # Else, it is printed as-is + fun print_content do + var a = keys.to_a + if a isa Array[Comparable] then default_comparator.sort(a) + var subtotal = 0 + for i in a do + subtotal += self[i] + printn("* ", i or else "null", " = ", self[i], " => occurences ", self[i].to_f / sum.to_f * 100.0, "%, cumulative ", subtotal.to_f / sum.to_f * 100.0, "% \n") + end + end + + # Packs elements into separate arrays based on their occurences + # + # ~~~nit + # var x = "aaaabbbeeecccddhhgjt" + # var c = new Counter[Char] + # for i in x do c.inc i + # var ret = c.pack + # assert ret.join(", ") == """[t,g,j], [d,h], [c,b,e], [a]""" + # ~~~ + fun pack: Array[Array[E]] do + var ret = new Array[Array[E]] + var base = self.sort + if base.is_empty then return ret + var curr = new Array[E] + curr.push base.first + var curr_score = self[base.first] + base.shift + for i in base do + if self[i] == curr_score then + curr.push i + continue + end + curr_score = self[i] + ret.push curr + curr = new Array[E] + curr.push i + end + if not curr.is_empty then ret.push curr + return ret + end +end + +redef class Collection[E] + # Create and fill up a counter with the elements of `self. + # + # ~~~ + # var cpt = "abaa".chars.to_counter + # assert cpt['a'] == 3 + # assert cpt['b'] == 1 + # assert cpt.length == 2 + # assert cpt.sum == 4 + # ~~~ + fun to_counter: Counter[E] + do + var res = new Counter[E] + res.inc_all(self) + return res + end end -private class CounterSorter[E: Object] - super AbstractSorter[E] +private class CounterComparator[E] + super Comparator + redef type COMPARED: E var counter: Counter[E] redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b] end @@ -180,7 +349,7 @@ redef class POSet[E] private fun show_counter(c: Counter[Int]) do var list = c.sort - (new ComparableSorter[Int]).sort(list) + default_comparator.sort(list) for e in list do print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)" end