examples: annotate examples
[nit.git] / lib / counter.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Simple numerical statistical analysis and presentation
16 module counter
17
18 import poset
19
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
22 #
23 # ~~~
24 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
25 # assert c["a"] == 2
26 # assert c["b"] == 3
27 # assert c["c"] == 1
28 # assert c["d"] == 0
29 # ~~~
30 #
31 # The counter class can also be used to gather statistical informations.
32 #
33 # ~~~~
34 # assert c.length == 3 # because 3 distinct values
35 # assert c.max == "b" # because "b" has the most count (3)
36 # assert c.avg == 2.0 # because it is the mean of the counts
37 # ~~~~
38 class Counter[E]
39 super Map[E, Int]
40
41 # Total number of counted occurrences
42 #
43 # ~~~
44 # var c = new Counter[String]
45 # assert c.sum == 0
46 # c.inc_all(["a", "a", "b", "b", "b", "c"])
47 # assert c.sum == 6
48 # ~~~
49 var sum: Int = 0
50
51 private var map = new HashMap[E, Int]
52
53 redef fun iterator do return map.iterator
54
55 # The number of counted occurrences of `e`
56 redef fun [](e)
57 do
58 var map = self.map
59 if map.has_key(e) then return map[e]
60 return 0
61 end
62
63 redef fun []=(e, value)
64 do
65 sum -= self[e]
66 self.map[e] = value
67 sum += value
68 end
69
70 redef fun keys do return map.keys
71
72 redef fun values do return map.values
73
74 redef fun length do return map.length
75
76 redef fun is_empty do return map.is_empty
77
78 redef fun clear do
79 sum = 0
80 map.clear
81 end
82
83 redef fun add_all(other) do
84 for k, v in other do
85 self[k] += v
86 end
87 end
88
89 # Count one more occurrence of `e`
90 fun inc(e: E)
91 do
92 self.map[e] = self[e] + 1
93 sum += 1
94 end
95
96 # Count one more for each element of `es`
97 fun inc_all(es: Collection[E])
98 do
99 for e in es do inc(e)
100 end
101
102 # Decrement the value of `e` by 1
103 fun dec(e: E) do
104 if not has_key(e) then
105 self.map[e] = 0
106 else
107 self.map[e] = self[e] - 1
108 sum += - 1
109 end
110 end
111
112 # Decrement the value for each element of `es`
113 fun dec_all(es: Collection[E])
114 do
115 for e in es do dec(e)
116 end
117
118 # A new Counter initialized with `inc_all`.
119 init from(es: Collection[E])
120 do
121 inc_all(es)
122 end
123
124 # Return an array of elements sorted by occurrences
125 #
126 # ~~~
127 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
128 # assert c.sort == ["c", "a", "b"]
129 # ~~~
130 fun sort: Array[E]
131 do
132 var res = map.keys.to_a
133 var sorter = new CounterComparator[E](self)
134 sorter.sort(res)
135 return res
136 end
137
138 # The method used to display an element
139 # @toimplement by default just call `to_s` on the element
140 protected fun element_to_s(e: E): String
141 do
142 if e == null then return "null"
143 return e.to_s
144 end
145
146 # Display statistical information
147 fun print_summary
148 do
149 var list = self.sort
150 print " population: {list.length}"
151 if list.is_empty then return
152 print " minimum value: {self[list.first]}"
153 print " maximum value: {self[list.last]}"
154 print " total value: {self.sum}"
155 print " average value: {div(self.sum,list.length)}"
156 print " distribution:"
157 var count = 0
158 var sum = 0
159 var limit = self[list.first]
160 for t in list do
161 if self[t] > limit then
162 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
163 count = 0
164 sum = 0
165 while self[t] > limit do
166 limit = limit * 2
167 if limit == 0 then limit = 1
168 end
169 end
170 count += 1
171 sum += self[t]
172 end
173 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
174 end
175
176 # Display up to `count` most used elements and `count` least used elements
177 # Use `element_to_s` to display the element
178 fun print_elements(count: Int)
179 do
180 print " list:"
181 var list = self.sort
182 var min = count
183 if list.length <= count*2 then min = list.length
184 for i in [0..min[ do
185 var t = list[list.length-i-1]
186 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
187 end
188 if list.length <= count*2 then return
189 print " ..."
190 for i in [0..min[ do
191 var t = list[min-i-1]
192 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
193 end
194 end
195
196 # Return the element with the highest value (aka. the mode)
197 #
198 # ~~~
199 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
200 # assert c.max == "b"
201 # ~~~
202 #
203 # If more than one max exists, the first one is returned.
204 fun max: nullable E do
205 var max: nullable Int = null
206 var elem: nullable E = null
207 for e in map.keys do
208 var v = map[e]
209 if max == null or v > max then
210 max = v
211 elem = e
212 end
213 end
214 return elem
215 end
216
217 # Return the couple with the lowest value
218 #
219 # ~~~
220 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
221 # assert c.min == "c"
222 # ~~~
223 #
224 # If more than one min exists, the first one is returned.
225 fun min: nullable E do
226 var min: nullable Int = null
227 var elem: nullable E = null
228 for e in map.keys do
229 var v = map[e]
230 if min == null or v < min then
231 min = v
232 elem = e
233 end
234 end
235 return elem
236 end
237
238 # Values average (aka. arithmetic mean)
239 #
240 # ~~~
241 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
242 # assert c.avg == 2.0
243 # ~~~
244 fun avg: Float do
245 if values.is_empty then return 0.0
246 return (sum / values.length).to_f
247 end
248
249 # The standard derivation of the counter values
250 #
251 # ~~~
252 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
253 # assert c.std_dev > 0.81
254 # assert c.std_dev < 0.82
255 # ~~~
256 fun std_dev: Float do
257 var avg = self.avg
258 var sum = 0.0
259 for value in map.values do
260 sum += (value.to_f - avg).pow(2.0)
261 end
262 return (sum / map.length.to_f).sqrt
263 end
264
265 # The information entropy (Shannon entropy) of the elements in the counter (in bits).
266 fun entropy: Float
267 do
268 var res = 0.0
269 var sum = self.sum.to_f
270 for k, v in self do
271 var f = v.to_f / sum
272 res = res - f * f.log_base(2.0)
273 end
274 return res
275 end
276
277 # Prints the content of the counter along with statistics
278 #
279 # Content is printed in order (if available) from lowest to highest on the keys.
280 # Else, it is printed as-is
281 fun print_content do
282 var a = keys.to_a
283 if a isa Array[Comparable] then default_comparator.sort(a)
284 var subtotal = 0
285 for i in a do
286 subtotal += self[i]
287 printn("* ", i or else "null", " = ", self[i], " => occurences ", self[i].to_f / sum.to_f * 100.0, "%, cumulative ", subtotal.to_f / sum.to_f * 100.0, "% \n")
288 end
289 end
290
291 # Packs elements into separate arrays based on their occurences
292 #
293 # ~~~nit
294 # var x = "aaaabbbeeecccddhhgjt"
295 # var c = new Counter[Char]
296 # for i in x do c.inc i
297 # var ret = c.pack
298 # assert ret.join(", ") == """[t,g,j], [d,h], [c,b,e], [a]"""
299 # ~~~
300 fun pack: Array[Array[E]] do
301 var ret = new Array[Array[E]]
302 var base = self.sort
303 if base.is_empty then return ret
304 var curr = new Array[E]
305 curr.push base.first
306 var curr_score = self[base.first]
307 base.shift
308 for i in base do
309 if self[i] == curr_score then
310 curr.push i
311 continue
312 end
313 curr_score = self[i]
314 ret.push curr
315 curr = new Array[E]
316 curr.push i
317 end
318 if not curr.is_empty then ret.push curr
319 return ret
320 end
321 end
322
323 redef class Collection[E]
324 # Create and fill up a counter with the elements of `self.
325 #
326 # ~~~
327 # var cpt = "abaa".chars.to_counter
328 # assert cpt['a'] == 3
329 # assert cpt['b'] == 1
330 # assert cpt.length == 2
331 # assert cpt.sum == 4
332 # ~~~
333 fun to_counter: Counter[E]
334 do
335 var res = new Counter[E]
336 res.inc_all(self)
337 return res
338 end
339 end
340
341 private class CounterComparator[E]
342 super Comparator
343 redef type COMPARED: E
344 var counter: Counter[E]
345 redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
346 end
347
348 redef class POSet[E]
349 private fun show_counter(c: Counter[Int])
350 do
351 var list = c.sort
352 default_comparator.sort(list)
353 for e in list do
354 print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
355 end
356 end
357
358 # Display exhaustive metrics about the poset
359 fun print_metrics
360 do
361 var nb_greaters = new Counter[E]
362 var nb_direct_greaters = new Counter[E]
363 var nb_smallers = new Counter[E]
364 var nb_direct_smallers = new Counter[E]
365 var nb_direct_edges = 0
366 var nb_edges = 0
367 for n in self do
368 var ne = self[n]
369 nb_edges += ne.greaters.length
370 nb_direct_edges += ne.direct_greaters.length
371 nb_greaters[n] = ne.greaters.length
372 nb_direct_greaters[n] = ne.direct_greaters.length
373 nb_smallers[n] = ne.smallers.length
374 nb_direct_smallers[n] = ne.direct_smallers.length
375 end
376 print "Number of nodes: {self.length}"
377 print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
378 print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
379 print "Distribution of greaters"
380 nb_greaters.print_summary
381 print "Distribution of direct greaters"
382 nb_direct_greaters.print_summary
383 print "Distribution of smallers"
384 nb_smallers.print_summary
385 print "Distribution of direct smallers"
386 nb_direct_smallers.print_summary
387 end
388 end
389
390 # Helper function to display `n/d` and handle division by 0
391 fun div(n: Int, d: Int): String
392 do
393 if d == 0 then return "na"
394 return ((100*n/d).to_f/100.0).to_precision(2)
395 end