misc/vim: inform the user when no results are found
[nit.git] / lib / counter.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Simple numerical statistical analysis and presentation
16 module counter
17
18 import poset
19
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
22 #
23 # ~~~
24 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
25 # assert c["a"] == 2
26 # assert c["b"] == 3
27 # assert c["c"] == 1
28 # assert c["d"] == 0
29 # ~~~
30 #
31 # The counter class can also be used to gather statistical informations.
32 #
33 # ~~~~
34 # assert c.length == 3 # because 3 distinct values
35 # assert c.max == "b" # because "b" has the most count (3)
36 # assert c.avg == 2.0 # because it is the mean of the counts
37 # ~~~~
38 class Counter[E]
39 super Map[E, Int]
40
41 # Total number of counted occurrences
42 #
43 # ~~~
44 # var c = new Counter[String]
45 # assert c.sum == 0
46 # c.inc_all(["a", "a", "b", "b", "b", "c"])
47 # assert c.sum == 6
48 # ~~~
49 var sum: Int = 0
50
51 private var map = new HashMap[E, Int]
52
53 redef fun iterator do return map.iterator
54
55 # The number of counted occurrences of `e`
56 redef fun [](e: E): Int
57 do
58 var map = self.map
59 if map.has_key(e) then return map[e]
60 return 0
61 end
62
63 redef fun []=(e: E, value: Int)
64 do
65 sum -= self[e]
66 self.map[e] = value
67 sum += value
68 end
69
70 redef fun keys do return map.keys
71
72 redef fun values do return map.values
73
74 redef fun length do return map.length
75
76 redef fun is_empty do return map.is_empty
77
78 redef fun clear do
79 sum = 0
80 map.clear
81 end
82
83 # Count one more occurrence of `e`
84 fun inc(e: E)
85 do
86 self.map[e] = self[e] + 1
87 sum += 1
88 end
89
90 # Count one more for each element of `es`
91 fun inc_all(es: Collection[E])
92 do
93 for e in es do inc(e)
94 end
95
96 # A new Counter initialized with `inc_all`.
97 init from(es: Collection[E])
98 do
99 inc_all(es)
100 end
101
102 # Return an array of elements sorted by occurrences
103 #
104 # ~~~
105 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
106 # assert c.sort == ["c", "a", "b"]
107 # ~~~
108 fun sort: Array[E]
109 do
110 var res = map.keys.to_a
111 var sorter = new CounterComparator[E](self)
112 sorter.sort(res)
113 return res
114 end
115
116 # The method used to display an element
117 # @toimplement by default just call `to_s` on the element
118 protected fun element_to_s(e: E): String
119 do
120 return e.to_s
121 end
122
123 # Display statistical information
124 fun print_summary
125 do
126 var list = self.sort
127 print " population: {list.length}"
128 if list.is_empty then return
129 print " minimum value: {self[list.first]}"
130 print " maximum value: {self[list.last]}"
131 print " total value: {self.sum}"
132 print " average value: {div(self.sum,list.length)}"
133 print " distribution:"
134 var count = 0
135 var sum = 0
136 var limit = self[list.first]
137 for t in list do
138 if self[t] > limit then
139 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
140 count = 0
141 sum = 0
142 while self[t] > limit do
143 limit = limit * 2
144 if limit == 0 then limit = 1
145 end
146 end
147 count += 1
148 sum += self[t]
149 end
150 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
151 end
152
153 # Display up to `count` most used elements and `count` least used elements
154 # Use `element_to_s` to display the element
155 fun print_elements(count: Int)
156 do
157 print " list:"
158 var list = self.sort
159 var min = count
160 if list.length <= count*2 then min = list.length
161 for i in [0..min[ do
162 var t = list[list.length-i-1]
163 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
164 end
165 if list.length <= count*2 then return
166 print " ..."
167 for i in [0..min[ do
168 var t = list[min-i-1]
169 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
170 end
171 end
172
173 # Return the element with the highest value (aka. the mode)
174 #
175 # ~~~
176 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
177 # assert c.max == "b"
178 # ~~~
179 #
180 # If more than one max exists, the first one is returned.
181 fun max: nullable E do
182 var max: nullable Int = null
183 var elem: nullable E = null
184 for e in map.keys do
185 var v = map[e]
186 if max == null or v > max then
187 max = v
188 elem = e
189 end
190 end
191 return elem
192 end
193
194 # Return the couple with the lowest value
195 #
196 # ~~~
197 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
198 # assert c.min == "c"
199 # ~~~
200 #
201 # If more than one min exists, the first one is returned.
202 fun min: nullable E do
203 var min: nullable Int = null
204 var elem: nullable E = null
205 for e in map.keys do
206 var v = map[e]
207 if min == null or v < min then
208 min = v
209 elem = e
210 end
211 end
212 return elem
213 end
214
215 # Values average (aka. arithmetic mean)
216 #
217 # ~~~
218 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
219 # assert c.avg == 2.0
220 # ~~~
221 fun avg: Float do
222 if values.is_empty then return 0.0
223 return (sum / values.length).to_f
224 end
225
226 # The standard derivation of the counter values
227 #
228 # ~~~
229 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
230 # assert c.std_dev > 0.81
231 # assert c.std_dev < 0.82
232 # ~~~
233 fun std_dev: Float do
234 var avg = self.avg
235 var sum = 0.0
236 for value in map.values do
237 sum += (value.to_f - avg).pow(2.0)
238 end
239 return (sum / map.length.to_f).sqrt
240 end
241 end
242
243 private class CounterComparator[E]
244 super Comparator
245 redef type COMPARED: E
246 var counter: Counter[E]
247 redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
248 end
249
250 redef class POSet[E]
251 private fun show_counter(c: Counter[Int])
252 do
253 var list = c.sort
254 default_comparator.sort(list)
255 for e in list do
256 print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
257 end
258 end
259
260 # Display exhaustive metrics about the poset
261 fun print_metrics
262 do
263 var nb_greaters = new Counter[E]
264 var nb_direct_greaters = new Counter[E]
265 var nb_smallers = new Counter[E]
266 var nb_direct_smallers = new Counter[E]
267 var nb_direct_edges = 0
268 var nb_edges = 0
269 for n in self do
270 var ne = self[n]
271 nb_edges += ne.greaters.length
272 nb_direct_edges += ne.direct_greaters.length
273 nb_greaters[n] = ne.greaters.length
274 nb_direct_greaters[n] = ne.direct_greaters.length
275 nb_smallers[n] = ne.smallers.length
276 nb_direct_smallers[n] = ne.direct_smallers.length
277 end
278 print "Number of nodes: {self.length}"
279 print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
280 print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
281 print "Distribution of greaters"
282 nb_greaters.print_summary
283 print "Distribution of direct greaters"
284 nb_direct_greaters.print_summary
285 print "Distribution of smallers"
286 nb_smallers.print_summary
287 print "Distribution of direct smallers"
288 nb_direct_smallers.print_summary
289 end
290 end
291
292 # Helper function to display `n/d` and handle division by 0
293 fun div(n: Int, d: Int): String
294 do
295 if d == 0 then return "na"
296 return ((100*n/d).to_f/100.0).to_precision(2)
297 end