migration: use Comparator instead of Sorter in code
[nit.git] / lib / counter.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Simple numerical statistical analysis and presentation
16 module counter
17
18 import poset
19
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
22 class Counter[E: Object]
23 super Map[E, Int]
24
25 # Total number of counted occurrences
26 var sum: Int = 0
27
28 private var map = new HashMap[E, Int]
29
30 redef fun iterator do return map.iterator
31
32 # The number of counted occurrences of `e`
33 redef fun [](e: E): Int
34 do
35 var map = self.map
36 if map.has_key(e) then return map[e]
37 return 0
38 end
39
40 redef fun []=(e: E, value: Int)
41 do
42 sum -= self[e]
43 self.map[e] = value
44 sum += value
45 end
46
47 redef fun keys do return map.keys
48
49 redef fun values do return map.values
50
51 redef fun length do return map.length
52
53 redef fun is_empty do return map.is_empty
54
55 redef fun clear do map.clear
56
57 # Count one more occurrence of `e`
58 fun inc(e: E)
59 do
60 self.map[e] = self[e] + 1
61 sum += 1
62 end
63
64 # Return an array of elements sorted by occurrences
65 fun sort: Array[E]
66 do
67 var res = map.keys.to_a
68 var sorter = new CounterComparator[E](self)
69 sorter.sort(res)
70 return res
71 end
72
73 # The method used to display an element
74 # @toimplement by default just call `to_s` on the element
75 protected fun element_to_s(e: E): String
76 do
77 do return e.to_s
78 end
79
80 # Display statistical information
81 fun print_summary
82 do
83 var list = self.sort
84 print " population: {list.length}"
85 if list.is_empty then return
86 print " minimum value: {self[list.first]}"
87 print " maximum value: {self[list.last]}"
88 print " total value: {self.sum}"
89 print " average value: {div(self.sum,list.length)}"
90 print " distribution:"
91 var count = 0
92 var sum = 0
93 var limit = self[list.first]
94 for t in list do
95 if self[t] > limit then
96 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
97 count = 0
98 sum = 0
99 while self[t] > limit do
100 limit = limit * 2
101 if limit == 0 then limit = 1
102 end
103 end
104 count += 1
105 sum += self[t]
106 end
107 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
108 end
109
110 # Display up to `count` most used elements and `count` least used elements
111 # Use `element_to_s` to display the element
112 fun print_elements(count: Int)
113 do
114 print " list:"
115 var list = self.sort
116 var min = count
117 if list.length <= count*2 then min = list.length
118 for i in [0..min[ do
119 var t = list[list.length-i-1]
120 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
121 end
122 if list.length <= count*2 then return
123 print " ..."
124 for i in [0..min[ do
125 var t = list[min-i-1]
126 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
127 end
128 end
129
130 # Return the element with the highest value
131 fun max: nullable E do
132 var max: nullable Int = null
133 var elem: nullable E = null
134 for e, v in map do
135 if max == null or v > max then
136 max = v
137 elem = e
138 end
139 end
140 return elem
141 end
142
143 # Return the couple with the lowest value
144 fun min: nullable E do
145 var min: nullable Int = null
146 var elem: nullable E = null
147 for e, v in map do
148 if min == null or v < min then
149 min = v
150 elem = e
151 end
152 end
153 return elem
154 end
155
156 # Values average
157 fun avg: Float do
158 if values.is_empty then return 0.0
159 return (sum / values.length).to_f
160 end
161
162 # The standard derivation of the counter values
163 fun std_dev: Float do
164 var avg = self.avg
165 var sum = 0.0
166 for value in map.values do
167 sum += (value.to_f - avg).pow(2.0)
168 end
169 return (sum / map.length.to_f).sqrt
170 end
171 end
172
173 private class CounterComparator[E: Object]
174 super Comparator[E]
175 var counter: Counter[E]
176 redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
177 end
178
179 redef class POSet[E]
180 private fun show_counter(c: Counter[Int])
181 do
182 var list = c.sort
183 default_comparator.sort(list)
184 for e in list do
185 print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
186 end
187 end
188
189 # Display exhaustive metrics about the poset
190 fun print_metrics
191 do
192 var nb_greaters = new Counter[E]
193 var nb_direct_greaters = new Counter[E]
194 var nb_smallers = new Counter[E]
195 var nb_direct_smallers = new Counter[E]
196 var nb_direct_edges = 0
197 var nb_edges = 0
198 for n in self do
199 var ne = self[n]
200 nb_edges += ne.greaters.length
201 nb_direct_edges += ne.direct_greaters.length
202 nb_greaters[n] = ne.greaters.length
203 nb_direct_greaters[n] = ne.direct_greaters.length
204 nb_smallers[n] = ne.smallers.length
205 nb_direct_smallers[n] = ne.direct_smallers.length
206 end
207 print "Number of nodes: {self.length}"
208 print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
209 print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
210 print "Distribution of greaters"
211 nb_greaters.print_summary
212 print "Distribution of direct greaters"
213 nb_direct_greaters.print_summary
214 print "Distribution of smallers"
215 nb_smallers.print_summary
216 print "Distribution of direct smallers"
217 nb_direct_smallers.print_summary
218 end
219 end
220
221 # Helper function to display `n/d` and handle division by 0
222 fun div(n: Int, d: Int): String
223 do
224 if d == 0 then return "na"
225 return ((100*n/d).to_f/100.0).to_precision(2)
226 end