Merge branch 'hardening_types'
[nit.git] / lib / counter.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Simple numerical statistical analysis and presentation
16 module counter
17
18 import poset
19
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
22 class Counter[E: Object]
23 super Map[E, Int]
24
25 # Total number of counted occurrences
26 var total: Int = 0
27
28 private var map = new HashMap[E, Int]
29
30 redef fun iterator do return map.iterator
31
32 # The number of counted occurrences of `e`
33 redef fun [](e: E): Int
34 do
35 var map = self.map
36 if map.has_key(e) then return map[e]
37 return 0
38 end
39
40 redef fun []=(e: E, value: Int)
41 do
42 total -= self[e]
43 self.map[e] = value
44 total += value
45 end
46
47 redef fun keys do return map.keys
48
49 redef fun values do return map.values
50
51 redef fun length do return map.length
52
53 redef fun is_empty do return map.is_empty
54
55 redef fun clear do map.clear
56
57 # Count one more occurrence of `e`
58 fun inc(e: E)
59 do
60 self.map[e] = self[e] + 1
61 total += 1
62 end
63
64 # Return an array of elements sorted by occurrences
65 fun sort: Array[E]
66 do
67 var res = map.keys.to_a
68 var sorter = new CounterSorter[E](self)
69 sorter.sort(res)
70 return res
71 end
72
73 # The method used to display an element
74 # @toimplement by default just call `to_s` on the element
75 protected fun element_to_s(e: E): String
76 do
77 do return e.to_s
78 end
79
80 # Display statistical information
81 fun print_summary
82 do
83 var list = self.sort
84 print " population: {list.length}"
85 if list.is_empty then return
86 print " minimum value: {self[list.first]}"
87 print " maximum value: {self[list.last]}"
88 print " total value: {self.total}"
89 print " average value: {div(self.total,list.length)}"
90 print " distribution:"
91 var count = 0
92 var sum = 0
93 var limit = self[list.first]
94 for t in list do
95 if self[t] > limit then
96 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.total)}%)"
97 count = 0
98 sum = 0
99 while self[t] > limit do
100 limit = limit * 2
101 if limit == 0 then limit = 1
102 end
103 end
104 count += 1
105 sum += self[t]
106 end
107 print " <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.total)}%)"
108 end
109
110 # Display up to `count` most used elements and `count` least used elements
111 # Use `element_to_s` to display the element
112 fun print_elements(count: Int)
113 do
114 print " list:"
115 var list = self.sort
116 var min = count
117 if list.length <= count*2 then min = list.length
118 for i in [0..min[ do
119 var t = list[list.length-i-1]
120 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.total)}%)"
121 end
122 if list.length <= count*2 then return
123 print " ..."
124 for i in [0..min[ do
125 var t = list[min-i-1]
126 print " {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.total)}%)"
127 end
128 end
129
130 # Return the element with the highest value
131 fun max: nullable E do
132 var max: nullable Int = null
133 var elem: nullable E = null
134 for e, v in map do
135 if max == null or v > max then
136 max = v
137 elem = e
138 end
139 end
140 return elem
141 end
142
143 # Return the couple with the lowest value
144 fun min: nullable E do
145 var min: nullable Int = null
146 var elem: nullable E = null
147 for e, v in map do
148 if min == null or v < min then
149 min = v
150 elem = e
151 end
152 end
153 return elem
154 end
155
156 # Values average
157 fun avg: Float do
158 if values.is_empty then return 0.0
159 var sum = 0
160 for value in map.values do
161 sum += value
162 end
163 return sum.to_f / values.length.to_f
164 end
165 end
166
167 private class CounterSorter[E: Object]
168 super AbstractSorter[E]
169 var counter: Counter[E]
170 redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
171 end
172
173 redef class POSet[E]
174 private fun show_counter(c: Counter[Int])
175 do
176 var list = c.sort
177 (new ComparableSorter[Int]).sort(list)
178 for e in list do
179 print " {e} -> {c[e]} times ({div(c[e]*100, c.total)}%)"
180 end
181 end
182
183 # Display exhaustive metrics about the poset
184 fun print_metrics
185 do
186 var nb_greaters = new Counter[E]
187 var nb_direct_greaters = new Counter[E]
188 var nb_smallers = new Counter[E]
189 var nb_direct_smallers = new Counter[E]
190 var nb_direct_edges = 0
191 var nb_edges = 0
192 for n in self do
193 var ne = self[n]
194 nb_edges += ne.greaters.length
195 nb_direct_edges += ne.direct_greaters.length
196 nb_greaters[n] = ne.greaters.length
197 nb_direct_greaters[n] = ne.direct_greaters.length
198 nb_smallers[n] = ne.smallers.length
199 nb_direct_smallers[n] = ne.direct_smallers.length
200 end
201 print "Number of nodes: {self.length}"
202 print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
203 print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
204 print "Distribution of greaters"
205 nb_greaters.print_summary
206 print "Distribution of direct greaters"
207 nb_direct_greaters.print_summary
208 print "Distribution of smallers"
209 nb_smallers.print_summary
210 print "Distribution of direct smallers"
211 nb_direct_smallers.print_summary
212 end
213 end
214
215 # Helper function to display `n/d` and handle division by 0
216 fun div(n: Int, d: Int): String
217 do
218 if d == 0 then return "na"
219 return ((100*n/d).to_f/100.0).to_precision(2)
220 end