1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Simple numerical statistical analysis and presentation
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
24 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
31 # The counter class can also be used to gather statistical information.
34 # assert c.length == 3 # because 3 distinct values
35 # assert c.max == "b" # because "b" has the most count (3)
36 # assert c.avg == 2.0 # because it is the mean of the counts
41 # Total number of counted occurrences
44 # var c = new Counter[String]
46 # c.inc_all(["a", "a", "b", "b", "b", "c"])
# Underlying storage: associates each counted element with its current count
private var map: HashMap[E, Int] = new HashMap[E, Int]
# Iterate over the counter by delegating to the iterator of the underlying `map`
redef fun iterator
do
	return self.map.iterator
end
55 # The number of counted occurrences of `e`
59 if map
.has_key
(e
) then return map
[e
]
63 redef fun []=(e
, value
)
# The counted elements, i.e. the keys of the underlying `map`
redef fun keys
do
	return self.map.keys
end
# The counts, i.e. the values of the underlying `map`
redef fun values
do
	return self.map.values
end
# The number of entries of the underlying `map` (one per distinct counted element)
redef fun length
do
	return self.map.length
end
# Is the underlying `map` empty?
redef fun is_empty
do
	return self.map.is_empty
end
83 # Count one more occurrence of `e`
86 self.map
[e
] = self[e
] + 1
90 # Count one more for each element of `es`
91 fun inc_all
(es
: Collection[E
])
96 # Decrement the value of `e` by 1
98 if not has_key
(e
) then
101 self.map
[e
] = self[e
] - 1
106 # Decrement the value for each element of `es`
107 fun dec_all
(es
: Collection[E
])
109 for e
in es
do dec
(e
)
112 # A new Counter initialized with `inc_all`.
113 init from
(es
: Collection[E
])
118 # Return an array of elements sorted by occurrences
121 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
122 # assert c.sort == ["c", "a", "b"]
126 var res
= map
.keys
.to_a
127 var sorter
= new CounterComparator[E
](self)
132 # The method used to display an element
133 # @toimplement by default just call `to_s` on the element
134 protected fun element_to_s
(e
: E
): String
139 # Display statistical information
143 print
" population: {list.length}"
144 if list
.is_empty
then return
145 print
" minimum value: {self[list.first]}"
146 print
" maximum value: {self[list.last]}"
147 print
" total value: {self.sum}"
148 print
" average value: {div(self.sum,list.length)}"
149 print
" distribution:"
152 var limit
= self[list
.first
]
154 if self[t
] > limit
then
155 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
158 while self[t
] > limit
do
160 if limit
== 0 then limit
= 1
166 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
169 # Display up to `count` most used elements and `count` least used elements
170 # Use `element_to_s` to display the element
171 fun print_elements
(count
: Int)
176 if list
.length
<= count
*2 then min
= list
.length
178 var t
= list
[list
.length-i-1
]
179 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
181 if list
.length
<= count
*2 then return
184 var t
= list
[min-i-1
]
185 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
189 # Return the element with the highest value (aka. the mode)
192 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
193 # assert c.max == "b"
196 # If more than one max exists, the first one is returned.
197 fun max
: nullable E
do
198 var max
: nullable Int = null
199 var elem
: nullable E
= null
202 if max
== null or v
> max
then
210 # Return the element with the lowest value
213 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
214 # assert c.min == "c"
217 # If more than one min exists, the first one is returned.
218 fun min
: nullable E
do
219 var min
: nullable Int = null
220 var elem
: nullable E
= null
223 if min
== null or v
< min
then
231 # Values average (aka. arithmetic mean)
234 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
235 # assert c.avg == 2.0
# An empty counter has no mean: answer 0.0 rather than dividing by zero
if values.is_empty then return 0.0
# Convert both operands to Float *before* dividing: `sum` and `values.length`
# are Int, and an Int division would truncate the mean
# (e.g. counts 1 and 2 must give 1.5, not 1.0)
return sum.to_f / values.length.to_f
242 # The standard deviation of the counter values
245 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
246 # assert c.std_dev > 0.81
247 # assert c.std_dev < 0.82
249 fun std_dev
: Float do
252 for value
in map
.values
do
253 sum
+= (value
.to_f
- avg
).pow
(2.0)
255 return (sum
/ map
.length
.to_f
).sqrt
258 # The information entropy (Shannon entropy) of the elements in the counter (in bits).
262 var sum
= self.sum
.to_f
265 res
= res
- f
* f
.log_base
(2.0)
271 redef class Collection[E
]
272 # Create and fill up a counter with the elements of `self`.
275 # var cpt = "abaa".chars.to_counter
276 # assert cpt['a'] == 3
277 # assert cpt['b'] == 1
278 # assert cpt.length == 2
279 # assert cpt.sum == 4
281 fun to_counter
: Counter[E
]
283 var res
= new Counter[E
]
289 private class CounterComparator[E
]
# The compared objects are elements of type `E`, the element type of the `counter`
291 redef type COMPARED: E
# The counter whose per-element counts are read by `compare`
292 var counter
: Counter[E
]
# Order `a` and `b` according to their respective counts in `counter`
#
# Both elements are looked up directly in the map of the counter.
redef fun compare(a,b)
do
	var count_a = self.counter.map[a]
	var count_b = self.counter.map[b]
	return count_a <=> count_b
end
297 private fun show_counter
(c
: Counter[Int])
300 default_comparator
.sort
(list
)
302 print
" {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
306 # Display exhaustive metrics about the poset
309 var nb_greaters
= new Counter[E
]
310 var nb_direct_greaters
= new Counter[E
]
311 var nb_smallers
= new Counter[E
]
312 var nb_direct_smallers
= new Counter[E
]
313 var nb_direct_edges
= 0
317 nb_edges
+= ne
.greaters
.length
318 nb_direct_edges
+= ne
.direct_greaters
.length
319 nb_greaters
[n
] = ne
.greaters
.length
320 nb_direct_greaters
[n
] = ne
.direct_greaters
.length
321 nb_smallers
[n
] = ne
.smallers
.length
322 nb_direct_smallers
[n
] = ne
.direct_smallers
.length
324 print
"Number of nodes: {self.length}"
325 print
"Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
326 print
"Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
327 print
"Distribution of greaters"
328 nb_greaters
.print_summary
329 print
"Distribution of direct greaters"
330 nb_direct_greaters
.print_summary
331 print
"Distribution of smallers"
332 nb_smallers
.print_summary
333 print
"Distribution of direct smallers"
334 nb_direct_smallers
.print_summary
338 # Helper function to display `n/d` and handle division by 0
339 fun div
(n
: Int, d
: Int): String
341 if d
== 0 then return "na"
342 return ((100*n
/d
).to_f
/100.0).to_precision
(2)