1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Simple numerical statistical analysis and presentation
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
24 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
31 # The counter class can also be used to gather statistical informations.
34 # assert c.length == 3 # because 3 distinct values
35 # assert c.max == "b" # because "b" has the most count (3)
36 # assert c.avg == 2.0 # because it is the mean of the counts
41 # Total number of counted occurrences
44 # var c = new Counter[String]
46 # c.inc_all(["a", "a", "b", "b", "b", "c"])
51 private var map
= new HashMap[E
, Int]
53 redef fun iterator
do return map
.iterator
55 # The number of counted occurrences of `e`
59 if map
.has_key
(e
) then return map
[e
]
63 redef fun []=(e
, value
)
70 redef fun keys
do return map
.keys
72 redef fun values
do return map
.values
74 redef fun length
do return map
.length
76 redef fun is_empty
do return map
.is_empty
83 redef fun add_all
(other
) do
89 # Count one more occurrence of `e`
92 self.map
[e
] = self[e
] + 1
96 # Count one more for each element of `es`
97 fun inc_all
(es
: Collection[E
])
102 # Decrement the value of `e` by 1
104 if not has_key
(e
) then
107 self.map
[e
] = self[e
] - 1
112 # Decrement the value for each element of `es`
113 fun dec_all
(es
: Collection[E
])
115 for e
in es
do dec
(e
)
118 # A new Counter initialized with `inc_all`.
119 init from
(es
: Collection[E
])
124 # Return an array of elements sorted by occurrences
127 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
128 # assert c.sort == ["c", "a", "b"]
132 var res
= map
.keys
.to_a
133 var sorter
= new CounterComparator[E
](self)
138 # The method used to display an element
139 # @toimplement by default just call `to_s` on the element
140 protected fun element_to_s
(e
: E
): String
142 if e
== null then return "null"
146 # Display statistical information
150 print
" population: {list.length}"
151 if list
.is_empty
then return
152 print
" minimum value: {self[list.first]}"
153 print
" maximum value: {self[list.last]}"
154 print
" total value: {self.sum}"
155 print
" average value: {div(self.sum,list.length)}"
156 print
" distribution:"
159 var limit
= self[list
.first
]
161 if self[t
] > limit
then
162 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
165 while self[t
] > limit
do
167 if limit
== 0 then limit
= 1
173 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
176 # Display up to `count` most used elements and `count` least used elements
177 # Use `element_to_s` to display the element
178 fun print_elements
(count
: Int)
183 if list
.length
<= count
*2 then min
= list
.length
185 var t
= list
[list
.length-i-1
]
186 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
188 if list
.length
<= count
*2 then return
191 var t
= list
[min-i-1
]
192 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
196 # Return the element with the highest value (aka. the mode)
199 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
200 # assert c.max == "b"
203 # If more than one max exists, the first one is returned.
204 fun max
: nullable E
do
205 var max
: nullable Int = null
206 var elem
: nullable E
= null
209 if max
== null or v
> max
then
217 # Return the couple with the lowest value
220 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
221 # assert c.min == "c"
224 # If more than one min exists, the first one is returned.
225 fun min
: nullable E
do
226 var min
: nullable Int = null
227 var elem
: nullable E
= null
230 if min
== null or v
< min
then
238 # Values average (aka. arithmetic mean)
241 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
242 # assert c.avg == 2.0
245 if values
.is_empty
then return 0.0
246 return (sum
/ values
.length
).to_f
249 # The standard derivation of the counter values
252 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
253 # assert c.std_dev > 0.81
254 # assert c.std_dev < 0.82
256 fun std_dev
: Float do
259 for value
in map
.values
do
260 sum
+= (value
.to_f
- avg
).pow
(2.0)
262 return (sum
/ map
.length
.to_f
).sqrt
265 # The information entropy (Shannon entropy) of the elements in the counter (in bits).
269 var sum
= self.sum
.to_f
272 res
= res
- f
* f
.log_base
(2.0)
277 # Prints the content of the counter along with statistics
279 # Content is printed in order (if available) from lowest to highest on the keys.
280 # Else, it is printed as-is
283 if a
isa Array[Comparable] then default_comparator
.sort
(a
)
287 printn
("* ", i
or else "null", " = ", self[i
], " => occurences ", self[i
].to_f
/ sum
.to_f
* 100.0, "%, cumulative ", subtotal
.to_f
/ sum
.to_f
* 100.0, "% \n")
291 # Packs elements into separate arrays based on their occurences
294 # var x = "aaaabbbeeecccddhhgjt"
295 # var c = new Counter[Char]
296 # for i in x do c.inc i
298 # assert ret.join(", ") == """[t,g,j], [d,h], [c,b,e], [a]"""
300 fun pack
: Array[Array[E
]] do
301 var ret
= new Array[Array[E
]]
303 if base
.is_empty
then return ret
304 var curr
= new Array[E
]
306 var curr_score
= self[base
.first
]
309 if self[i
] == curr_score
then
318 if not curr
.is_empty
then ret
.push curr
323 redef class Collection[E
]
324 # Create and fill up a counter with the elements of `self.
327 # var cpt = "abaa".chars.to_counter
328 # assert cpt['a'] == 3
329 # assert cpt['b'] == 1
330 # assert cpt.length == 2
331 # assert cpt.sum == 4
333 fun to_counter
: Counter[E
]
335 var res
= new Counter[E
]
341 private class CounterComparator[E
]
343 redef type COMPARED: E
344 var counter
: Counter[E
]
345 redef fun compare
(a
,b
) do return self.counter
.map
[a
] <=> self.counter
.map
[b
]
349 private fun show_counter
(c
: Counter[Int])
352 default_comparator
.sort
(list
)
354 print
" {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
358 # Display exhaustive metrics about the poset
361 var nb_greaters
= new Counter[E
]
362 var nb_direct_greaters
= new Counter[E
]
363 var nb_smallers
= new Counter[E
]
364 var nb_direct_smallers
= new Counter[E
]
365 var nb_direct_edges
= 0
369 nb_edges
+= ne
.greaters
.length
370 nb_direct_edges
+= ne
.direct_greaters
.length
371 nb_greaters
[n
] = ne
.greaters
.length
372 nb_direct_greaters
[n
] = ne
.direct_greaters
.length
373 nb_smallers
[n
] = ne
.smallers
.length
374 nb_direct_smallers
[n
] = ne
.direct_smallers
.length
376 print
"Number of nodes: {self.length}"
377 print
"Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
378 print
"Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
379 print
"Distribution of greaters"
380 nb_greaters
.print_summary
381 print
"Distribution of direct greaters"
382 nb_direct_greaters
.print_summary
383 print
"Distribution of smallers"
384 nb_smallers
.print_summary
385 print
"Distribution of direct smallers"
386 nb_direct_smallers
.print_summary
390 # Helper function to display `n/d` and handle division by 0
391 fun div
(n
: Int, d
: Int): String
393 if d
== 0 then return "na"
394 return ((100*n
/d
).to_f
/100.0).to_precision
(2)