1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Simple numerical statistical analysis and presentation
20 # A counter counts occurrences of things
21 # Use this instead of a `HashMap[E, Int]`
24 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
31 # The counter class can also be used to gather statistical information.
34 # assert c.length == 3 # because 3 distinct values
35 # assert c.max == "b" # because "b" has the most count (3)
36 # assert c.avg == 2.0 # because it is the mean of the counts
41 # Total number of counted occurrences
44 # var c = new Counter[String]
46 # c.inc_all(["a", "a", "b", "b", "b", "c"])
# Backing storage: associates each element `E` with its `Int` value
private var map: HashMap[E, Int] = new HashMap[E, Int]
# Iteration is delegated to the backing `map`
redef fun iterator
do
	return self.map.iterator
end
55 # The number of counted occurrences of `e`
59 if map
.has_key
(e
) then return map
[e
]
63 redef fun []=(e
, value
)
# The keys of the backing `map`
redef fun keys
do
	return self.map.keys
end
# The values of the backing `map`
redef fun values
do
	return self.map.values
end
# The number of entries in the backing `map`
redef fun length
do
	return self.map.length
end
# Is the backing `map` empty?
redef fun is_empty
do
	return self.map.is_empty
end
83 # Count one more occurrence of `e`
86 self.map
[e
] = self[e
] + 1
90 # Count one more for each element of `es`
91 fun inc_all
(es
: Collection[E
])
96 # Decrement the value of `e` by 1
98 if not has_key
(e
) then
101 self.map
[e
] = self[e
] - 1
106 # Decrement the value for each element of `es`
107 fun dec_all
(es
: Collection[E
])
109 for e
in es
do dec
(e
)
112 # A new Counter initialized with `inc_all`.
113 init from
(es
: Collection[E
])
118 # Return an array of elements sorted by occurrences
121 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
122 # assert c.sort == ["c", "a", "b"]
126 var res
= map
.keys
.to_a
127 var sorter
= new CounterComparator[E
](self)
132 # The method used to display an element
133 # @toimplement by default just call `to_s` on the element
134 protected fun element_to_s
(e
: E
): String
139 # Display statistical information
143 print
" population: {list.length}"
144 if list
.is_empty
then return
145 print
" minimum value: {self[list.first]}"
146 print
" maximum value: {self[list.last]}"
147 print
" total value: {self.sum}"
148 print
" average value: {div(self.sum,list.length)}"
149 print
" distribution:"
152 var limit
= self[list
.first
]
154 if self[t
] > limit
then
155 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
158 while self[t
] > limit
do
160 if limit
== 0 then limit
= 1
166 print
" <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
169 # Display up to `count` most used elements and `count` least used elements
170 # Use `element_to_s` to display the element
171 fun print_elements
(count
: Int)
176 if list
.length
<= count
*2 then min
= list
.length
178 var t
= list
[list
.length-i-1
]
179 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
181 if list
.length
<= count
*2 then return
184 var t
= list
[min-i-1
]
185 print
" {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
189 # Return the element with the highest value (aka. the mode)
192 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
193 # assert c.max == "b"
196 # If more than one max exists, the first one is returned.
197 fun max
: nullable E
do
198 var max
: nullable Int = null
199 var elem
: nullable E
= null
202 if max
== null or v
> max
then
210 # Return the element with the lowest value
213 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
214 # assert c.min == "c"
217 # If more than one min exists, the first one is returned.
218 fun min
: nullable E
do
219 var min
: nullable Int = null
220 var elem
: nullable E
= null
223 if min
== null or v
< min
then
231 # Values average (aka. arithmetic mean)
234 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
235 # assert c.avg == 2.0
238 if values
.is_empty
then return 0.0
239 return (sum
/ values
.length
).to_f
242 # The standard deviation of the counter values
245 # var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
246 # assert c.std_dev > 0.81
247 # assert c.std_dev < 0.82
249 fun std_dev
: Float do
252 for value
in map
.values
do
253 sum
+= (value
.to_f
- avg
).pow
(2.0)
255 return (sum
/ map
.length
.to_f
).sqrt
258 # The information entropy (Shannon entropy) of the elements in the counter (in bits).
262 var sum
= self.sum
.to_f
265 res
= res
- f
* f
.log_base
(2.0)
270 # Prints the content of the counter along with statistics
272 # Content is printed in order (if available) from lowest to highest on the keys.
273 # Else, it is printed as-is
276 if a
isa Array[Comparable] then default_comparator
.sort
(a
)
280 printn
("* ", i
or else "null", " = ", self[i
], " => occurences ", self[i
].to_f
/ sum
.to_f
* 100.0, "%, cumulative ", subtotal
.to_f
/ sum
.to_f
* 100.0, "% \n")
285 redef class Collection[E
]
286 # Create and fill up a counter with the elements of `self`.
289 # var cpt = "abaa".chars.to_counter
290 # assert cpt['a'] == 3
291 # assert cpt['b'] == 1
292 # assert cpt.length == 2
293 # assert cpt.sum == 4
295 fun to_counter
: Counter[E
]
297 var res
= new Counter[E
]
303 private class CounterComparator[E
]
305 redef type COMPARED: E
306 var counter
: Counter[E
]
# Order `a` and `b` by the values stored for them in `counter.map`
redef fun compare(a, b)
do
	var counts = counter.map
	return counts[a] <=> counts[b]
end
311 private fun show_counter
(c
: Counter[Int])
314 default_comparator
.sort
(list
)
316 print
" {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
320 # Display exhaustive metrics about the poset
323 var nb_greaters
= new Counter[E
]
324 var nb_direct_greaters
= new Counter[E
]
325 var nb_smallers
= new Counter[E
]
326 var nb_direct_smallers
= new Counter[E
]
327 var nb_direct_edges
= 0
331 nb_edges
+= ne
.greaters
.length
332 nb_direct_edges
+= ne
.direct_greaters
.length
333 nb_greaters
[n
] = ne
.greaters
.length
334 nb_direct_greaters
[n
] = ne
.direct_greaters
.length
335 nb_smallers
[n
] = ne
.smallers
.length
336 nb_direct_smallers
[n
] = ne
.direct_smallers
.length
338 print
"Number of nodes: {self.length}"
339 print
"Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
340 print
"Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
341 print
"Distribution of greaters"
342 nb_greaters
.print_summary
343 print
"Distribution of direct greaters"
344 nb_direct_greaters
.print_summary
345 print
"Distribution of smallers"
346 nb_smallers
.print_summary
347 print
"Distribution of direct smallers"
348 nb_direct_smallers
.print_summary
352 # Helper function to display `n/d` and handle division by 0
353 fun div
(n
: Int, d
: Int): String
355 if d
== 0 then return "na"
356 return ((100*n
/d
).to_f
/100.0).to_precision
(2)