metrics: skip `not a number` values when computing the sum and std_dev
[nit.git] / src / metrics / metrics_base.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2012 Jean Privat <jean@pryen.org>
4 # Copyright 2014 Alexandre Terrasa <alexandre@moz-code.org>
5 #
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
9 #
10 # http://www.apache.org/licenses/LICENSE-2.0
11 #
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
17
18 # Helpers for various statistics tools.
19 module metrics_base
20
21 import modelbuilder
22 import csv
23 import counter
24 import console
25
redef class ToolContext

	# --all
	var opt_all = new OptionBool("Compute all metrics", "--all")

	# --mmodules
	var opt_mmodules = new OptionBool("Compute metrics about mmodules", "--mmodules")
	# --mclasses
	var opt_mclasses = new OptionBool("Compute metrics about mclasses", "--mclasses")
	# --mendel
	var opt_mendel = new OptionBool("Compute mendel metrics", "--mendel")
	# --inheritance
	var opt_inheritance = new OptionBool("Compute metrics about inheritance usage", "--inheritance")
	# --refinement
	var opt_refinement = new OptionBool("Compute metrics about refinement usage", "--refinement")
	# --self
	var opt_self = new OptionBool("Compute metrics about the usage of explicit and implicit self", "--self")
	# --ast
	var opt_ast = new OptionBool("Compute metrics about the usage of nodes and identifiers in the AST", "--ast")
	# --nullables
	var opt_nullables = new OptionBool("Compute metrics on nullables send", "--nullables")
	# --static-types
	var opt_static_types = new OptionBool("Compute explicit static types metrics", "--static-types")
	# --tables
	var opt_tables = new OptionBool("Compute tables metrics", "--tables")
	# --rta
	var opt_rta = new OptionBool("Compute RTA metrics", "--rta")
	# --csv
	var opt_csv = new OptionBool("Export metrics in CSV format", "--csv")
	# --generate_hyperdoc
	var opt_generate_hyperdoc = new OptionBool("Generate Hyperdoc", "--generate_hyperdoc")
	# --poset
	var opt_poset = new OptionBool("Complete metrics on posets", "--poset")
	# --no-colors
	var opt_nocolors = new OptionBool("Disable colors in console outputs", "--no-colors")
	# -d, --dir
	var opt_dir = new OptionString("Directory where some statistics files are generated", "-d", "--dir")

	# Output directory for metrics files.
	#
	# Defaults to the current directory; replaced by the value of `--dir`
	# in `process_options` when that option is given.
	var output_dir: String = "."

	# Register all the metrics-related options in the option context.
	redef init
	do
		super
		option_context.add_option(opt_all)
		option_context.add_option(opt_mmodules)
		option_context.add_option(opt_mclasses)
		option_context.add_option(opt_mendel)
		option_context.add_option(opt_inheritance)
		option_context.add_option(opt_refinement)
		option_context.add_option(opt_self)
		option_context.add_option(opt_ast)
		option_context.add_option(opt_nullables)
		option_context.add_option(opt_static_types)
		option_context.add_option(opt_tables)
		option_context.add_option(opt_rta)
		option_context.add_option(opt_csv)
		option_context.add_option(opt_generate_hyperdoc)
		option_context.add_option(opt_poset)
		option_context.add_option(opt_dir)
		option_context.add_option(opt_nocolors)
	end

	# Process the options then set up `output_dir`.
	#
	# When `--dir` is given, its path is simplified, the directory is
	# created and it becomes the new `output_dir`.
	redef fun process_options(args)
	do
		super
		var val = opt_dir.value
		if val != null then
			val = val.simplify_path
			val.mkdir
			output_dir = val
		end
	end

	# Format and colorize a string heading of level 1 for console output.
	#
	# Default style is yellow and bold.
	# The string is returned unchanged when `--no-colors` is set.
	fun format_h1(str: String): String do
		if opt_nocolors.value then return str
		return str.yellow.bold
	end

	# Format and colorize a string heading of level 2 for console output.
	#
	# Default style is bold.
	# The string is returned unchanged when `--no-colors` is set.
	fun format_h2(str: String): String do
		if opt_nocolors.value then return str
		return str.bold
	end

	# Format a string heading of level 3 for console output.
	#
	# No style is applied at this level, so the string is always
	# returned unchanged, whatever the value of `--no-colors`.
	fun format_h3(str: String): String do
		return str
	end

	# Format and colorize a string heading of level 4 for console output.
	#
	# Default style is green.
	# The string is returned unchanged when `--no-colors` is set.
	fun format_h4(str: String): String do
		if opt_nocolors.value then return str
		return str.green
	end

	# Format and colorize a paragraph of text for console output.
	#
	# Default style is light gray.
	# The string is returned unchanged when `--no-colors` is set.
	fun format_p(str: String): String do
		if opt_nocolors.value then return str
		return str.light_gray
	end

end
141
# A Metric collects data about a set of elements
#
# The concept is reified as an interface so that metrics can be
# organized and documented uniformly.
interface Metric

	# Type of the elements that are measured.
	type ELM: Object

	# Type of the values associated to the measured elements.
	type VAL: Object

	# Type of the structure that maps elements to their values.
	type RES: Map[ELM, VAL]

	# Short name of the metric (generally an acronym).
	fun name: String is abstract

	# Longer, human readable description of what is measured.
	fun desc: String is abstract

	# Forget all the results collected so far.
	fun clear is abstract

	# The value associated to each measured element.
	fun values: RES is abstract

	# Compute the metric on each element of `elements`.
	fun collect(elements: Set[ELM]) is abstract

	# The value computed for `element`.
	fun [](element: ELM): VAL do
		return values[element]
	end

	# Is there a value for `element` in this metric?
	fun has_element(element: ELM): Bool do
		return values.has_key(element)
	end

	# The average of the values.
	fun avg: Float is abstract

	# Pretty print the results of the metric in the console.
	#
	# Each line is prefixed with `indent` tabulations.
	# Lines are colorized when `colors` is true.
	fun to_console(indent: Int, colors: Bool) do
		var pad = "\t" * indent

		# Nothing was collected: print a single line and stop.
		if values.is_empty then
			var msg = "{pad}{name}: {desc} -- nothing"
			if colors then msg = msg.green
			print msg
			return
		end

		var max = self.max
		var min = self.min

		var title = "{pad}{name}: {desc}"
		if colors then title = title.green
		print title

		var avg_line = "{pad} avg: {avg}"
		if colors then avg_line = avg_line.light_gray
		print avg_line

		var max_line = "{pad} max: {max} ({self[max]})"
		if colors then max_line = max_line.light_gray
		print max_line

		var min_line = "{pad} min: {min} ({self[min]})"
		if colors then min_line = min_line.light_gray
		print min_line

		var std_line = "{pad} std: {std_dev}"
		if colors then std_line = std_line.light_gray
		print std_line
	end

	# The sum of all the values.
	fun sum: VAL is abstract

	# The standard deviation of the values.
	fun std_dev: Float is abstract

	# The element that has the highest value.
	fun max: ELM is abstract

	# The element that has the lowest value.
	fun min: ELM is abstract

	# The value above which elements are considered as 'interesting'.
	#
	# It is the average plus one standard deviation.
	fun threshold: Float do
		return avg + std_dev
	end

	# The set of elements whose value is above the threshold.
	fun above_threshold: Set[ELM] is abstract

	# The measured elements, sorted by their values
	# (using `default_reverse_comparator`).
	fun sort: Array[ELM] do
		var sorted = values.keys_sorted_by_values(default_reverse_comparator)
		return sorted
	end
end
231
# A Metric that collects integer data
#
# Used to count things
class IntMetric
	super Metric

	redef type VAL: Int
	redef type RES: Counter[ELM]

	# The `Counter` that internally stores the values of the metric.
	protected var values_cache = new Counter[ELM]

	redef fun values do
		return values_cache
	end

	redef fun clear do
		values_cache.clear
	end

	redef fun sum do
		return values_cache.sum
	end

	# The element with the highest count.
	#
	# The metric must not be empty.
	redef fun max do
		assert not values_cache.is_empty
		var best = values_cache.max
		return best.as(not null)
	end

	# The element with the lowest count.
	#
	# The metric must not be empty.
	redef fun min do
		assert not values_cache.is_empty
		var worst = values_cache.min
		return worst.as(not null)
	end

	# The average of the counts, as computed by the underlying `Counter`.
	redef fun avg do
		return values_cache.avg
	end

	# The standard deviation of the counts, as computed by the underlying `Counter`.
	redef fun std_dev do
		return values_cache.std_dev
	end

	# The elements whose count is strictly greater than `threshold`.
	redef fun above_threshold do
		var limit = threshold
		var selected = new HashSet[ELM]
		for element, value in values do
			if value.to_f > limit then
				selected.add(element)
			end
		end
		return selected
	end

	# Print the common statistics, followed by the sum of the counts.
	redef fun to_console(indent, colors) do
		super
		var line = "{"\t" * indent} sum: {sum}"
		if colors then line = line.light_gray
		print line
	end
end
283
# A Metric that collects float data
#
# Used for summarization
#
# `not a number` values may be stored in the metric, but they are ignored
# by the statistics (`sum`, `avg`, `std_dev`, `max` and `min`).
class FloatMetric
	super Metric

	redef type VAL: Float

	# The `HashMap` that internally stores the values of the metric.
	protected var values_cache = new HashMap[ELM, VAL]

	redef fun values do return values_cache

	redef fun clear do values_cache.clear

	# The sum of all the values that are not NaN.
	redef fun sum do
		var total = 0.0
		for v in values.values do
			if v.is_nan then continue
			total += v
		end
		return total
	end

	# The element with the highest value, NaN values being ignored.
	#
	# The metric must not be empty.
	# If all the values are NaN, the first iterated element is returned.
	redef fun max do
		assert not values.is_empty
		var best: nullable Float = null
		var elem: nullable ELM = null
		for e, v in values do
			# Fallback so that something is returned even if all values are NaN.
			if elem == null then elem = e
			# Comparisons with NaN are always false: a NaN must never be
			# selected as the current best, or it would stay there forever.
			if v.is_nan then continue
			if best == null or v > best then
				best = v
				elem = e
			end
		end
		return elem.as(not null)
	end

	# The element with the lowest value, NaN values being ignored.
	#
	# The metric must not be empty.
	# If all the values are NaN, the first iterated element is returned.
	redef fun min do
		assert not values.is_empty
		var best: nullable Float = null
		var elem: nullable ELM = null
		for e, v in values do
			# Fallback so that something is returned even if all values are NaN.
			if elem == null then elem = e
			# See `max` for the reason why NaN values are skipped.
			if v.is_nan then continue
			if best == null or v < best then
				best = v
				elem = e
			end
		end
		return elem.as(not null)
	end

	# The average of the values that are not NaN.
	#
	# Returns `0.0` if there is no such value.
	redef fun avg do
		var total = 0.0
		var count = 0
		for v in values.values do
			if v.is_nan then continue
			total += v
			count += 1
		end
		# Skipped values must not be counted in the denominator,
		# or they would weight as zeros in the average.
		if count == 0 then return 0.0
		return total / count.to_f
	end

	# The (population) standard deviation of the values that are not NaN.
	#
	# Returns `0.0` if there is no such value.
	redef fun std_dev do
		# The average is loop-invariant: compute it only once.
		var mean = avg
		var acc = 0.0
		var count = 0
		for value in values.values do
			if value.is_nan then continue
			acc += (value - mean).pow(2.0)
			count += 1
		end
		if count == 0 then return 0.0
		return (acc / count.to_f).sqrt
	end

	# The elements whose value is strictly greater than `threshold`.
	#
	# NaN values are never selected since comparisons with NaN are false.
	redef fun above_threshold do
		var above = new HashSet[ELM]
		var limit = threshold
		for element, value in values do
			if value > limit then above.add(element)
		end
		return above
	end

	# Print the common statistics, followed by the sum of the values.
	redef fun to_console(indent, colors) do
		super
		if colors then
			print "{"\t" * indent} sum: {sum}".light_gray
		else
			print "{"\t" * indent} sum: {sum}"
		end
	end
end
367
# A MetricSet is a metric holder
#
# Its purpose is to be extended with a metric collect service
class MetricSet

	# Type of the elements measured by the metrics of this `MetricSet`.
	type ELM: Object

	# The metrics to compute.
	var metrics: Set[Metric] = new HashSet[Metric]

	# Add one or more metrics to the set.
	fun register(metrics: Metric...) do
		for m in metrics do
			self.metrics.add(m)
		end
	end

	# Clear the results of all the registered metrics.
	fun clear do
		for m in metrics do
			m.clear
		end
	end

	# Collect all the registered metrics on `elements`.
	fun collect(elements: Set[ELM]) do
		for m in metrics do
			m.collect(elements)
		end
	end

	# Pretty print the results of all the registered metrics in the console.
	fun to_console(indent: Int, colors: Bool) do
		for m in metrics do
			m.to_console(indent, colors)
		end
	end

	# Export the metric set in CSV format.
	#
	# The document has one column for the entry, followed by one column per
	# metric, and one record per element measured by at least one metric.
	# Cells of elements that have no value for a metric contain `n/a`.
	fun to_csv: CsvDocument do
		var doc = new CsvDocument
		doc.format = new CsvFormat('"', ';', "\n")

		# Headers: the entry then the name of each metric.
		doc.header.add("entry")
		for m in metrics do
			doc.header.add(m.name)
		end

		# Merge the elements measured by all the metrics.
		var entries = new HashSet[ELM]
		for m in metrics do
			for entry in m.values.keys do
				entries.add(entry)
			end
		end

		# One record per element, one cell per metric.
		for entry in entries do
			var row = [entry.to_s]
			for m in metrics do
				var cell = if m.has_element(entry) then m[entry].to_s else "n/a"
				row.add(cell)
			end
			doc.records.add(row)
		end
		return doc
	end
end