Simple numerical statistical analysis and presentation

Introduced classes

class Counter[E: nullable Object]

counter :: Counter

A counter counts occurrences of things
private class CounterComparator[E: nullable Object]

counter :: CounterComparator

Redefined classes

redef interface Collection[E: nullable Object]

counter :: counter $ Collection

The root of the collection hierarchy.
redef class POSet[E: nullable Object]

counter :: counter $ POSet

Pre-order set graph.
redef class Sys

counter :: counter $ Sys

The main class of the program.

All class definitions

redef interface Collection[E: nullable Object]

counter :: counter $ Collection

The root of the collection hierarchy.
class Counter[E: nullable Object]

counter $ Counter

A counter counts occurrences of things
private class CounterComparator[E: nullable Object]

counter $ CounterComparator

redef class POSet[E: nullable Object]

counter :: counter $ POSet

Pre-order set graph.
redef class Sys

counter :: counter $ Sys

The main class of the program.
package_diagram counter::counter counter poset poset counter::counter->poset serialization serialization poset->serialization ...serialization ... ...serialization->serialization array_debug::array_debug array_debug array_debug::array_debug->counter::counter bucketed_game::bucketed_game bucketed_game bucketed_game::bucketed_game->counter::counter vsm::vsm vsm vsm::vsm->counter::counter text_stat::text_stat text_stat text_stat::text_stat->counter::counter nitc::abstract_compiler abstract_compiler nitc::abstract_compiler->counter::counter nitc::catalog catalog nitc::catalog->counter::counter nitc::metrics_base metrics_base nitc::metrics_base->counter::counter nitc::nitsmell_toolcontext nitsmell_toolcontext nitc::nitsmell_toolcontext->counter::counter nitc::test_model_visitor test_model_visitor nitc::test_model_visitor->counter::counter a_star-m a_star-m a_star-m->array_debug::array_debug a_star-m->bucketed_game::bucketed_game a_star-m->text_stat::text_stat a_star-m->nitc::test_model_visitor a_star-m... ... a_star-m...->a_star-m nlp::nlp nlp nlp::nlp->vsm::vsm vsm::example_vsm example_vsm vsm::example_vsm->vsm::vsm nlp::nlp... ... nlp::nlp...->nlp::nlp vsm::example_vsm... ... vsm::example_vsm...->vsm::example_vsm nitc::separate_compiler separate_compiler nitc::separate_compiler->nitc::abstract_compiler nitc::global_compiler global_compiler nitc::global_compiler->nitc::abstract_compiler nitc::light light nitc::light->nitc::abstract_compiler nitc::memory_logger memory_logger nitc::memory_logger->nitc::abstract_compiler nitc::compiler_serialization compiler_serialization nitc::compiler_serialization->nitc::abstract_compiler nitc::android android nitc::android->nitc::abstract_compiler nitc::emscripten emscripten nitc::emscripten->nitc::abstract_compiler nitc::xcode_templates xcode_templates nitc::xcode_templates->nitc::abstract_compiler nitc::separate_compiler... ... nitc::separate_compiler...->nitc::separate_compiler nitc::global_compiler... ... nitc::global_compiler...->nitc::global_compiler nitc::light... ... nitc::light...->nitc::light nitc::memory_logger... ... nitc::memory_logger...->nitc::memory_logger nitc::compiler_serialization... ... nitc::compiler_serialization...->nitc::compiler_serialization nitc::android... ... nitc::android...->nitc::android nitc::emscripten... ... nitc::emscripten...->nitc::emscripten nitc::xcode_templates... ... nitc::xcode_templates...->nitc::xcode_templates nitc::html_model html_model nitc::html_model->nitc::catalog nitc::commands_catalog commands_catalog nitc::commands_catalog->nitc::catalog nitc::html_model... ... nitc::html_model...->nitc::html_model nitc::commands_catalog... ... nitc::commands_catalog...->nitc::commands_catalog nitc::metrics_base... ... nitc::metrics_base...->nitc::metrics_base nitc::method_analyze_metrics method_analyze_metrics nitc::method_analyze_metrics->nitc::nitsmell_toolcontext nitc::method_analyze_metrics... ... nitc::method_analyze_metrics...->nitc::method_analyze_metrics

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module meta

meta :: meta

Simple user-defined meta-level to manipulate types of instances as object.
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module serialization_core

serialization :: serialization_core

Abstract services to serialize Nit objects to different formats
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module poset

poset :: poset

Pre order sets and partial order set (ie hierarchies)

Children

module abstract_compiler

nitc :: abstract_compiler

Abstract compiler
module array_debug

array_debug :: array_debug

Exposes functions to help profile or debug Arrays.
module bucketed_game

bucketed_game :: bucketed_game

Game framework with an emphasis on efficient event coordination
module catalog

nitc :: catalog

Basic catalog generator for Nit packages
module metrics_base

nitc :: metrics_base

Helpers for various statistics tools.
module test_model_visitor

nitc :: test_model_visitor

Example of model_visitor
module text_stat

text_stat :: text_stat

Injects stat-calculating functionalities to Text and its variants
module vsm

vsm :: vsm

Vector Space Model

Descendants

module a_star-m

a_star-m

module android

nitc :: android

Compile program for the Android platform
module api

nitc :: api

Components required to build a web server about the nit model.
module api_auth

nitc :: api_auth

module api_base

nitc :: api_base

Base classes used by nitweb.
module api_docdown

nitc :: api_docdown

Nitdoc specific Markdown format handling for Nitweb
module api_feedback

nitc :: api_feedback

Feedback related features
module api_light

nitc :: api_light

Highlight and collect messages from a piece of code
module api_model

nitc :: api_model

module ast_metrics

nitc :: ast_metrics

Metrics about the nodes and identifiers in the AST
module commands_catalog

nitc :: commands_catalog

Commands to retrieve Catalog related data
module commands_docdown

nitc :: commands_docdown

Doc down related queries
module commands_http

nitc :: commands_http

Initialize commands from HTTP requests
module commands_parser

nitc :: commands_parser

A parser that create DocCommand from a string
module compiler

nitc :: compiler

Compilation to C
module compiler_ffi

nitc :: compiler_ffi

Full FFI support for the compiler
module compiler_serialization

nitc :: compiler_serialization

Serialization support for the compiler
module detect_covariance

nitc :: detect_covariance

Detect the static usage of covariance in the code.
module detect_variance_constraints

nitc :: detect_variance_constraints

Collect metrics about detected variances constraints on formal types.
module emscripten

nitc :: emscripten

Compile to JavaScript using the Emscripten SDK
module example_vsm

vsm :: example_vsm

Example using a FileIndex
module generate_hierarchies

nitc :: generate_hierarchies

Create dot files for various hierarchies of a model.
module global_compiler

nitc :: global_compiler

Global compilation of a Nit program
module html_commands

nitc :: html_commands

Render commands results as HTML
module html_model

nitc :: html_model

Translate mentities to html blocks.
module inheritance_metrics

nitc :: inheritance_metrics

Collect metrics about inheritance usage
module ios

nitc :: ios

Compile programs for the iOS platform
module json_commands

nitc :: json_commands

Translate command results to json
module json_model

nitc :: json_model

Make model entities Serializable.
module light

nitc :: light

Light FFI support for the compiler
module light_only

nitc :: light_only

Compiler support for the light FFI only, detects unsupported usage of callbacks
module mclasses_metrics

nitc :: mclasses_metrics

Collect common metrics about mclasses
module md_commands

nitc :: md_commands

Render commands results as Markdown
module memory_logger

nitc :: memory_logger

Extension to inject memory-tracing instrumentation in code generated by nitc.
module mendel_metrics

nitc :: mendel_metrics

The Mendel model helps to understand class hierarchies.
module metrics

nitc :: metrics

Various statistics about Nit models and programs
module mmodules_metrics

nitc :: mmodules_metrics

Collect common metrics about modules
module model_hyperdoc

nitc :: model_hyperdoc

Dump of Nit model into hypertext human-readable format.
module nitc

nitc :: nitc

A Nit compiler
module nitcatalog

nitc :: nitcatalog

Basic catalog generator for Nit packages
module nitdoc

nitc :: nitdoc

Generator of static API documentation for the Nit language
module nith

nitc :: nith

A ligHt Nit compiler
module nitmetrics

nitc :: nitmetrics

A program that collects various metrics on nit programs and libraries
module nitsmells

nitc :: nitsmells

module nitweb

nitc :: nitweb

Runs a webserver based on nitcorn that render things from model.
module nitx

nitc :: nitx

nitx, a command tool that displays useful data about Nit code
module nlp

nlp :: nlp

Natural Language Processor based on the StanfordNLP core.
module nlp_index

nlp :: nlp_index

Example showing how to use a NLPFileIndex.
module nullables_metrics

nitc :: nullables_metrics

Statistics about the usage of nullables
module poset_metrics

nitc :: poset_metrics

Metrics about the various posets of the model of a Nit program
module readme_metrics

nitc :: readme_metrics

Collect common metrics about README files
module refinement_metrics

nitc :: refinement_metrics

Collect metrics about refinement usage
module rta_metrics

nitc :: rta_metrics

Metrics from RTA
module self_metrics

nitc :: self_metrics

Metrics about the usage of explicit and implicit self
module separate_compiler

nitc :: separate_compiler

Separate compilation of a Nit program
module separate_erasure_compiler

nitc :: separate_erasure_compiler

Separate compilation of a Nit program with generic type erasure
module static

nitc :: static

Nitdoc generation framework
module static_base

nitc :: static_base

Base entities shared by all the nitdoc code
module static_cards

nitc :: static_cards

Cards templates for the static documentation
module static_html

nitc :: static_html

Render documentation pages as HTML
module static_index

nitc :: static_index

Manage indexing of Nit model for Nitdoc QuickSearch.
module static_structure

nitc :: static_structure

Composes the pages of the static documentation
module static_types_metrics

nitc :: static_types_metrics

Metrics on the usage of explicit static types.
module tables_metrics

nitc :: tables_metrics

Metrics on table generation
module term

nitc :: term

module xcode_templates

nitc :: xcode_templates

Templates and other services to create XCode projects
# Simple numerical statistical analysis and presentation
module counter

import poset

# A counter counts occurrences of things
# Use this instead of a `HashMap[E, Int]`
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c["a"]   == 2
# assert c["b"]   == 3
# assert c["c"]   == 1
# assert c["d"]   == 0
# ~~~
#
# The counter class can also be used to gather statistical informations.
#
# ~~~~
# assert c.length == 3   # because 3 distinct values
# assert c.max    == "b" # because "b" has the most count (3)
# assert c.avg    == 2.0 # because it is the mean of the counts
# ~~~~
class Counter[E]
	super Map[E, Int]

	# Total number of counted occurrences
	#
	# ~~~
	# var c = new Counter[String]
	# assert c.sum == 0
	# c.inc_all(["a", "a", "b", "b", "b", "c"])
	# assert c.sum == 6
	# ~~~
	var sum: Int = 0

	private var map = new HashMap[E, Int]

	redef fun iterator do return map.iterator

	# The number of counted occurrences of `e`
	redef fun [](e)
	do
		var map = self.map
		if map.has_key(e) then return map[e]
		return 0
	end

	redef fun []=(e, value)
	do
		sum -= self[e]
		self.map[e] = value
		sum += value
	end

	redef fun keys do return map.keys

	redef fun values do return map.values

	redef fun length do return map.length

	redef fun is_empty do return map.is_empty

	redef fun clear do
		sum = 0
		map.clear
	end

	redef fun add_all(other) do
		for k, v in other do
			self[k] += v
		end
	end

	# Count one more occurrence of `e`
	fun inc(e: E)
	do
		self.map[e] = self[e] + 1
		sum += 1
	end

	# Count one more for each element of `es`
	fun inc_all(es: Collection[E])
	do
		for e in es do inc(e)
	end

	# Decrement the value of `e` by 1
	fun dec(e: E) do
		if not has_key(e) then
			self.map[e] = 0
		else
			self.map[e] = self[e] - 1
			sum += - 1
		end
	end

	# Decrement the value for each element of `es`
	fun dec_all(es: Collection[E])
	do
		for e in es do dec(e)
	end

	# A new Counter initialized with `inc_all`.
	init from(es: Collection[E])
	do
		inc_all(es)
	end

	# Return an array of elements sorted by occurrences
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.sort == ["c", "a", "b"]
	# ~~~
	fun sort: Array[E]
	do
		var res = map.keys.to_a
		var sorter = new CounterComparator[E](self)
		sorter.sort(res)
		return res
	end

	# The method used to display an element
	# @toimplement by default just call `to_s` on the element
	protected fun element_to_s(e: E): String
	do
		if e == null then return "null"
		return e.to_s
	end

	# Display statistical information
	fun print_summary
	do
		var list = self.sort
		print " population: {list.length}"
		if list.is_empty then return
		print " minimum value: {self[list.first]}"
		print " maximum value: {self[list.last]}"
		print " total value: {self.sum}"
		print " average value: {div(self.sum,list.length)}"
		print " distribution:"
		var count = 0
		var sum = 0
		var limit = self[list.first]
		for t in list do
			if self[t] > limit then
				print "  <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
				count = 0
				sum = 0
				while self[t] > limit do
					limit = limit * 2
					if limit == 0 then limit = 1
				end
			end
			count += 1
			sum += self[t]
		end
		print "  <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
	end

	# Display up to `count` most used elements and `count` least used elements
	# Use `element_to_s` to display the element
	fun print_elements(count: Int)
	do
		print " list:"
		var list = self.sort
		var min = count
		if list.length <= count*2 then min = list.length
		for i in [0..min[ do
			var t = list[list.length-i-1]
			print "  {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
		end
		if list.length <= count*2 then return
		print "  ..."
		for i in [0..min[ do
			var t = list[min-i-1]
			print "  {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
		end
	end

	# Return the element with the highest value (aka. the mode)
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.max == "b"
	# ~~~
	#
	# If more than one max exists, the first one is returned.
	fun max: nullable E do
		var max: nullable Int = null
		var elem: nullable E = null
		for e in map.keys do
			var v = map[e]
			if max == null or v > max then
				max = v
				elem = e
			end
		end
		return elem
	end

	# Return the couple with the lowest value
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.min == "c"
	# ~~~
	#
	# If more than one min exists, the first one is returned.
	fun min: nullable E do
		var min: nullable Int = null
		var elem: nullable E = null
		for e in map.keys do
			var v = map[e]
			if min == null or v < min then
				min = v
				elem = e
			end
		end
		return elem
	end

	# Values average (aka. arithmetic mean)
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.avg == 2.0
	# ~~~
	fun avg: Float do
		if values.is_empty then return 0.0
		return (sum / values.length).to_f
	end

	# The standard derivation of the counter values
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.std_dev > 0.81
	# assert c.std_dev < 0.82
	# ~~~
	fun std_dev: Float do
		var avg = self.avg
		var sum = 0.0
		for value in map.values do
			sum += (value.to_f - avg).pow(2.0)
		end
		return (sum / map.length.to_f).sqrt
	end

	# The information entropy (Shannon entropy) of the elements in the counter (in bits).
	fun entropy: Float
	do
		var res = 0.0
		var sum = self.sum.to_f
		for k, v in self do
			var f = v.to_f / sum
			res = res - f * f.log_base(2.0)
		end
		return res
	end

	# Prints the content of the counter along with statistics
	#
	# Content is printed in order (if available) from lowest to highest on the keys.
	# Else, it is printed as-is
	fun print_content do
		var a = keys.to_a
		if a isa Array[Comparable] then default_comparator.sort(a)
		var subtotal = 0
		for i in a do
			subtotal += self[i]
			printn("* ", i or else "null", " = ", self[i], " => occurences ", self[i].to_f / sum.to_f * 100.0, "%, cumulative ", subtotal.to_f / sum.to_f * 100.0, "% \n")
		end
	end

	# Packs elements into separate arrays based on their occurences
	#
	# ~~~nit
	#	var x = "aaaabbbeeecccddhhgjt"
	#	var c = new Counter[Char]
	#	for i in x do c.inc i
	#	var ret = c.pack
	#	assert ret.join(", ") == """[t,g,j], [d,h], [c,b,e], [a]"""
	# ~~~
	fun pack: Array[Array[E]] do
		var ret = new Array[Array[E]]
		var base = self.sort
		if base.is_empty then return ret
		var curr = new Array[E]
		curr.push base.first
		var curr_score = self[base.first]
		base.shift
		for i in base do
			if self[i] == curr_score then
				curr.push i
				continue
			end
			curr_score = self[i]
			ret.push curr
			curr = new Array[E]
			curr.push i
		end
		if not curr.is_empty then ret.push curr
		return ret
	end
end

redef class Collection[E]
	# Create and fill up a counter with the elements of `self.
	#
	# ~~~
	# var cpt = "abaa".chars.to_counter
	# assert cpt['a'] == 3
	# assert cpt['b'] == 1
	# assert cpt.length == 2
	# assert cpt.sum == 4
	# ~~~
	fun to_counter: Counter[E]
	do
		var res = new Counter[E]
		res.inc_all(self)
		return res
	end
end

private class CounterComparator[E]
	super Comparator
	redef type COMPARED: E
	var counter: Counter[E]
	redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
end

redef class POSet[E]
	private fun show_counter(c: Counter[Int])
	do
		var list = c.sort
		default_comparator.sort(list)
		for e in list do
			print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
		end
	end

	# Display exhaustive metrics about the poset
	fun print_metrics
	do
		var nb_greaters = new Counter[E]
		var nb_direct_greaters = new Counter[E]
		var nb_smallers = new Counter[E]
		var nb_direct_smallers = new Counter[E]
		var nb_direct_edges = 0
		var nb_edges = 0
		for n in self do
			var ne = self[n]
			nb_edges += ne.greaters.length
			nb_direct_edges += ne.direct_greaters.length
			nb_greaters[n] = ne.greaters.length
			nb_direct_greaters[n] = ne.direct_greaters.length
			nb_smallers[n] = ne.smallers.length
			nb_direct_smallers[n] = ne.direct_smallers.length
		end
		print "Number of nodes: {self.length}"
		print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
		print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
		print "Distribution of greaters"
		nb_greaters.print_summary
		print "Distribution of direct greaters"
		nb_direct_greaters.print_summary
		print "Distribution of smallers"
		nb_smallers.print_summary
		print "Distribution of direct smallers"
		nb_direct_smallers.print_summary
	end
end

# Helper function to display `n/d` and handle division by 0
fun div(n: Int, d: Int): String
do
	if d == 0 then return "na"
	return ((100*n/d).to_f/100.0).to_precision(2)
end
lib/counter/counter.nit:15,1--395,3