# This file is part of NIT ( http://www.nitlanguage.org ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Simple numerical statistical analysis and presentation
module counter

import poset

# A counter counts occurrences of things
# Use this instead of a `HashMap[E, Int]`
#
# ~~~
# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
# assert c["a"]   == 2
# assert c["b"]   == 3
# assert c["c"]   == 1
# assert c["d"]   == 0
# ~~~
#
# The counter class can also be used to gather statistical informations.
#
# ~~~~
# assert c.length == 3   # because 3 distinct values
# assert c.max    == "b" # because "b" has the most count (3)
# assert c.avg    == 2.0 # because it is the mean of the counts
# ~~~~
class Counter[E]
	super Map[E, Int]

	# Total number of counted occurrences
	#
	# ~~~
	# var c = new Counter[String]
	# assert c.sum == 0
	# c.inc_all(["a", "a", "b", "b", "b", "c"])
	# assert c.sum == 6
	# ~~~
	var sum: Int = 0

	private var map = new HashMap[E, Int]

	redef fun iterator do return map.iterator

	# The number of counted occurrences of `e`
	redef fun [](e: E): Int
	do
		var map = self.map
		if map.has_key(e) then return map[e]
		return 0
	end

	redef fun []=(e: E, value: Int)
	do
		sum -= self[e]
		self.map[e] = value
		sum += value
	end

	redef fun keys do return map.keys

	redef fun values do return map.values

	redef fun length do return map.length

	redef fun is_empty do return map.is_empty

	redef fun clear do
		sum = 0
		map.clear
	end

	# Count one more occurrence of `e`
	fun inc(e: E)
	do
		self.map[e] = self[e] + 1
		sum += 1
	end

	# Count one more for each element of `es`
	fun inc_all(es: Collection[E])
	do
		for e in es do inc(e)
	end

	# A new Counter initialized with `inc_all`.
	init from(es: Collection[E])
	do
		inc_all(es)
	end

	# Return an array of elements sorted by occurrences
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.sort == ["c", "a", "b"]
	# ~~~
	fun sort: Array[E]
	do
		var res = map.keys.to_a
		var sorter = new CounterComparator[E](self)
		sorter.sort(res)
		return res
	end

	# The method used to display an element
	# @toimplement by default just call `to_s` on the element
	protected fun element_to_s(e: E): String
	do
		return e.to_s
	end

	# Display statistical information
	fun print_summary
	do
		var list = self.sort
		print " population: {list.length}"
		if list.is_empty then return
		print " minimum value: {self[list.first]}"
		print " maximum value: {self[list.last]}"
		print " total value: {self.sum}"
		print " average value: {div(self.sum,list.length)}"
		print " distribution:"
		var count = 0
		var sum = 0
		var limit = self[list.first]
		for t in list do
			if self[t] > limit then
				print "  <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
				count = 0
				sum = 0
				while self[t] > limit do
					limit = limit * 2
					if limit == 0 then limit = 1
				end
			end
			count += 1
			sum += self[t]
		end
		print "  <={limit}: sub-population={count} ({div(count*100,list.length)}%); cumulated value={sum} ({div(sum*100,self.sum)}%)"
	end

	# Display up to `count` most used elements and `count` least used elements
	# Use `element_to_s` to display the element
	fun print_elements(count: Int)
	do
		print " list:"
		var list = self.sort
		var min = count
		if list.length <= count*2 then min = list.length
		for i in [0..min[ do
			var t = list[list.length-i-1]
			print "  {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
		end
		if list.length <= count*2 then return
		print "  ..."
		for i in [0..min[ do
			var t = list[min-i-1]
			print "  {element_to_s(t)}: {self[t]} ({div(self[t]*100,self.sum)}%)"
		end
	end

	# Return the element with the highest value (aka. the mode)
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.max == "b"
	# ~~~
	#
	# If more than one max exists, the first one is returned.
	fun max: nullable E do
		var max: nullable Int = null
		var elem: nullable E = null
		for e in map.keys do
			var v = map[e]
			if max == null or v > max then
				max = v
				elem = e
			end
		end
		return elem
	end

	# Return the couple with the lowest value
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.min == "c"
	# ~~~
	#
	# If more than one min exists, the first one is returned.
	fun min: nullable E do
		var min: nullable Int = null
		var elem: nullable E = null
		for e in map.keys do
			var v = map[e]
			if min == null or v < min then
				min = v
				elem = e
			end
		end
		return elem
	end

	# Values average (aka. arithmetic mean)
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.avg == 2.0
	# ~~~
	fun avg: Float do
		if values.is_empty then return 0.0
		return (sum / values.length).to_f
	end

	# The standard derivation of the counter values
	#
	# ~~~
	# var c = new Counter[String].from(["a", "a", "b", "b", "b", "c"])
	# assert c.std_dev > 0.81
	# assert c.std_dev < 0.82
	# ~~~
	fun std_dev: Float do
		var avg = self.avg
		var sum = 0.0
		for value in map.values do
			sum += (value.to_f - avg).pow(2.0)
		end
		return (sum / map.length.to_f).sqrt
	end
end

private class CounterComparator[E]
	super Comparator
	redef type COMPARED: E
	var counter: Counter[E]
	redef fun compare(a,b) do return self.counter.map[a] <=> self.counter.map[b]
end

redef class POSet[E]
	private fun show_counter(c: Counter[Int])
	do
		var list = c.sort
		default_comparator.sort(list)
		for e in list do
			print " {e} -> {c[e]} times ({div(c[e]*100, c.sum)}%)"
		end
	end

	# Display exhaustive metrics about the poset
	fun print_metrics
	do
		var nb_greaters = new Counter[E]
		var nb_direct_greaters = new Counter[E]
		var nb_smallers = new Counter[E]
		var nb_direct_smallers = new Counter[E]
		var nb_direct_edges = 0
		var nb_edges = 0
		for n in self do
			var ne = self[n]
			nb_edges += ne.greaters.length
			nb_direct_edges += ne.direct_greaters.length
			nb_greaters[n] = ne.greaters.length
			nb_direct_greaters[n] = ne.direct_greaters.length
			nb_smallers[n] = ne.smallers.length
			nb_direct_smallers[n] = ne.direct_smallers.length
		end
		print "Number of nodes: {self.length}"
		print "Number of edges: {nb_edges} ({div(nb_edges,self.length)} per node)"
		print "Number of direct edges: {nb_direct_edges} ({div(nb_direct_edges,self.length)} per node)"
		print "Distribution of greaters"
		nb_greaters.print_summary
		print "Distribution of direct greaters"
		nb_direct_greaters.print_summary
		print "Distribution of smallers"
		nb_smallers.print_summary
		print "Distribution of direct smallers"
		nb_direct_smallers.print_summary
	end
end

# Helper function to display `n/d` and handle division by 0
fun div(n: Int, d: Int): String
do
	if d == 0 then return "na"
	return ((100*n/d).to_f/100.0).to_precision(2)
end