Inject behavior analysis to hash-collections (HashMap, HashSet, etc.)

Accesses to hash collections are instrumented, and statistics are automatically displayed at the end of the program.

This module helps to detect, and track bad behavior on hash-collections, especially collisions.

Simple usage:

  1. compile your program with -m hash_debug
  2. execute your program.

Advanced usage:

import this module and use the functions Sys::show_hash_stats and Sys::clear_hash_stats at strategic points.

You can also use some dynamic call-graph tools (like valgrind) and look at callers of HashCollection::gt_collide and HashCollection::st_collide.

Redefined classes

redef class Sys

hash_debug :: hash_debug $ Sys

The main class of the program.

All class definitions

redef class Sys

hash_debug :: hash_debug $ Sys

The main class of the program.
package_diagram hash_debug::hash_debug hash_debug core core hash_debug::hash_debug->core a_star-m a_star-m a_star-m->hash_debug::hash_debug

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.

Children

module a_star-m

a_star-m

# Inject behavior analysis to hash-collections (HashMap, HashSet, etc.)
# Accesses to hash collections are instrumented, and statistics are
# automatically displayed at the end of the program.
#
# This module helps to detect, and track bad behavior on hash-collections,
# especially collisions.
#
# Simple usage:
#
# 1. compile your program with `-m hash_debug`
# 2. execute your program.
#
# Advanced usage:
#
# import this module and use the functions `Sys::show_hash_stats` and
# `Sys::clear_hash_stats` at strategic points.
#
# You can also use some dynamic call-graph tools (like valgrind) and look
# at callers of `HashCollection::gt_collide` and `HashCollection::st_collide`.
module hash_debug

intrude import core::collection::hash_collection
import core

redef class Sys
	# Number of calls of `HashCollection::node_at_idx`
	var gt_count = 0
	# Number of calls of `HashCollection::node_at_idx` that collided
	var gt_coll = 0
	# Total length of collisions on calls of `HashCollection::node_at_idx` that collided
	var gt_tot_coll = 0
	# Total length of hash collections receiver of `HashCollection::node_at_idx`
	var gt_tot_length = 0
	# Total capacity of hash collections receiver `HashCollection::node_at_idx`
	var gt_tot_cap = 0

	# Number of calls of `HashCollection::store`
	var st_count = 0
	# Number of calls of `HashCollection::store` that collided
	var st_coll = 0
	# Total length of collisions on calls of `HashCollection::store` that collided
	var st_tot_coll = 0
	# Total length of hash collections receiver of `HashCollection::store`
	var st_tot_length = 0
	# Total capacity of hash collections receiver `HashCollection::store`
	var st_tot_cap = 0

	# Number of calls of `HashCollection::enlarge`
	var en_count = 0
	# Total length of hash collections receiver of `HashCollection::enlarge`
	var en_tot_length = 0
	# Total capacity of hash collections receiver `HashCollection::enlarge`
	var en_tot_cap = 0

	private fun div(n,d: Int): String
	do
		if d == 0 then return "NA"
		return (n.to_f / d.to_f).to_precision(2)
	end

	# Activate the automatic call of `show_hash_stats` at the end of programs
	# If false, you can still manually invoke `show_hash_stats`.
	#
	# Default: true
	var auto_hash_display = true is writable

	# Force the display of the current hash statistics
	fun show_hash_stats
	do
		if gt_count == 0 and st_count == 0 then
			print "~~~No hash statistics~~~"
			return
		end

		print """
~~~Hash statistics~~~
GET:
number of get and has_key: {{{gt_count}}}
number of collisions: {{{gt_coll}}} ({{{div(gt_coll*100,gt_count)}}}%)
average length of collisions: {{{div(gt_tot_coll,gt_coll)}}}
average length of considered collections: {{{div(gt_tot_length,sys.gt_count)}}}
average capacity of considered collections: {{{div(gt_tot_cap,sys.gt_count)}}} ({{{div(gt_tot_cap*100,gt_tot_length)}}}%)

STORE:
number of stores: {{{st_count}}}
number of collisions: {{{st_coll}}} ({{{div(st_coll*100,st_count)}}}%)
average length of collisions: {{{div(st_tot_coll,st_coll)}}}
average length of considered collections: {{{div(st_tot_length,sys.st_count)}}}
average capacity or considered collections: {{{div(st_tot_cap,sys.st_count)}}} ({{{div(st_tot_cap*100,st_tot_length)}}}%)

ENLARGE:
number of enlarge: {{{en_count}}}
average length of considered collections: {{{div(en_tot_length,sys.en_count)}}}
average capacity or considered collections: {{{div(en_tot_cap,sys.en_count)}}} ({{{div(en_tot_cap*100,en_tot_length)}}}%)
~~~~~~"""
	end

	# Reset the hash statistics to 0
	fun clear_hash_stats
	do
		gt_count = 0
		gt_coll = 0
		gt_tot_coll = 0
		gt_tot_length = 0
		gt_tot_cap = 0

		st_count = 0
		st_coll = 0
		st_tot_coll = 0
		st_tot_length = 0
		st_tot_cap = 0
	end

	redef fun run do
		super
		if auto_hash_display then show_hash_stats
	end
end

redef fun exit(i)
do
	if sys.auto_hash_display then sys.show_hash_stats
	super
end

redef class HashCollection[K]
	redef fun node_at_idx(i,k)
	do
		sys.gt_count += 1
		sys.gt_tot_length += _the_length
		sys.gt_tot_cap += _capacity
		var c = _array[i]
		if c != null and c._next_in_bucklet != null then gt_collide(i,k)

		return super
	end

	redef fun enlarge(c)
	do
		super
		sys.en_count += 1
		sys.en_tot_length += _the_length
		sys.en_tot_cap += _capacity
	end

	# Count and update length of collisions for `node_at_idx`
	# Note for dynamic call-graph analysis: callers of this functions are
	# responsible of collisions.
	fun gt_collide(i: Int, k: K)
	do
		var c = _array[i]
		sys.gt_coll += 1
		while c != null do
			sys.gt_tot_coll += 1
			c = c._next_in_bucklet
		end
	end

	redef fun store(i, n)
	do
		sys.st_count += 1
		if _array[i] != null then st_collide(i,n)
		sys.st_tot_length += _the_length
		sys.st_tot_cap += _capacity

		super
	end

	# Count and update length of collisions for `store`
	# Note for dynamic call-graph analysis: callers of this functions are
	# responsible of collisions.
	fun st_collide(i: Int, n: N)
	do
		var c = _array[i]
		sys.st_coll += 1
		sys.st_tot_coll += 1
		while c != null do
			sys.st_tot_coll += 1
			c = c._next_in_bucklet
		end
	end
end
lib/hash_debug/hash_debug.nit:11,1--192,3