lib: introduce module hash_debug
author Jean Privat <jean@pryen.org>
Wed, 13 Aug 2014 03:17:00 +0000 (23:17 -0400)
committer Jean Privat <jean@pryen.org>
Wed, 13 Aug 2014 13:43:23 +0000 (09:43 -0400)
Signed-off-by: Jean Privat <jean@pryen.org>

lib/hash_debug.nit [new file with mode: 0644]
tests/sav/hash_debug.res [new file with mode: 0644]
tests/sav/test_hash_debug.res [new file with mode: 0644]
tests/test_hash_debug.nit [new file with mode: 0644]

diff --git a/lib/hash_debug.nit b/lib/hash_debug.nit
new file mode 100644 (file)
index 0000000..d25c671
--- /dev/null
@@ -0,0 +1,172 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# This file is free software, which comes along with NIT.  This software is
+# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+# without  even  the implied warranty of  MERCHANTABILITY or  FITNESS FOR A
+# PARTICULAR PURPOSE.  You can modify it as you want,  provided this header
+# is kept unaltered, and a notification of the changes is added.
+# You  are  allowed  to  redistribute it and sell it, alone or as a part of
+# another product.
+
+# Inject behavior analysis to hash-collections (HashMap, HashSet, etc.)
+# Accesses to hash collections are instrumented, and statistics are
+# automatically displayed at the end of the program.
+#
+# This module helps to detect, and track bad behavior on hash-collections,
+# especially collisions.
+#
+# Simple usage:
+#
+# 1. compile your program with `-m hash_debug`
+# 2. execute your program.
+#
+# Advanced usage:
+#
+# import this module and use the functions `Sys::show_hash_stats` and
+# `Sys::clear_hash_stats` at strategic points.
+#
+# You can also use some dynamic call-graph tools (like valgrind) and look
+# at callers of `HashCollection::gt_collide` and `HashCollection::st_collide`.
+module hash_debug
+
+intrude import standard::collection::hash_collection
+import standard
+
+redef class Sys
+	# Number of calls of `HashCollection::node_at_idx`
+	var gt_count = 0
+	# Number of calls of `HashCollection::node_at_idx` that collided
+	var gt_coll = 0
+	# Total length of collisions on calls of `HashCollection::node_at_idx` that collided
+	var gt_tot_coll = 0
+	# Total length of hash collections receiver of `HashCollection::node_at_idx`
+	var gt_tot_length = 0
+	# Total capacity of hash collections receiver `HashCollection::node_at_idx`
+	var gt_tot_cap = 0
+
+	# Number of calls of `HashCollection::store`
+	var st_count = 0
+	# Number of calls of `HashCollection::store` that collided
+	var st_coll = 0
+	# Total length of collisions on calls of `HashCollection::store` that collided
+	var st_tot_coll = 0
+	# Total length of hash collections receiver of `HashCollection::store`
+	var st_tot_length = 0
+	# Total capacity of hash collections receiver `HashCollection::store`
+	var st_tot_cap = 0
+
+	# Format the ratio `n/d` with 2 digits of precision.
+	# Returns "NA" when `d == 0`, so averages over counters that were
+	# never hit stay printable instead of dividing by zero.
+	private fun div(n,d: Int): String
+	do
+		if d == 0 then return "NA"
+		return (n.to_f / d.to_f).to_precision(2)
+	end
+
+	# Activate the automatic call of `show_hash_stats` at the end of programs
+	# If false, you can still manually invoke `show_hash_stats`.
+	#
+	# Default: true
+	var auto_hash_display = true is writable
+
+	# Force the display of the current hash statistics
+	fun show_hash_stats
+	do
+		# Nothing was instrumented yet: print a short placeholder so
+		# callers still get a visible marker (the recorded tests in
+		# tests/sav rely on this exact line).
+		if gt_count == 0 and st_count == 0 then
+			print "~~~No hash statistics~~~"
+			return
+		end
+
+		# NOTE(review): the template below mixes bare `gt_count` and
+		# `sys.gt_count` (same attribute, inconsistent style), and
+		# "capacity or considered" in the STORE section looks like a
+		# typo for "of". Both are kept as-is because tests/sav/*.res
+		# record this exact output; fix them together with the tests.
+		print """
+~~~Hash statistics~~~
+GET:
+number of get and has_key: {{{gt_count}}}
+number of collisions: {{{gt_coll}}} ({{{div(gt_coll*100,gt_count)}}}%)
+average length of collisions: {{{div(gt_tot_coll,gt_coll)}}}
+average length of considered collections: {{{div(gt_tot_length,sys.gt_count)}}}
+average capacity of considered collections: {{{div(gt_tot_cap,sys.gt_count)}}} ({{{div(gt_tot_cap*100,gt_tot_length)}}}%)
+
+STORE:
+number of stores: {{{st_count}}}
+number of collisions: {{{st_coll}}} ({{{div(st_coll*100,st_count)}}}%)
+average length of collisions: {{{div(st_tot_coll,st_coll)}}}
+average length of considered collections: {{{div(st_tot_length,sys.st_count)}}}
+average capacity or considered collections: {{{div(st_tot_cap,sys.st_count)}}} ({{{div(st_tot_cap*100,st_tot_length)}}}%)
+~~~~~~"""
+	end
+
+	# Reset the hash statistics to 0
+	fun clear_hash_stats
+	do
+		gt_count = 0
+		gt_coll = 0
+		gt_tot_coll = 0
+		gt_tot_length = 0
+		gt_tot_cap = 0
+
+		st_count = 0
+		st_coll = 0
+		st_tot_coll = 0
+		st_tot_length = 0
+		st_tot_cap = 0
+	end
+
+	# Automatically display the statistics at the normal end of the
+	# program (the top-level `exit` is also redefined for early exits).
+	redef fun run do
+		super
+		if auto_hash_display then show_hash_stats
+	end
+end
+
+# Intercept explicit program exits so the statistics are not lost:
+# display them first (unless `auto_hash_display` was turned off).
+redef fun exit(i)
+do
+	if sys.auto_hash_display then
+		sys.show_hash_stats
+	end
+	super
+end
+
+redef class HashCollection[K,N]
+	redef fun node_at_idx(i,k)
+	do
+		# Instrument the lookup: one more call, plus the current
+		# length and capacity of the receiver.
+		sys.gt_count += 1
+		sys.gt_tot_length += _length
+		sys.gt_tot_cap += _capacity
+		# A lookup collides only when the bucket at `i` chains at
+		# least two nodes.
+		var c = _array[i]
+		if c != null and c._next_in_bucklet != null then gt_collide(i,k)
+
+		return super
+	end
+
+	# Count and update length of collisions for `node_at_idx`
+	# Note for dynamic call-graph analysis: callers of this functions are
+	# responsible of collisions.
+	private fun gt_collide(i: Int, k: K)
+	do
+		# Walk the whole bucket chain and add its length to the total.
+		var c = _array[i]
+		sys.gt_coll += 1
+		while c != null do
+			sys.gt_tot_coll += 1
+			c = c._next_in_bucklet
+		end
+	end
+
+	redef fun store(i, n)
+	do
+		# Instrument the insertion. `st_collide` runs before `super`,
+		# so it inspects the bucket without the node being stored.
+		sys.st_count += 1
+		if _array[i] != null then st_collide(i,n)
+		sys.st_tot_length += _length
+		sys.st_tot_cap += _capacity
+
+		super
+	end
+
+	# Count and update length of collisions for `store`
+	# Note for dynamic call-graph analysis: callers of this functions are
+	# responsible of collisions.
+	private fun st_collide(i: Int, n: N)
+	do
+		var c = _array[i]
+		sys.st_coll += 1
+		# Extra first increment: presumably it accounts for the node
+		# `n` being stored, which is not yet chained in the bucket at
+		# this point (unlike in `gt_collide`) — TODO confirm.
+		sys.st_tot_coll += 1
+		while c != null do
+			sys.st_tot_coll += 1
+			c = c._next_in_bucklet
+		end
+	end
+end
diff --git a/tests/sav/hash_debug.res b/tests/sav/hash_debug.res
new file mode 100644 (file)
index 0000000..5946805
--- /dev/null
@@ -0,0 +1 @@
+~~~No hash statistics~~~
diff --git a/tests/sav/test_hash_debug.res b/tests/sav/test_hash_debug.res
new file mode 100644 (file)
index 0000000..9748c49
--- /dev/null
@@ -0,0 +1,147 @@
+~~~No hash statistics~~~
+~~~No hash statistics~~~
+
+a1
+false
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 1
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity of considered collections: 1.00 (NA%)
+
+STORE:
+number of stores: 0
+number of collisions: 0 (NA%)
+average length of collisions: NA
+average length of considered collections: NA
+average capacity or considered collections: NA (NA%)
+~~~~~~
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 2
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity of considered collections: 1.00 (NA%)
+
+STORE:
+number of stores: 1
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity or considered collections: 1.00 (NA%)
+~~~~~~
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 3
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.33
+average capacity of considered collections: 6.33 (1900.00%)
+
+STORE:
+number of stores: 1
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity or considered collections: 1.00 (NA%)
+~~~~~~
+true
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 4
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.50
+average capacity of considered collections: 9.00 (1800.00%)
+
+STORE:
+number of stores: 1
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity or considered collections: 1.00 (NA%)
+~~~~~~
+
+a2
+false
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 5
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.60
+average capacity of considered collections: 10.60 (1766.67%)
+
+STORE:
+number of stores: 1
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.0
+average capacity or considered collections: 1.00 (NA%)
+~~~~~~
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 6
+number of collisions: 0 (0.0%)
+average length of collisions: NA
+average length of considered collections: 0.67
+average capacity of considered collections: 11.67 (1750.00%)
+
+STORE:
+number of stores: 2
+number of collisions: 1 (50.00%)
+average length of collisions: 2.00
+average length of considered collections: 0.50
+average capacity or considered collections: 9.00 (1800.00%)
+~~~~~~
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 7
+number of collisions: 1 (14.29%)
+average length of collisions: 2.00
+average length of considered collections: 0.86
+average capacity of considered collections: 12.43 (1450.00%)
+
+STORE:
+number of stores: 2
+number of collisions: 1 (50.00%)
+average length of collisions: 2.00
+average length of considered collections: 0.50
+average capacity or considered collections: 9.00 (1800.00%)
+~~~~~~
+true
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 7
+number of collisions: 1 (14.29%)
+average length of collisions: 2.00
+average length of considered collections: 0.86
+average capacity of considered collections: 12.43 (1450.00%)
+
+STORE:
+number of stores: 2
+number of collisions: 1 (50.00%)
+average length of collisions: 2.00
+average length of considered collections: 0.50
+average capacity or considered collections: 9.00 (1800.00%)
+~~~~~~
+
+end
+~~~Hash statistics~~~
+GET:
+number of get and has_key: 7
+number of collisions: 1 (14.29%)
+average length of collisions: 2.00
+average length of considered collections: 0.86
+average capacity of considered collections: 12.43 (1450.00%)
+
+STORE:
+number of stores: 2
+number of collisions: 1 (50.00%)
+average length of collisions: 2.00
+average length of considered collections: 0.50
+average capacity or considered collections: 9.00 (1800.00%)
+~~~~~~
diff --git a/tests/test_hash_debug.nit b/tests/test_hash_debug.nit
new file mode 100644 (file)
index 0000000..00858fe
--- /dev/null
@@ -0,0 +1,47 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import hash_debug
+
+# A pathological class for the instrumentation: every instance hashes
+# to the same bucket, so distinct instances are guaranteed to collide.
+class A
+	redef fun hash
+	do
+		return 0
+	end
+end
+
+# Each `show_hash_stats` call below must match one block of
+# tests/sav/test_hash_debug.res; do not reorder or alter the prints.
+sys.show_hash_stats
+# Creating the set alone touches no bucket: stats stay empty.
+var s = new HashSet[A]
+sys.show_hash_stats
+
+print "\na1"
+var a1 = new A
+# First lookup, on an empty set.
+print s.has(a1)
+sys.show_hash_stats
+s.add(a1)
+sys.show_hash_stats
+# Re-adding the same element: the recorded stats show a get, not a
+# second store.
+s.add(a1)
+sys.show_hash_stats
+print s.has(a1)
+sys.show_hash_stats
+
+print "\na2"
+var a2 = new A
+# `a2` hashes to the same bucket as `a1` (A::hash returns 0),
+# forcing collisions on the following operations.
+print s.has(a2)
+sys.show_hash_stats
+s.add(a2)
+sys.show_hash_stats
+s.add(a2)
+sys.show_hash_stats
+print s.has(a2)
+sys.show_hash_stats
+
+print "\nend"