Merge: Optimize hash collection
authorJean Privat <jean@pryen.org>
Tue, 20 Oct 2015 02:32:54 +0000 (22:32 -0400)
committerJean Privat <jean@pryen.org>
Tue, 20 Oct 2015 02:32:54 +0000 (22:32 -0400)
Some optimization when dealing with empty hash-based collection

user time for nitc nitc.nit:

* before: 0m6.640s
* after: 0m6.344s (-4.5%)

Pull-Request: #1768
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
Reviewed-by: Alexandre Terrasa <alexandre@moz-code.org>

lib/core/collection/hash_collection.nit
lib/hash_debug.nit
tests/sav/test_hash_debug.res

index 8ee3491..890ecd9 100644 (file)
@@ -24,7 +24,7 @@ end
 private abstract class HashCollection[K]
        type N: HashNode[K]
 
-       var array: nullable NativeArray[nullable N] = null # Used to store items
+       var array: NativeArray[nullable N] is noautoinit # Used to store items
        var capacity: Int = 0 # Size of _array
        var the_length: Int = 0 # Number of items in the map
 
@@ -50,6 +50,7 @@ private abstract class HashCollection[K]
        # Return the node associated with the key
        fun node_at(k: nullable Object): nullable N
        do
+               if _the_length == 0 then return null
                # cache: `is` is used instead of `==` because it is a faster filter (even if not exact)
                if k.is_same_instance(_last_accessed_key) then return _last_accessed_node
 
@@ -62,6 +63,7 @@ private abstract class HashCollection[K]
        # Return the node associated with the key (but with the index already known)
        fun node_at_idx(i: Int, k: nullable Object): nullable N
        do
+               if _the_length == 0 then return null
                var c = _array[i]
                while c != null do
                        var ck = c._key
@@ -111,6 +113,7 @@ private abstract class HashCollection[K]
        # Remove the node assosiated with the key
        fun remove_node(k: nullable Object)
        do
+               if _the_length == 0 then return
                var i = index_at(k)
                var node = node_at_idx(i, k)
                if node == null then return
@@ -162,7 +165,6 @@ private abstract class HashCollection[K]
        # Force a capacity
        fun enlarge(cap: Int)
        do
-               var old_cap = _capacity
                # get a new capacity
                if cap < _the_length + 1 then cap = _the_length + 1
                if cap <= _capacity then return
@@ -173,15 +175,6 @@ private abstract class HashCollection[K]
                var new_array = new NativeArray[nullable N](cap)
                _array = new_array
 
-               # clean the new array
-               var i = cap - 1
-               while i >=0 do
-                       new_array[i] = null
-                       i -= 1
-               end
-
-               if _capacity <= old_cap then return
-
                # Reput items in the array
                var node = _first_item
                while node != null do
@@ -253,6 +246,7 @@ class HashMap[K, V]
 
        redef fun []=(key, v)
        do
+               if _capacity == 0 then enlarge(17) # 17 because magic in `store`
                var i = index_at(key)
                var c = node_at_idx(i, key)
                if c != null then
@@ -269,7 +263,6 @@ class HashMap[K, V]
        do
                _capacity = 0
                _the_length = 0
-               enlarge(0)
        end
 
        redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy
@@ -442,6 +435,7 @@ class HashSet[E]
 
        redef fun add(item)
        do
+               if _capacity == 0 then enlarge(17) # 17 because magic in `store`
                var i = index_at(item)
                var c = node_at_idx(i, item)
                if c != null then
@@ -461,7 +455,6 @@ class HashSet[E]
        do
                _capacity = 0
                _the_length = 0
-               enlarge(0)
        end
 
        # Build a list filled with the items of `coll`.
index 75d4160..5caf83d 100644 (file)
@@ -55,6 +55,13 @@ redef class Sys
        # Total capacity of hash collections receiver `HashCollection::store`
        var st_tot_cap = 0
 
+       # Number of calls of `HashCollection::enlarge`
+       var en_count = 0
+       # Total length of hash collections receiver of `HashCollection::enlarge`
+       var en_tot_length = 0
+       # Total capacity of hash collections receiver `HashCollection::enlarge`
+       var en_tot_cap = 0
+
        private fun div(n,d: Int): String
        do
                if d == 0 then return "NA"
@@ -90,6 +97,11 @@ number of collisions: {{{st_coll}}} ({{{div(st_coll*100,st_count)}}}%)
 average length of collisions: {{{div(st_tot_coll,st_coll)}}}
 average length of considered collections: {{{div(st_tot_length,sys.st_count)}}}
 average capacity or considered collections: {{{div(st_tot_cap,sys.st_count)}}} ({{{div(st_tot_cap*100,st_tot_length)}}}%)
+
+ENLARGE:
+number of enlarge: {{{en_count}}}
+average length of considered collections: {{{div(en_tot_length,sys.en_count)}}}
+average capacity or considered collections: {{{div(en_tot_cap,sys.en_count)}}} ({{{div(en_tot_cap*100,en_tot_length)}}}%)
 ~~~~~~"""
        end
 
@@ -133,6 +145,14 @@ redef class HashCollection[K]
                return super
        end
 
+       redef fun enlarge(c)
+       do
+               super
+               sys.en_count += 1
+               sys.en_tot_length += _the_length
+               sys.en_tot_cap += _capacity
+       end
+
        # Count and update length of collisions for `node_at_idx`
        # Note for dynamic call-graph analysis: callers of this functions are
        # responsible of collisions.
index b52a9c5..c9b5f48 100644 (file)
 
 a1
 false
+~~~No hash statistics~~~
 ~~~Hash statistics~~~
 GET:
 number of get and has_key: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity of considered collections: 1.00 (NA%)
+average capacity of considered collections: 17.00 (NA%)
 
 STORE:
-number of stores: 0
-number of collisions: 0 (NA%)
-average length of collisions: NA
-average length of considered collections: NA
-average capacity or considered collections: NA (NA%)
-~~~~~~
-~~~Hash statistics~~~
-GET:
-number of get and has_key: 2
+number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity of considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
 
-STORE:
-number of stores: 1
-number of collisions: 0 (0.00%)
-average length of collisions: NA
+ENLARGE:
+number of enlarge: 1
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 3
+number of get and has_key: 2
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.33
-average capacity of considered collections: 6.33 (1900.00%)
+average length of considered collections: 0.50
+average capacity of considered collections: 17.00 (3400.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 true
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 4
+number of get and has_key: 2
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.50
-average capacity of considered collections: 9.00 (1800.00%)
+average capacity of considered collections: 17.00 (3400.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 
 a2
 false
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 5
+number of get and has_key: 3
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.60
-average capacity of considered collections: 10.60 (1766.67%)
+average length of considered collections: 0.67
+average capacity of considered collections: 17.00 (2550.00%)
 
 STORE:
 number of stores: 1
 number of collisions: 0 (0.00%)
 average length of collisions: NA
 average length of considered collections: 0.00
-average capacity or considered collections: 1.00 (NA%)
+average capacity or considered collections: 17.00 (NA%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 6
+number of get and has_key: 4
 number of collisions: 0 (0.00%)
 average length of collisions: NA
-average length of considered collections: 0.67
-average capacity of considered collections: 11.67 (1750.00%)
+average length of considered collections: 0.75
+average capacity of considered collections: 17.00 (2266.67%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 true
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~
 
 end
 ~~~Hash statistics~~~
 GET:
-number of get and has_key: 7
-number of collisions: 1 (14.29%)
+number of get and has_key: 5
+number of collisions: 1 (20.00%)
 average length of collisions: 2.00
-average length of considered collections: 0.86
-average capacity of considered collections: 12.43 (1450.00%)
+average length of considered collections: 1.00
+average capacity of considered collections: 17.00 (1700.00%)
 
 STORE:
 number of stores: 2
 number of collisions: 1 (50.00%)
 average length of collisions: 2.00
 average length of considered collections: 0.50
-average capacity or considered collections: 9.00 (1800.00%)
+average capacity or considered collections: 17.00 (3400.00%)
+
+ENLARGE:
+number of enlarge: 1
+average length of considered collections: 0.00
+average capacity or considered collections: 17.00 (NA%)
 ~~~~~~