lib/map: add factory to Map returning a HashMap
[nit.git] / lib / standard / collection / hash_collection.nit
index 13d5a7a..f385729 100644 (file)
 # You  are  allowed  to  redistribute it and sell it, alone or is a part of
 # another product.
 
-# Introduce Hashmap and Hashset.
-package hash_collection
+# Introduce `HashMap` and `HashSet`.
+module hash_collection
 
 import array
-import hash
+
+redef class Map[K, V]
+       # Get a `HashMap[K, V]` as default implementation
+       new do return new HashMap[K, V]
+end
 
 # A HashCollection is an array of HashNode[K] indexed by the K hash value
-private class HashCollection[K: Object, N: HashNode[K], E]
-       super Collection[E]
-       super ArrayCapable[nullable N]
-       var _array: nullable NativeArray[nullable N] = null # Used to store items
-       var _capacity: Int = 0 # Size of _array
-       redef readable var _length: Int = 0 # Number of items in the map
+private abstract class HashCollection[K]
+       type N: HashNode[K]
 
-       readable var _first_item: nullable N = null # First added item (used to visit items in nice order)
-       var _last_item: nullable N = null # Last added item (same)
+       var array: nullable NativeArray[nullable N] = null # Used to store items
+       var capacity: Int = 0 # Size of _array
+       var the_length: Int = 0 # Number of items in the map
+
+       var first_item: nullable N = null # First added item (used to visit items in nice order)
+       var last_item: nullable N = null # Last added item (same)
 
        # The last key accessed (used for cache)
-       var _last_accessed_key: nullable K = null
+       var last_accessed_key: nullable K = null
 
        # The last node accessed (used for cache)
-       var _last_accessed_node: nullable N = null
+       var last_accessed_node: nullable N = null
 
        # Return the index of the key k
        fun index_at(k: K): Int
        do
+               if k == null then return 0
+
                var i = k.hash % _capacity
                if i < 0 then i = - i
                return i
        end
 
-       # Return the node assosiated with the key
+       # Return the node associated with the key
        fun node_at(k: K): nullable N
        do
-               # cache: `is' is used instead of `==' because it is a faster filter (even if not exact)
-               if k is _last_accessed_key then return _last_accessed_node
+               # cache: `is_same_instance` is used instead of `==` because it is a faster filter (even if not exact)
+               if k.is_same_instance(_last_accessed_key) then return _last_accessed_node
 
                var res = node_at_idx(index_at(k), k)
                _last_accessed_key = k
@@ -53,13 +59,13 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                return res
        end
 
-       # Return the node assosiated with the key (but with the index already known)
+       # Return the node associated with the key (but with the index already known)
        fun node_at_idx(i: Int, k: K): nullable N
        do
                var c = _array[i]
                while c != null do
                        var ck = c._key
-                       if ck is k or ck == k then # prefilter with `is' because the compiler is not smart enought yet
+                       if ck.is_same_instance(k) or ck == k then # FIXME: prefilter with `is_same_instance` because the compiler is not smart enough yet
                                break
                        end
                        c = c._next_in_bucklet
@@ -90,11 +96,15 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                _last_accessed_node = node
 
                # Enlarge if needed
-               var l = _length
-               _length = l + 1
-               l = (l + 5) * 3 / 2
+               var l = _the_length
+               _the_length = l + 1
+
+               # Magic values determined empirically
+               # We do not want to enlarge too much
+               # We also want an odd capacity so that the modulo distributes keys more evenly
+               l = (l + 5) * 2 + 1
                if l >= _capacity then
-                       enlarge(l * 2)
+                       enlarge(l * 3 / 2 + 1)
                end
        end
 
@@ -120,7 +130,7 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                end
 
                # Remove the item in the array
-               _length -= 1
+               _the_length -= 1
                prev = node._prev_in_bucklet
                next = node._next_in_bucklet
                if prev != null then
@@ -135,6 +145,7 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                _last_accessed_key = null
        end
 
+       # Clear the whole structure
        fun raz
        do
                var i = _capacity - 1
@@ -142,23 +153,24 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                        _array[i] = null
                        i -= 1
                end
-               _length = 0
+               _the_length = 0
                _first_item = null
                _last_item = null
                _last_accessed_key = null
        end
 
+       # Force a capacity
        fun enlarge(cap: Int)
        do
                var old_cap = _capacity
                # get a new capacity
-               if cap < _length + 1 then cap = _length + 1
+               if cap < _the_length + 1 then cap = _the_length + 1
                if cap <= _capacity then return
                _capacity = cap
                _last_accessed_key = null
 
                # get a new array
-               var new_array = calloc_array(cap)
+               var new_array = new NativeArray[nullable N](cap)
                _array = new_array
 
                # clean the new array
@@ -177,6 +189,7 @@ private class HashCollection[K: Object, N: HashNode[K], E]
                        # Then store it in the array
                        var next = new_array[index]
                        new_array[index] = node
+                       node._prev_in_bucklet = null
                        node._next_in_bucklet = next
                        if next != null then next._prev_in_bucklet = node
                        node = node._next_item
@@ -184,69 +197,117 @@ private class HashCollection[K: Object, N: HashNode[K], E]
        end
 end
 
-private class HashNode[K: Object]
-       var _key: K
+private abstract class HashNode[K]
+       var key: K
        type N: HashNode[K]
-       readable writable var _next_item: nullable N = null
-       readable writable var _prev_item: nullable N = null
-       var _prev_in_bucklet: nullable N = null
-       var _next_in_bucklet: nullable N = null
-       init(k: K)
-       do
-               _key = k
-       end
+       var next_item: nullable N = null
+       var prev_item: nullable N = null
+       var prev_in_bucklet: nullable N = null
+       var next_in_bucklet: nullable N = null
 end
 
-class HashMap[K: Object, V]
+# A `Map` implemented with a hash table.
+#
+# ~~~
+# var map = new HashMap[nullable String, Int]
+# map[null] = 0
+# map["one"] = 1
+# map["two"] = 2
+#
+# assert map[null] == 0
+# assert map["one"] == 1
+# assert map.keys.has("two")
+# assert map.values.length == 3
+# ~~~
+class HashMap[K, V]
        super Map[K, V]
-       super HashCollection[K, HashMapNode[K, V], V]
+       super HashCollection[K]
+
+       redef type N: HashMapNode[K, V] is fixed
 
        redef fun [](key)
        do
                var c = node_at(key)
                if c == null then
-                       abort
+                       return provide_default_value(key)
                else
                        return c._value
                end
        end
 
-       redef fun has_key(key) do return node_at(key) != null
-
        redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self)
 
-       redef fun iterate
-               !each(e: V)
+       redef fun length do return _the_length
+
+       redef fun is_empty do return _the_length == 0
+
+       redef fun []=(key, v)
        do
-               var c = _first_item
-               while c != null do
-                       each(c._value)
-                       c = c._next_item
+               var i = index_at(key)
+               var c = node_at_idx(i, key)
+               if c != null then
+                       c._key = key
+                       c._value = v
+               else
+                       store(i, new HashMapNode[K, V](key, v))
                end
        end
 
-       redef fun first
+       redef fun clear do raz
+
+       init
        do
-               assert _length > 0
-               return _first_item._value
+               _capacity = 0
+               _the_length = 0
+               enlarge(0)
        end
 
-       redef fun is_empty do return _length == 0
+       redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self)
+       redef var values: RemovableCollection[V] = new HashMapValues[K, V](self)
+end
+
+# View of the keys of a HashMap
+private class HashMapKeys[K, V]
+       super RemovableCollection[K]
+       # The original map
+       var map: HashMap[K, V]
+
+       redef fun count(k) do if self.has(k) then return 1 else return 0
+       redef fun first do return self.map._first_item._key
+       redef fun has(k) do return self.map.node_at(k) != null
+       redef fun has_only(k) do return (self.has(k) and self.length == 1) or self.is_empty
+       redef fun is_empty do return self.map.is_empty
+       redef fun length do return self.map.length
+
+       redef fun iterator do return new MapKeysIterator[K, V](self.map.iterator)
+
+       redef fun clear do self.map.clear
+
+       redef fun remove(key) do self.map.remove_node(key)
+       redef fun remove_all(key) do self.map.remove_node(key)
+end
+
+# View of the values of a HashMap
+private class HashMapValues[K, V]
+       super RemovableCollection[V]
+       # The original map
+       var map: HashMap[K, V]
 
        redef fun count(item)
        do
                var nb = 0
-               var c = _first_item
+               var c = self.map._first_item
                while c != null do
                        if c._value == item then nb += 1
                        c = c._next_item
                end
                return nb
        end
+       redef fun first do return self.map._first_item._value
 
        redef fun has(item)
        do
-               var c = _first_item
+               var c = self.map._first_item
                while c != null do
                        if c._value == item then return true
                        c = c._next_item
@@ -256,7 +317,7 @@ class HashMap[K: Object, V]
 
        redef fun has_only(item)
        do
-               var c = _first_item
+               var c = self.map._first_item
                while c != null do
                        if c._value != item then return false
                        c = c._next_item
@@ -264,55 +325,47 @@ class HashMap[K: Object, V]
                return true
        end
 
-       redef fun []=(key, v)
-       do
-               var i = index_at(key)
-               var c = node_at_idx(i, key)
-               if c != null then
-                       c._key = key
-                       c._value = v
-               else
-                       store(i, new HashMapNode[K, V](key, v))
-               end
-       end
+       redef fun is_empty do return self.map.is_empty
+       redef fun length do return self.map.length
+
+       redef fun iterator do return new MapValuesIterator[K, V](self.map.iterator)
+
+       redef fun clear do self.map.clear
 
        redef fun remove(item)
        do
-               var c = _first_item
+               var map = self.map
+               var c = map._first_item
                while c != null do
                        if c._value == item then
-                               remove_node(c._key)
+                               map.remove_node(c._key)
                                return
                        end
                        c = c._next_item
                end
        end
 
-       redef fun remove_at(key) do remove_node(key)
-
-       redef fun clear do raz
-
-       init
+       redef fun remove_all(item)
        do
-               _capacity = 0
-               _length = 0
-               enlarge(0)
+               var map = self.map
+               var c = map._first_item
+               while c != null do
+                       if c._value == item then
+                               map.remove_node(c._key)
+                       end
+                       c = c._next_item
+               end
        end
 end
 
-class HashMapNode[K: Object, V]
+private class HashMapNode[K, V]
        super HashNode[K]
        redef type N: HashMapNode[K, V]
-       var _value: V
-
-       init(k: K, v: V)
-       do
-               super(k)
-               _value = v
-       end
+       var value: V
 end
 
-class HashMapIterator[K: Object, V]
+# A `MapIterator` over a `HashMap`.
+class HashMapIterator[K, V]
        super MapIterator[K, V]
        redef fun is_ok do return _node != null
 
@@ -341,27 +394,33 @@ class HashMapIterator[K: Object, V]
        end
 
        # The map to iterate on
-       var _map: HashMap[K, V]
+       private var map: HashMap[K, V]
 
        # The current node
-       var _node: nullable HashMapNode[K, V]
+       private var node: nullable HashMapNode[K, V] = null
 
-       init(map: HashMap[K, V])
+       init
        do
                _map = map
-               _node = map.first_item
+               _node = _map._first_item
        end
 end
 
+# A `Set` implemented with a hash table.
+# Items of such a set cannot be null and require a working `hash` method
 class HashSet[E: Object]
        super Set[E]
-       super HashCollection[E, HashSetNode[E], E]
+       super HashCollection[E]
 
-       redef fun is_empty do return _length == 0
+       redef type N: HashSetNode[E] is fixed
+
+       redef fun length do return _the_length
+
+       redef fun is_empty do return _the_length == 0
 
        redef fun first
        do
-               assert _length > 0
+               assert _the_length > 0
                return _first_item._key
        end
 
@@ -390,22 +449,25 @@ class HashSet[E: Object]
        init
        do
                _capacity = 0
-               _length = 0
+               _the_length = 0
                enlarge(0)
        end
+
+       # Build a set filled with the items of `coll`.
+       init from(coll: Collection[E]) do
+               init
+               add_all(coll)
+       end
+
+       redef fun new_set do return new HashSet[E]
 end
 
-class HashSetNode[E: Object]
+private class HashSetNode[E: Object]
        super HashNode[E]
        redef type N: HashSetNode[E]
-
-       init(e: E)
-       do
-               _key = e
-       end
 end
 
-class HashSetIterator[E: Object]
+private class HashSetIterator[E: Object]
        super Iterator[E]
        redef fun is_ok do return _node != null
 
@@ -422,15 +484,14 @@ class HashSetIterator[E: Object]
        end
 
        # The set to iterate on
-       var _set: HashSet[E]
+       var set: HashSet[E]
 
        # The position in the internal map storage
-       var _node: nullable HashSetNode[E]
+       var node: nullable HashSetNode[E] = null
 
-       init(set: HashSet[E])
+       init
        do
-               _set = set
-               _node = set._first_item
+               _node = _set._first_item
        end
 end