X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/collection/hash_collection.nit b/lib/standard/collection/hash_collection.nit index 0c772b8..e5ff507 100644 --- a/lib/standard/collection/hash_collection.nit +++ b/lib/standard/collection/hash_collection.nit @@ -10,42 +10,48 @@ # You are allowed to redistribute it and sell it, alone or is a part of # another product. -# Introduce Hashmap and Hashset. -package hash_collection +# Introduce `HashMap` and `HashSet`. +module hash_collection import array -import hash + +redef class Map[K, V] + # Get a `HashMap[K, V]` as default implementation + new do return new HashMap[K, V] +end # A HashCollection is an array of HashNode[K] indexed by the K hash value -private abstract class HashCollection[K: Object, N: HashNode[Object]] - super ArrayCapable[nullable N] +private abstract class HashCollection[K] + type N: HashNode[K] - var _array: nullable NativeArray[nullable N] = null # Used to store items - var _capacity: Int = 0 # Size of _array - var _length: Int = 0 # Number of items in the map + var array: nullable NativeArray[nullable N] = null # Used to store items + var capacity: Int = 0 # Size of _array + var the_length: Int = 0 # Number of items in the map - readable var _first_item: nullable N = null # First added item (used to visit items in nice order) - var _last_item: nullable N = null # Last added item (same) + var first_item: nullable N = null # First added item (used to visit items in nice order) + var last_item: nullable N = null # Last added item (same) # The last key accessed (used for cache) - var _last_accessed_key: nullable K = null + var last_accessed_key: nullable K = null # The last node accessed (used for cache) - var _last_accessed_node: nullable N = null + var last_accessed_node: nullable N = null # Return the index of the key k fun index_at(k: K): Int do + if k == null then return 0 + var i = k.hash % _capacity if i < 0 then i = - i return i end - # Return the node assosiated with the key + # Return the node associated with the key fun node_at(k: K): nullable N do - # cache: `is' is used instead of `==' because it is a faster filter (even if not exact) - if k is _last_accessed_key then return _last_accessed_node + # cache: `is` is used instead of `==` because it is a faster filter (even if not exact) + if k.is_same_instance(_last_accessed_key) then return _last_accessed_node var res = node_at_idx(index_at(k), k) _last_accessed_key = k @@ -53,13 +59,13 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] return res end - # Return the node assosiated with the key (but with the index already known) + # Return the node associated with the key (but with the index already known) fun node_at_idx(i: Int, k: K): nullable N do var c = _array[i] while c != null do var ck = c._key - if ck is k or ck == k then # prefilter with `is' because the compiler is not smart enought yet + if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enought yet break end c = c._next_in_bucklet @@ -90,11 +96,15 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] _last_accessed_node = node # Enlarge if needed - var l = _length - _length = l + 1 - l = (l + 5) * 3 / 2 + var l = _the_length + _the_length = l + 1 + + # Magic values determined empirically + # We do not want to enlarge too much + # We also want a odd capacity so that the modulo is more distributive + l = (l + 5) * 2 + 1 if l >= _capacity then - enlarge(l * 2) + enlarge(l * 3 / 2 + 1) end end @@ -120,7 +130,7 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] end # Remove the item in the array - _length -= 1 + _the_length -= 1 prev = node._prev_in_bucklet next = node._next_in_bucklet if prev != null then @@ -143,7 +153,7 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] _array[i] = null i -= 1 end - _length = 0 + _the_length = 0 _first_item = null _last_item = null _last_accessed_key = null @@ -154,13 +164,13 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] do var old_cap = _capacity # get a new capacity - if cap < _length + 1 then cap = _length + 1 + if cap < _the_length + 1 then cap = _the_length + 1 if cap <= _capacity then return _capacity = cap _last_accessed_key = null # get a new array - var new_array = calloc_array(cap) + var new_array = new NativeArray[nullable N](cap) _array = new_array # clean the new array @@ -179,6 +189,7 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] # Then store it in the array var next = new_array[index] new_array[index] = node + node._prev_in_bucklet = null node._next_in_bucklet = next if next != null then next._prev_in_bucklet = node node = node._next_item @@ -186,50 +197,59 @@ private abstract class HashCollection[K: Object, N: HashNode[Object]] end end -private abstract class HashNode[K: Object] - var _key: K +private abstract class HashNode[K] + var key: K type N: HashNode[K] - readable writable var _next_item: nullable N = null - readable writable var _prev_item: nullable N = null - var _prev_in_bucklet: nullable N = null - var _next_in_bucklet: nullable N = null - init(k: K) - do - _key = k - end + var next_item: nullable N = null + var prev_item: nullable N = null + var prev_in_bucklet: nullable N = null + var next_in_bucklet: nullable N = null end -# A map implemented with a hash table. -# Keys of such a map cannot be null and require a working `hash' method -class HashMap[K: Object, V] +# A `Map` implemented with a hash table. +# +# ~~~ +# var map = new HashMap[nullable String, Int] +# map[null] = 0 +# map["one"] = 1 +# map["two"] = 2 +# +# assert map[null] == 0 +# assert map["one"] == 1 +# assert map.keys.has("two") +# assert map.values.length == 3 +# ~~~ +class HashMap[K, V] super Map[K, V] - super HashCollection[K, HashMapNode[K, V]] + super HashCollection[K] + + redef type N: HashMapNode[K, V] is fixed redef fun [](key) do var c = node_at(key) if c == null then - abort + return provide_default_value(key) else return c._value end end - redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self) - - redef fun iterate - !each(k: K, v: V) + redef fun get_or_null(key) do - var c = _first_item - while c != null do - each(c._key, c._value) - c = c._next_item + var c = node_at(key) + if c == null then + return null + else + return c._value end end - redef fun length do return _length + redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self) + + redef fun length do return _the_length - redef fun is_empty do return _length == 0 + redef fun is_empty do return _the_length == 0 redef fun []=(key, v) do @@ -248,16 +268,17 @@ class HashMap[K: Object, V] init do _capacity = 0 - _length = 0 + _the_length = 0 enlarge(0) end - redef var keys: HashMapKeys[K, V] = new HashMapKeys[K, V](self) - redef var values: HashMapValues[K, V] = new HashMapValues[K, V](self) + redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy + redef var values: RemovableCollection[V] = new HashMapValues[K, V](self) is lazy + redef fun has_key(k) do return node_at(k) != null end # View of the keys of a HashMap -class HashMapKeys[K: Object, V] +private class HashMapKeys[K, V] super RemovableCollection[K] # The original map var map: HashMap[K, V] @@ -278,7 +299,7 @@ class HashMapKeys[K: Object, V] end # View of the values of a Map -class HashMapValues[K: Object, V] +private class HashMapValues[K, V] super RemovableCollection[V] # The original map var map: HashMap[K, V] @@ -348,19 +369,14 @@ class HashMapValues[K: Object, V] end end -private class HashMapNode[K: Object, V] +private class HashMapNode[K, V] super HashNode[K] redef type N: HashMapNode[K, V] - var _value: V - - init(k: K, v: V) - do - super(k) - _value = v - end + var value: V end -class HashMapIterator[K: Object, V] +# A `MapIterator` over a `HashMap`. +class HashMapIterator[K, V] super MapIterator[K, V] redef fun is_ok do return _node != null @@ -389,31 +405,33 @@ class HashMapIterator[K: Object, V] end # The map to iterate on - var _map: HashMap[K, V] + private var map: HashMap[K, V] # The current node - var _node: nullable HashMapNode[K, V] + private var node: nullable HashMapNode[K, V] = null - init(map: HashMap[K, V]) + init do _map = map - _node = map.first_item + _node = _map._first_item end end -# A `Set' implemented with a hash table. -# Keys of such a map cannot be null and require a working `hash' method -class HashSet[E: Object] +# A `Set` implemented with a hash table. +# Keys of such a map cannot be null and require a working `hash` method +class HashSet[E] super Set[E] - super HashCollection[E, HashSetNode[E]] + super HashCollection[E] - redef fun length do return _length + redef type N: HashSetNode[E] is fixed - redef fun is_empty do return _length == 0 + redef fun length do return _the_length + + redef fun is_empty do return _the_length == 0 redef fun first do - assert _length > 0 + assert _the_length > 0 return _first_item._key end @@ -442,22 +460,25 @@ class HashSet[E: Object] init do _capacity = 0 - _length = 0 + _the_length = 0 enlarge(0) end + + # Build a list filled with the items of `coll`. + init from(coll: Collection[E]) do + init + add_all(coll) + end + + redef fun new_set do return new HashSet[E] end -private class HashSetNode[E: Object] +private class HashSetNode[E] super HashNode[E] redef type N: HashSetNode[E] - - init(e: E) - do - _key = e - end end -class HashSetIterator[E: Object] +private class HashSetIterator[E] super Iterator[E] redef fun is_ok do return _node != null @@ -474,15 +495,14 @@ class HashSetIterator[E: Object] end # The set to iterate on - var _set: HashSet[E] + var set: HashSet[E] # The position in the internal map storage - var _node: nullable HashSetNode[E] + var node: nullable HashSetNode[E] = null - init(set: HashSet[E]) + init do - _set = set - _node = set._first_item + _node = _set._first_item end end