X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/collection/hash_collection.nit b/lib/standard/collection/hash_collection.nit index 818da57..f3857293 100644 --- a/lib/standard/collection/hash_collection.nit +++ b/lib/standard/collection/hash_collection.nit @@ -10,120 +10,142 @@ # You are allowed to redistribute it and sell it, alone or is a part of # another product. -# Introduce Hashmap and Hashset. -package hash_collection +# Introduce `HashMap` and `HashSet`. +module hash_collection import array -import hash + +redef class Map[K, V] + # Get a `HashMap[K, V]` as default implementation + new do return new HashMap[K, V] +end # A HashCollection is an array of HashNode[K] indexed by the K hash value -private class HashCollection[K: Object, N: HashNode[K], E] -special Collection[E] -special ArrayCapable[nullable N] - var _array: nullable NativeArray[nullable N] = null # Used to store items - var _capacity: Int = 0 # Size of _array - redef readable var _length: Int = 0 # Number of items in the map +private abstract class HashCollection[K] + type N: HashNode[K] + + var array: nullable NativeArray[nullable N] = null # Used to store items + var capacity: Int = 0 # Size of _array + var the_length: Int = 0 # Number of items in the map - readable var _first_item: nullable N = null # First added item (used to visit items in nice order) - var _last_item: nullable N = null # Last added item (same) + var first_item: nullable N = null # First added item (used to visit items in nice order) + var last_item: nullable N = null # Last added item (same) - # The last index accessed - var _last_accessed_index: Int = -1 + # The last key accessed (used for cache) + var last_accessed_key: nullable K = null - # The last key accessed - var _last_accessed_key: nullable K = null + # The last node accessed (used for cache) + var last_accessed_node: nullable N = null - # Return the index of the k element + # Return the index of the key k fun index_at(k: K): Int do - var arr = _array - - # Fisrt step: look in the last indexed elt - if k == _last_accessed_key then return _last_accessed_index - - # Compute the base position of the key - var base = k.hash % _capacity - if base < 0 then base = - base - - # Look for the key in the array - var cur = base - loop - var c = arr[cur] - if c == null or c.key == k then # REAL equals - _last_accessed_index = cur - _last_accessed_key = k - return cur + if k == null then return 0 + + var i = k.hash % _capacity + if i < 0 then i = - i + return i + end + + # Return the node associated with the key + fun node_at(k: K): nullable N + do + # cache: `is` is used instead of `==` because it is a faster filter (even if not exact) + if k.is_same_instance(_last_accessed_key) then return _last_accessed_node + + var res = node_at_idx(index_at(k), k) + _last_accessed_key = k + _last_accessed_node = res + return res + end + + # Return the node associated with the key (but with the index already known) + fun node_at_idx(i: Int, k: K): nullable N + do + var c = _array[i] + while c != null do + var ck = c._key + if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enought yet + break end - cur -= 1 - if cur < 0 then cur = _capacity - 1 - assert no_loop: cur != base + c = c._next_in_bucklet end - abort + return c end - # Add a new node (should be free) + # Add a new node at a given index fun store(index: Int, node: N) do # Store the item in the list if _first_item == null then _first_item = node else - _last_item.next_item = node + _last_item._next_item = node end - node.prev_item = _last_item - node.next_item = null + node._prev_item = _last_item + node._next_item = null _last_item = node + # Then store it in the array - assert _array[index] == null + var next = _array[index] _array[index] = node - var l = _length - _length = l + 1 - l = (l + 5) * 150 / 100 + node._next_in_bucklet = next + if next != null then next._prev_in_bucklet = node + + _last_accessed_key = node._key + _last_accessed_node = node + + # Enlarge if needed + var l = _the_length + _the_length = l + 1 + + # Magic values determined empirically + # We do not want to enlarge too much + # We also want a odd capacity so that the modulo is more distributive + l = (l + 5) * 2 + 1 if l >= _capacity then - enlarge(l * 2) + enlarge(l * 3 / 2 + 1) end end - fun remove_index(i: Int) + # Remove the node assosiated with the key + fun remove_node(k: K) do - assert correct_index: i >= 0 and i < _capacity - var node = _array[i] - assert has_couple: node != null + var i = index_at(k) + var node = node_at_idx(i, k) + if node == null then return + # Remove the item in the list - var prev = node.prev_item - var next = node.next_item + var prev = node._prev_item + var next = node._next_item if prev != null then - prev.next_item = next + prev._next_item = next else _first_item = next end if next != null then - next.prev_item = prev + next._prev_item = prev else _last_item = prev end + # Remove the item in the array - _array[i] = null - _length -= 1 - # Now replaces things before if needed - loop - i -= 1 - if i < 0 then - i = _capacity - 1 - end - var n = _array[i] - if n == null then - return - end - var i2 = index_at(n.key) - if i != i2 then - _array[i] = null - assert _array[i2] == null - _array[i2] = n - end + _the_length -= 1 + prev = node._prev_in_bucklet + next = node._next_in_bucklet + if prev != null then + prev._next_in_bucklet = next + else + _array[i] = next end + if next != null then + next._prev_in_bucklet = prev + end + + _last_accessed_key = null end + # Clear the whole structure fun raz do var i = _capacity - 1 @@ -131,24 +153,24 @@ special ArrayCapable[nullable N] _array[i] = null i -= 1 end - _length = 0 + _the_length = 0 _first_item = null _last_item = null _last_accessed_key = null end + # Force a capacity fun enlarge(cap: Int) do var old_cap = _capacity # get a new capacity - # cap = cap * 130 / 100 + 5 + 1000 # / - if cap < _length + 1 then cap = _length + 1 + if cap < _the_length + 1 then cap = _the_length + 1 if cap <= _capacity then return _capacity = cap _last_accessed_key = null # get a new array - var new_array = calloc_array(cap) + var new_array = new NativeArray[nullable N](cap) _array = new_array # clean the new array @@ -163,141 +185,194 @@ special ArrayCapable[nullable N] # Reput items in the array var node = _first_item while node != null do - var ind = index_at(node.key) - assert new_array[ind] == null - new_array[ind] = node - node = node.next_item + var index = index_at(node._key) + # Then store it in the array + var next = new_array[index] + new_array[index] = node + node._prev_in_bucklet = null + node._next_in_bucklet = next + if next != null then next._prev_in_bucklet = node + node = node._next_item end - _last_accessed_key = null end end -private class HashNode[K] - fun key: K is abstract +private abstract class HashNode[K] + var key: K type N: HashNode[K] - readable writable var _next_item: nullable N = null - readable writable var _prev_item: nullable N = null + var next_item: nullable N = null + var prev_item: nullable N = null + var prev_in_bucklet: nullable N = null + var next_in_bucklet: nullable N = null end +# A `Map` implemented with a hash table. +# +# ~~~ +# var map = new HashMap[nullable String, Int] +# map[null] = 0 +# map["one"] = 1 +# map["two"] = 2 +# +# assert map[null] == 0 +# assert map["one"] == 1 +# assert map.keys.has("two") +# assert map.values.length == 3 +# ~~~ class HashMap[K, V] -special CoupleMap[K, V] -special HashCollection[K, HashMapNode[K, V], V] + super Map[K, V] + super HashCollection[K] + + redef type N: HashMapNode[K, V] is fixed + + redef fun [](key) + do + var c = node_at(key) + if c == null then + return provide_default_value(key) + else + return c._value + end + end redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self) - redef fun iterate - !each(e: V) + redef fun length do return _the_length + + redef fun is_empty do return _the_length == 0 + + redef fun []=(key, v) do - var c = _first_item - while c != null do - each(c.second) - c = c._next_item + var i = index_at(key) + var c = node_at_idx(i, key) + if c != null then + c._key = key + c._value = v + else + store(i, new HashMapNode[K, V](key, v)) end end - redef fun first + redef fun clear do raz + + init do - assert _length > 0 - return _first_item.second + _capacity = 0 + _the_length = 0 + enlarge(0) end - redef fun is_empty do return _length == 0 + redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) + redef var values: RemovableCollection[V] = new HashMapValues[K, V](self) +end + +# View of the keys of a HashMap +private class HashMapKeys[K, V] + super RemovableCollection[K] + # The original map + var map: HashMap[K, V] + + redef fun count(k) do if self.has(k) then return 1 else return 0 + redef fun first do return self.map._first_item._key + redef fun has(k) do return self.map.node_at(k) != null + redef fun has_only(k) do return (self.has(k) and self.length == 1) or self.is_empty + redef fun is_empty do return self.map.is_empty + redef fun length do return self.map.length + + redef fun iterator do return new MapKeysIterator[K, V](self.map.iterator) + + redef fun clear do self.map.clear + + redef fun remove(key) do self.map.remove_node(key) + redef fun remove_all(key) do self.map.remove_node(key) +end + +# View of the values of a Map +private class HashMapValues[K, V] + super RemovableCollection[V] + # The original map + var map: HashMap[K, V] redef fun count(item) do var nb = 0 - var i = 0 - while i < _capacity do - var c = _array[i] - if c != null and c.second == item then nb += 1 - i += 1 + var c = self.map._first_item + while c != null do + if c._value == item then nb += 1 + c = c._next_item end return nb end + redef fun first do return self.map._first_item._value redef fun has(item) do - var i = 0 - while i < _capacity do - var c = _array[i] - if c != null and c.second == item then return true - i += 1 + var c = self.map._first_item + while c != null do + if c._value == item then return true + c = c._next_item end return false end redef fun has_only(item) do - var i = 0 - while i < _capacity do - var c = _array[i] - if c != null and c.second != item then return false - i += 1 + var c = self.map._first_item + while c != null do + if c._value != item then return false + c = c._next_item end return true end - redef fun []=(key, v) - do - assert key != null - var i = index_at(key) - var c = _array[i] - if c != null then - c.first = key - c.second = v - else - store(i, new HashMapNode[K, V](key, v)) - end - end + redef fun is_empty do return self.map.is_empty + redef fun length do return self.map.length + + redef fun iterator do return new MapValuesIterator[K, V](self.map.iterator) + + redef fun clear do self.map.clear redef fun remove(item) do - var i = 0 - while i < _capacity do - var c = _array[i] - if c != null and c.second == item then - remove_index(i) + var map = self.map + var c = map._first_item + while c != null do + if c._value == item then + map.remove_node(c._key) return end - i += 1 + c = c._next_item end end - redef fun remove_at(key) do remove_index(index_at(key)) - - redef fun clear do raz - - redef fun couple_at(key) do return _array[index_at(key)] - - init + redef fun remove_all(item) do - _capacity = 0 - _length = 0 - enlarge(0) + var map = self.map + var c = map._first_item + while c != null do + if c._value == item then + map.remove_node(c._key) + end + c = c._next_item + end end end -class HashMapNode[K, V] -special Couple[K, V] -special HashNode[K] - redef fun key do return first +private class HashMapNode[K, V] + super HashNode[K] redef type N: HashMapNode[K, V] - - init(k: K, v: V) - do - first = k - second = v - end + var value: V end +# A `MapIterator` over a `HashMap`. class HashMapIterator[K, V] -special MapIterator[K, V] + super MapIterator[K, V] redef fun is_ok do return _node != null redef fun item do assert is_ok - return _node.second + return _node._value end #redef fun item=(value) @@ -309,57 +384,63 @@ special MapIterator[K, V] redef fun key do assert is_ok - return _node.first + return _node._key end redef fun next do assert is_ok - _node = _node.next_item + _node = _node._next_item end # The map to iterate on - var _map: HashMap[K, V] + private var map: HashMap[K, V] # The current node - var _node: nullable HashMapNode[K, V] + private var node: nullable HashMapNode[K, V] = null - init(map: HashMap[K, V]) + init do _map = map - _node = map.first_item + _node = _map._first_item end end -class HashSet[E] -special Set[E] -special HashCollection[E, HashSetNode[E], E] +# A `Set` implemented with a hash table. +# Keys of such a map cannot be null and require a working `hash` method +class HashSet[E: Object] + super Set[E] + super HashCollection[E] + + redef type N: HashSetNode[E] is fixed + + redef fun length do return _the_length - redef fun is_empty do return _length == 0 + redef fun is_empty do return _the_length == 0 redef fun first do - assert _length > 0 - return _first_item.key + assert _the_length > 0 + return _first_item._key end redef fun has(item) do - return _array[index_at(item)] != null + return node_at(item) != null end redef fun add(item) do var i = index_at(item) - var c = _array[i] + var c = node_at_idx(i, item) if c != null then - c.key = item + c._key = item else store(i,new HashSetNode[E](item)) end end - redef fun remove(item) do remove_index(index_at(item)) + redef fun remove(item) do remove_node(item) redef fun clear do raz @@ -368,49 +449,49 @@ special HashCollection[E, HashSetNode[E], E] init do _capacity = 0 - _length = 0 + _the_length = 0 enlarge(0) end -end -class HashSetNode[E] -special HashNode[E] - redef type N: HashSetNode[E] + # Build a list filled with the items of `coll`. + init from(coll: Collection[E]) do + init + add_all(coll) + end - redef readable writable var _key: E + redef fun new_set do return new HashSet[E] +end - init(e: E) - do - _key = e - end +private class HashSetNode[E: Object] + super HashNode[E] + redef type N: HashSetNode[E] end -class HashSetIterator[E] -special Iterator[E] +private class HashSetIterator[E: Object] + super Iterator[E] redef fun is_ok do return _node != null redef fun item do assert is_ok - return _node.key + return _node._key end redef fun next do assert is_ok - _node = _node.next_item + _node = _node._next_item end # The set to iterate on - var _set: HashSet[E] + var set: HashSet[E] # The position in the internal map storage - var _node: nullable HashSetNode[E] + var node: nullable HashSetNode[E] = null - init(set: HashSet[E]) + init do - _set = set - _node = set.first_item + _node = _set._first_item end end