core::hash_collection: simplify `enlarge`
[nit.git] / lib / core / collection / hash_collection.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2009 Jean Privat <jean@pryen.org>
4 #
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
11 # another product.
12
13 # Introduce `HashMap` and `HashSet`.
14 module hash_collection
15
16 import array
17
redef class Map[K, V]
	# Default factory for `Map`: instantiating a `Map[K, V]` yields a `HashMap[K, V]`.
	new do
		return new HashMap[K, V]
	end
end
22
# A HashCollection is an array of `HashNode[K]` indexed by the hash value of `K`.
#
# Every node belongs to two linked structures at the same time:
#
# * a bucket chain (`_prev_in_bucklet` / `_next_in_bucklet`) rooted in `_array[index_at(key)]`;
# * a doubly linked list in insertion order (`_first_item` to `_last_item`)
#   that the iterators of the subclasses follow.
#
# A one-entry cache (`last_accessed_key` / `last_accessed_node`) speeds up
# repeated lookups of the same key.
private abstract class HashCollection[K]
	# The concrete type of the nodes stored in the table
	type N: HashNode[K]

	# Buckets used to store the items (allocated by `enlarge`)
	var array: NativeArray[nullable N] is noautoinit

	# Size of `_array`
	var capacity: Int = 0

	# Number of items in the collection
	var the_length: Int = 0

	# First added item (used to visit items in nice order)
	var first_item: nullable N = null

	# Last added item (used to visit items in nice order)
	var last_item: nullable N = null

	# The last key accessed (used for cache)
	#
	# Only meaningful while `last_accessed_node` is not null.
	var last_accessed_key: nullable Object = null

	# The last node accessed (used for cache)
	#
	# `null` means that the cache holds nothing.
	# `null` is a legal key, so the key alone cannot mark the cache as invalid.
	var last_accessed_node: nullable N = null

	# Return the index of the key k
	#
	# `null` always goes to bucket 0.
	# `_capacity` must not be 0 for a non-null key (the modulo would divide by zero);
	# the callers in this file either check `_the_length` or call `enlarge` first.
	fun index_at(k: nullable Object): Int
	do
		if k == null then return 0

		var i = k.hash % _capacity
		# The modulo of a negative hash is negative: fold it back into the array.
		if i < 0 then i = - i
		return i
	end

	# Return the node associated with the key, or `null` if there is none
	fun node_at(k: nullable Object): nullable N
	do
		if _the_length == 0 then return null
		# cache: identity is used instead of `==` because it is a faster filter (even if not exact).
		# A null cached node is never a hit: otherwise looking up the `null` key
		# right after an invalidation would match the reset `_last_accessed_key`.
		var cached = _last_accessed_node
		if cached != null and k.is_same_instance(_last_accessed_key) then return cached

		var res = node_at_idx(index_at(k), k)
		_last_accessed_key = k
		_last_accessed_node = res
		return res
	end

	# Return the node associated with the key (but with the index already known)
	#
	# Walks the bucket chain at index `i` and stops at the first node whose key
	# is `k` (same instance or `==`).
	fun node_at_idx(i: Int, k: nullable Object): nullable N
	do
		if _the_length == 0 then return null
		var c = _array[i]
		while c != null do
			var ck = c._key
			if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enough yet
				break
			end
			c = c._next_in_bucklet
		end
		return c
	end

	# Add a new node at a given index
	#
	# `index` is expected to be `index_at(node._key)`.
	# No check is done for an already present equal key: callers look it up first.
	fun store(index: Int, node: N)
	do
		# Store the item at the end of the insertion-ordered list
		if _first_item == null then
			_first_item = node
		else
			_last_item._next_item = node
		end
		node._prev_item = _last_item
		node._next_item = null
		_last_item = node

		# Then store it at the head of its bucket
		var next = _array[index]
		_array[index] = node
		node._next_in_bucklet = next
		if next != null then next._prev_in_bucklet = node

		# The new node is the most recently accessed one
		_last_accessed_key = node._key
		_last_accessed_node = node

		# Enlarge if needed
		var l = _the_length
		_the_length = l + 1

		# Magic values determined empirically
		# We do not want to enlarge too much
		# We also want a odd capacity so that the modulo is more distributive
		l = (l + 5) * 2 + 1
		if l >= _capacity then
			enlarge(l * 3 / 2 + 1)
		end
	end

	# Remove the node associated with the key
	#
	# Does nothing if no node has the key `k`.
	# The links stored in the removed node itself are left untouched.
	fun remove_node(k: nullable Object)
	do
		if _the_length == 0 then return
		var i = index_at(k)
		var node = node_at_idx(i, k)
		if node == null then return

		# Remove the item in the list
		var prev = node._prev_item
		var next = node._next_item
		if prev != null then
			prev._next_item = next
		else
			_first_item = next
		end
		if next != null then
			next._prev_item = prev
		else
			_last_item = prev
		end

		# Remove the item in the array
		_the_length -= 1
		prev = node._prev_in_bucklet
		next = node._next_in_bucklet
		if prev != null then
			prev._next_in_bucklet = next
		else
			_array[i] = next
		end
		if next != null then
			next._prev_in_bucklet = prev
		end

		# Drop the cache: it may point to the node just removed
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Clear the whole structure
	#
	# The buckets are emptied but the capacity is kept.
	fun raz
	do
		var i = _capacity - 1
		while i >= 0 do
			_array[i] = null
			i -= 1
		end
		_the_length = 0
		_first_item = null
		_last_item = null
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Force a capacity
	#
	# The capacity never shrinks: nothing is done if `cap` (raised to at least
	# `_the_length + 1`) does not exceed the current capacity.
	# Otherwise a new array is allocated and every node is put back in its new bucket.
	fun enlarge(cap: Int)
	do
		# get a new capacity
		if cap < _the_length + 1 then cap = _the_length + 1
		if cap <= _capacity then return
		_capacity = cap
		_last_accessed_key = null
		_last_accessed_node = null

		# get a new array
		var new_array = new NativeArray[nullable N](cap)
		_array = new_array

		# Reput items in the array, following the insertion order
		var node = _first_item
		while node != null do
			var index = index_at(node._key)
			# Then store it at the head of its new bucket
			var next = new_array[index]
			new_array[index] = node
			node._prev_in_bucklet = null
			node._next_in_bucklet = next
			if next != null then next._prev_in_bucklet = node
			node = node._next_item
		end
	end
end
192
# A node of a `HashCollection`.
#
# It holds a key and the links of the two chains the node belongs to:
# the insertion-ordered list of all items and the chain of its bucket.
private abstract class HashNode[K]
	# The concrete type of the linked nodes
	type N: HashNode[K]

	# The key stored in this node
	var key: K

	# Previous node in the same bucket
	var prev_in_bucklet: nullable N = null

	# Next node in the same bucket
	var next_in_bucklet: nullable N = null

	# Previous node in the list of all items
	var prev_item: nullable N = null

	# Next node in the list of all items
	var next_item: nullable N = null
end
201
# A `Map` implemented with a hash table.
#
# ~~~
# var map = new HashMap[nullable String, Int]
# map[null] = 0
# map["one"] = 1
# map["two"] = 2
#
# assert map[null] == 0
# assert map["one"] == 1
# assert map.keys.has("two")
# assert map.values.length == 3
# ~~~
class HashMap[K, V]
	super Map[K, V]
	super HashCollection[K]

	redef type N: HashMapNode[K, V] is fixed

	# Start with an empty table: the array is allocated on the first insertion.
	init
	do
		_capacity = 0
		_the_length = 0
	end

	redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy
	redef var values: RemovableCollection[V] = new HashMapValues[K, V](self) is lazy

	redef fun length do return _the_length

	redef fun is_empty do return _the_length == 0

	redef fun has_key(k) do return node_at(k) != null

	redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self)

	# The value of `key`, or the result of `provide_default_value` if `key` is absent.
	redef fun [](key)
	do
		var found = node_at(key)
		if found != null then return found._value
		return provide_default_value(key)
	end

	redef fun get_or_null(key)
	do
		var found = node_at(key)
		if found != null then return found._value
		return null
	end

	# Associate `v` to `key`.
	#
	# An existing node with an equal key is reused (its key is replaced by `key`);
	# otherwise a new node is stored.
	redef fun []=(key, v)
	do
		# 17 is what `store` itself would ask for on an empty table:
		# `((0 + 5) * 2 + 1) * 3 / 2 + 1`
		if _capacity == 0 then enlarge(17)
		var idx = index_at(key)
		var existing = node_at_idx(idx, key)
		if existing == null then
			store(idx, new HashMapNode[K, V](key, v))
		else
			existing._key = key
			existing._value = v
		end
	end

	redef fun clear do raz
end
272
# View of the keys of a HashMap
#
# The view stores nothing itself: every service is forwarded to `map`.
private class HashMapKeys[K, V]
	super RemovableCollection[K]

	# The original map
	var map: HashMap[K, V]

	redef fun length do return self.map.length

	redef fun is_empty do return self.map.is_empty

	redef fun first do return self.map._first_item._key

	redef fun has(k) do return self.map.node_at(k) != null

	# A key appears at most once in a map.
	redef fun count(k)
	do
		if self.has(k) then return 1
		return 0
	end

	redef fun has_only(k)
	do
		if self.has(k) and self.length == 1 then return true
		return self.is_empty
	end

	redef fun iterator do return new MapKeysIterator[K, V](self.map.iterator)

	# Removing a key removes its whole entry from the map.
	redef fun remove(key) do self.map.remove_node(key)

	# Same as `remove` since a key appears at most once.
	redef fun remove_all(key) do self.map.remove_node(key)

	redef fun clear do self.map.clear
end
293
# View of the values of a Map
#
# Value-based services scan the entries of `map` in insertion order,
# following the `_next_item` links from `_first_item`.
private class HashMapValues[K, V]
	super RemovableCollection[V]

	# The original map
	var map: HashMap[K, V]

	redef fun length do return self.map.length

	redef fun is_empty do return self.map.is_empty

	redef fun first do return self.map._first_item._value

	redef fun iterator do return new MapValuesIterator[K, V](self.map.iterator)

	# Is there at least one entry whose value is `item`?
	redef fun has(item)
	do
		var node = self.map._first_item
		while node != null do
			if node._value == item then return true
			node = node._next_item
		end
		return false
	end

	# Is every value equal to `item`? (true on an empty map)
	redef fun has_only(item)
	do
		var node = self.map._first_item
		while node != null do
			if node._value != item then return false
			node = node._next_item
		end
		return true
	end

	# Number of entries whose value is `item`
	redef fun count(item)
	do
		var total = 0
		var node = self.map._first_item
		while node != null do
			if node._value == item then total += 1
			node = node._next_item
		end
		return total
	end

	# Remove the first entry (in insertion order) whose value is `item`.
	redef fun remove(item)
	do
		var owner = self.map
		var node = owner._first_item
		# Skip the entries that hold another value
		while node != null and node._value != item do node = node._next_item
		if node != null then owner.remove_node(node._key)
	end

	# Remove every entry whose value is `item`.
	redef fun remove_all(item)
	do
		var owner = self.map
		var node = owner._first_item
		while node != null do
			# `remove_node` leaves the links of the removed node untouched,
			# so its successor can be read before or after the removal.
			var successor = node._next_item
			if node._value == item then owner.remove_node(node._key)
			node = successor
		end
	end

	redef fun clear do self.map.clear
end
364
# A node of a `HashMap`: a `HashNode` that also carries the value associated to its key.
private class HashMapNode[K, V]
	super HashNode[K]

	# The value associated to the key of this node
	var value: V

	redef type N: HashMapNode[K, V]
end
370
# A `MapIterator` over a `HashMap`.
#
# Entries are visited by following the `_next_item` links of the nodes,
# starting from the `_first_item` of the map.
private class HashMapIterator[K, V]
	super MapIterator[K, V]

	# The map to iterate on
	var map: HashMap[K, V]

	# The current node (`null` once the iteration is over)
	var node: nullable HashMapNode[K, V] = null

	# Start the iteration on the first entry of `map`.
	#
	# `map` is already set by the automatic constructor when this runs,
	# so there is no need to assign it again here.
	init
	do
		_node = _map._first_item
	end

	redef fun is_ok do return _node != null

	redef fun item
	do
		assert is_ok
		return _node._value
	end

	#redef fun item=(value)
	#do
	#	assert is_ok
	#	_node.second = value
	#end

	redef fun key
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end
412
# A `Set` implemented with a hash table.
#
# Items require a working `hash` method.
#
# NOTE(review): the previous comment also stated that items cannot be null,
# whereas `index_at` sends `null` to bucket 0; confirm whether the restriction still holds.
class HashSet[E]
	super Set[E]
	super HashCollection[E]

	redef type N: HashSetNode[E] is fixed

	# Start with an empty table: the array is allocated on the first insertion.
	init
	do
		_capacity = 0
		_the_length = 0
	end

	# Build a set filled with the items of `coll`.
	init from(coll: Collection[E]) do
		init
		add_all(coll)
	end

	redef fun new_set do return new HashSet[E]

	redef fun length do return _the_length

	redef fun is_empty do return _the_length == 0

	# The first added item still in the set (the set must not be empty).
	redef fun first
	do
		assert _the_length > 0
		return _first_item._key
	end

	redef fun has(item) do return node_at(item) != null

	redef fun iterator do return new HashSetIterator[E](self)

	# Add `item` to the set.
	#
	# If an equal item is already present, its node is kept and only the stored
	# instance is replaced by `item`.
	redef fun add(item)
	do
		# 17 is what `store` itself would ask for on an empty table:
		# `((0 + 5) * 2 + 1) * 3 / 2 + 1`
		if _capacity == 0 then enlarge(17)
		var idx = index_at(item)
		var existing = node_at_idx(idx, item)
		if existing == null then
			store(idx, new HashSetNode[E](item))
		else
			existing._key = item
		end
	end

	redef fun remove(item) do remove_node(item)

	redef fun clear do raz
end
468
# A node of a `HashSet`: the key of the node is the item itself.
private class HashSetNode[E]
	super HashNode[E]

	# Nodes of a set are only linked to other nodes of a set
	redef type N: HashSetNode[E]
end
473
# An `Iterator` over the items of a `HashSet`.
#
# Items are visited by following the `_next_item` links of the nodes,
# starting from the `_first_item` of the set.
private class HashSetIterator[E]
	super Iterator[E]

	# The set to iterate on
	var set: HashSet[E]

	# The position in the internal map storage (`null` once the iteration is over)
	var node: nullable HashSetNode[E] = null

	# Start the iteration on the first item of `set`.
	init
	do
		_node = _set._first_item
	end

	redef fun is_ok do return _node != null

	redef fun item
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end