8ee3491db615f4e516523283992243e9d22ee370
[nit.git] / lib / core / collection / hash_collection.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2009 Jean Privat <jean@pryen.org>
4 #
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
11 # another product.
12
13 # Introduce `HashMap` and `HashSet`.
14 module hash_collection
15
16 import array
17
redef class Map[K, V]
	# Default factory of `Map`: `new Map[K, V]` builds a `HashMap[K, V]`.
	new
	do
		return new HashMap[K, V]
	end
end
22
# A HashCollection is an array of HashNode[K] indexed by the K hash value
#
# Each slot of the array is the head of a doubly-linked chain of nodes (a bucket).
# All the nodes are also chained together, in insertion order, from `first_item`
# to `last_item`; this second list is the one used to visit the items.
private abstract class HashCollection[K]
	type N: HashNode[K]

	var array: nullable NativeArray[nullable N] = null # Used to store items
	var capacity: Int = 0 # Size of _array
	var the_length: Int = 0 # Number of items in the map

	var first_item: nullable N = null # First added item (used to visit items in nice order)
	var last_item: nullable N = null # Last added item (same)

	# The last key accessed (used for cache)
	#
	# `null` is ambiguous here: it is both the value of an invalidated cache
	# and the value left by an access to the null key.
	# This is why `node_at` never uses the cache for the null key.
	var last_accessed_key: nullable Object = null

	# The last node accessed (used for cache)
	var last_accessed_node: nullable N = null

	# Return the index of the key k
	#
	# The null key is always associated with the index 0.
	# The result is only meaningful when `_capacity` is greater than 0.
	fun index_at(k: nullable Object): Int
	do
		if k == null then return 0

		var i = k.hash % _capacity
		# The modulo may be negative when the hash is negative
		if i < 0 then i = - i
		return i
	end

	# Return the node associated with the key
	#
	# Return `null` if there is no such node.
	# The result of the lookup (found or not) is stored in the cache.
	fun node_at(k: nullable Object): nullable N
	do
		# cache: `is_same_instance` is used instead of `==` because it is a faster filter (even if not exact)
		#
		# The cache is bypassed for the null key: a null `_last_accessed_key`
		# does not prove that the null key was the last one looked up,
		# so trusting it could return the node of an unrelated key.
		if k != null and k.is_same_instance(_last_accessed_key) then return _last_accessed_node

		var res = node_at_idx(index_at(k), k)
		_last_accessed_key = k
		_last_accessed_node = res
		return res
	end

	# Return the node associated with the key (but with the index already known)
	#
	# Walk the bucket `i` and return the first node whose key is `k`,
	# or `null` if the bucket contains no such node.
	fun node_at_idx(i: Int, k: nullable Object): nullable N
	do
		var c = _array[i]
		while c != null do
			var ck = c._key
			if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enough yet
				break
			end
			c = c._next_in_bucklet
		end
		return c
	end

	# Add a new node at a given index
	#
	# The node is appended to the list of items and put at the head of
	# the bucket `index`. The array is then enlarged if it becomes too small.
	fun store(index: Int, node: N)
	do
		# Store the item in the list
		if _first_item == null then
			_first_item = node
		else
			_last_item._next_item = node
		end
		node._prev_item = _last_item
		node._next_item = null
		_last_item = node

		# Then store it in the array
		var next = _array[index]
		_array[index] = node
		node._next_in_bucklet = next
		if next != null then next._prev_in_bucklet = node

		_last_accessed_key = node._key
		_last_accessed_node = node

		# Enlarge if needed
		var l = _the_length
		_the_length = l + 1

		# Magic values determined empirically
		# We do not want to enlarge too much
		# We also want a odd capacity so that the modulo is more distributive
		l = (l + 5) * 2 + 1
		if l >= _capacity then
			enlarge(l * 3 / 2 + 1)
		end
	end

	# Remove the node associated with the key
	#
	# Do nothing if there is no such node.
	# The links of the removed node itself are left untouched, so code that
	# is walking the list of items (like `HashMapValues::remove_all`) can
	# still reach the following item from a node that was just removed.
	fun remove_node(k: nullable Object)
	do
		var i = index_at(k)
		var node = node_at_idx(i, k)
		if node == null then return

		# Remove the item in the list
		var prev = node._prev_item
		var next = node._next_item
		if prev != null then
			prev._next_item = next
		else
			_first_item = next
		end
		if next != null then
			next._prev_item = prev
		else
			_last_item = prev
		end

		# Remove the item in the array
		_the_length -= 1
		prev = node._prev_in_bucklet
		next = node._next_in_bucklet
		if prev != null then
			prev._next_in_bucklet = next
		else
			_array[i] = next
		end
		if next != null then
			next._prev_in_bucklet = prev
		end

		# Invalidate the cache, and do not keep the removed node reachable
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Clear the whole structure
	#
	# The capacity is kept: only the content of the array is reset.
	fun raz
	do
		var i = _capacity - 1
		while i >= 0 do
			_array[i] = null
			i -= 1
		end
		_the_length = 0
		_first_item = null
		_last_item = null
		# Invalidate the cache, and do not keep an old node reachable
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Force a capacity
	#
	# The requested capacity is raised to at least `_the_length + 1`.
	# Nothing is done if the result is not greater than the current capacity;
	# otherwise a new array is allocated and all the items are put back in it.
	fun enlarge(cap: Int)
	do
		# get a new capacity
		if cap < _the_length + 1 then cap = _the_length + 1
		if cap <= _capacity then return
		_capacity = cap
		# Invalidate the cache
		_last_accessed_key = null
		_last_accessed_node = null

		# get a new array
		var new_array = new NativeArray[nullable N](cap)
		_array = new_array

		# clean the new array
		var i = cap - 1
		while i >=0 do
			new_array[i] = null
			i -= 1
		end

		# Reput items in the array
		var node = _first_item
		while node != null do
			var index = index_at(node._key)
			# Then store it in the array
			var next = new_array[index]
			new_array[index] = node
			node._prev_in_bucklet = null
			node._next_in_bucklet = next
			if next != null then next._prev_in_bucklet = node
			node = node._next_item
		end
	end
end
199
200 private abstract class HashNode[K]
201 var key: K
202 type N: HashNode[K]
203 var next_item: nullable N = null
204 var prev_item: nullable N = null
205 var prev_in_bucklet: nullable N = null
206 var next_in_bucklet: nullable N = null
207 end
208
# A `Map` implemented with a hash table.
#
# ~~~
# var map = new HashMap[nullable String, Int]
# map[null] = 0
# map["one"] = 1
# map["two"] = 2
#
# assert map[null] == 0
# assert map["one"] == 1
# assert map.keys.has("two")
# assert map.values.length == 3
# ~~~
class HashMap[K, V]
	super Map[K, V]
	super HashCollection[K]

	redef type N: HashMapNode[K, V] is fixed

	# Start with an empty table; `enlarge` allocates the initial array.
	init
	do
		_capacity = 0
		_the_length = 0
		enlarge(0)
	end

	# The value of `key`, or the result of `provide_default_value` if `key` is absent.
	redef fun [](key)
	do
		var node = node_at(key)
		if node != null then return node._value
		return provide_default_value(key)
	end

	# The value of `key`, or `null` if `key` is absent.
	redef fun get_or_null(key)
	do
		var node = node_at(key)
		if node != null then return node._value
		return null
	end

	# Associate `v` with `key`, replacing the previous association if any.
	redef fun []=(key, v)
	do
		var idx = index_at(key)
		var node = node_at_idx(idx, key)
		if node == null then
			# Unknown key: add a fresh node in its bucket
			store(idx, new HashMapNode[K, V](key, v))
			return
		end
		# Known key: update the existing node in place
		node._key = key
		node._value = v
	end

	redef fun has_key(k)
	do
		return node_at(k) != null
	end

	redef fun length
	do
		return _the_length
	end

	redef fun is_empty
	do
		return _the_length == 0
	end

	redef fun clear
	do
		raz
	end

	redef fun iterator: HashMapIterator[K, V]
	do
		return new HashMapIterator[K,V](self)
	end

	# Views on the map, built on first use
	redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy
	redef var values: RemovableCollection[V] = new HashMapValues[K, V](self) is lazy
end
279
# View of the keys of a HashMap
#
# Every operation is forwarded to the underlying map.
private class HashMapKeys[K, V]
	super RemovableCollection[K]

	# The original map
	var map: HashMap[K, V]

	# A key is present at most once, so the count is either 0 or 1
	redef fun count(k)
	do
		if not self.has(k) then return 0
		return 1
	end

	redef fun first
	do
		return self.map._first_item._key
	end

	redef fun has(k)
	do
		return self.map.node_at(k) != null
	end

	# True if `k` is the single key of the map, or if the map is empty
	redef fun has_only(k)
	do
		if self.has(k) and self.length == 1 then return true
		return self.is_empty
	end

	redef fun is_empty
	do
		return self.map.is_empty
	end

	redef fun length
	do
		return self.map.length
	end

	redef fun iterator
	do
		return new MapKeysIterator[K, V](self.map.iterator)
	end

	redef fun clear
	do
		self.map.clear
	end

	# Removing a key removes its whole entry from the map
	redef fun remove(key)
	do
		self.map.remove_node(key)
	end

	# Same as `remove` since a key is present at most once
	redef fun remove_all(key)
	do
		self.map.remove_node(key)
	end
end
300
# View of the values of a HashMap
#
# Searches walk the list of the nodes of the map, from `_first_item`,
# following the `_next_item` links.
private class HashMapValues[K, V]
	super RemovableCollection[V]

	# The original map
	var map: HashMap[K, V]

	redef fun is_empty
	do
		return self.map.is_empty
	end

	redef fun length
	do
		return self.map.length
	end

	redef fun first
	do
		return self.map._first_item._value
	end

	redef fun iterator
	do
		return new MapValuesIterator[K, V](self.map.iterator)
	end

	# Number of entries of the map whose value is equal to `item`
	redef fun count(item)
	do
		var total = 0
		var node = self.map._first_item
		while node != null do
			if node._value == item then total += 1
			node = node._next_item
		end
		return total
	end

	# True as soon as one entry of the map has a value equal to `item`
	redef fun has(item)
	do
		var node = self.map._first_item
		while node != null do
			if node._value == item then return true
			node = node._next_item
		end
		return false
	end

	# True if no entry has a value different from `item` (thus true on an empty map)
	redef fun has_only(item)
	do
		var node = self.map._first_item
		while node != null do
			if node._value != item then return false
			node = node._next_item
		end
		return true
	end

	redef fun clear
	do
		self.map.clear
	end

	# Remove the first entry (in list order) whose value is equal to `item`
	redef fun remove(item)
	do
		var owner = self.map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
				break
			end
			node = node._next_item
		end
	end

	# Remove every entry whose value is equal to `item`
	redef fun remove_all(item)
	do
		var owner = self.map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
			end
			# `remove_node` does not touch the links of the removed node,
			# so the walk can continue from it
			node = node._next_item
		end
	end
end
371
# A node of a `HashMap`: a key (from `HashNode`) and its associated value.
private class HashMapNode[K, V]
	super HashNode[K]

	# The value associated with the key of the node
	var value: V

	redef type N: HashMapNode[K, V]
end
377
# A `MapIterator` over a `HashMap`.
#
# The entries are visited by following the `_next_item` links of the nodes,
# starting from the `_first_item` of the map.
private class HashMapIterator[K, V]
	super MapIterator[K, V]

	# The map to iterate on
	var map: HashMap[K, V]

	# The current node
	var node: nullable HashMapNode[K, V] = null

	# Start on the first node of the map (if any)
	init
	do
		_node = _map._first_item
	end

	redef fun is_ok do return _node != null

	# The value of the current entry
	redef fun item
	do
		assert is_ok
		return _node._value
	end

	#redef fun item=(value)
	#do
	#	assert is_ok
	#	_node.second = value
	#end

	# The key of the current entry
	redef fun key
	do
		assert is_ok
		return _node._key
	end

	# Move to the following entry
	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end
419
# A `Set` implemented with a hash table.
#
# Items are placed in the table according to their `hash` and compared with `==`,
# so both methods have to be consistent for an item to be found again.
class HashSet[E]
	super Set[E]
	super HashCollection[E]

	redef type N: HashSetNode[E] is fixed

	redef fun length do return _the_length

	redef fun is_empty do return _the_length == 0

	# The first added item that is still in the set
	redef fun first
	do
		assert _the_length > 0
		return _first_item._key
	end

	redef fun has(item)
	do
		return node_at(item) != null
	end

	# Add `item` to the set
	#
	# If an equal item is already present, it is replaced by `item`
	# and the length of the set does not change.
	redef fun add(item)
	do
		var i = index_at(item)
		var c = node_at_idx(i, item)
		if c != null then
			c._key = item
		else
			store(i,new HashSetNode[E](item))
		end
	end

	redef fun remove(item) do remove_node(item)

	redef fun clear do raz

	redef fun iterator do return new HashSetIterator[E](self)

	# Start with an empty table; `enlarge` allocates the initial array.
	init
	do
		_capacity = 0
		_the_length = 0
		enlarge(0)
	end

	# Build a set filled with the items of `coll`.
	init from(coll: Collection[E]) do
		init
		add_all(coll)
	end

	redef fun new_set do return new HashSet[E]
end
475
# A node of a `HashSet`: the key inherited from `HashNode` is the item itself.
private class HashSetNode[E]
	super HashNode[E]
	# The nodes linked to a `HashSetNode` are also `HashSetNode`
	redef type N: HashSetNode[E]
end
480
# An `Iterator` over the items of a `HashSet`.
#
# The items are visited by following the `_next_item` links of the nodes,
# starting from the `_first_item` of the set.
private class HashSetIterator[E]
	super Iterator[E]

	# The set to iterate on
	var set: HashSet[E]

	# The position in the internal map storage
	var node: nullable HashSetNode[E] = null

	# Start on the first node of the set (if any)
	init
	do
		_node = _set._first_item
	end

	redef fun is_ok
	do
		return _node != null
	end

	# The item stored in the current node
	redef fun item
	do
		assert is_ok
		return _node._key
	end

	# Move to the following node
	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end