lib/core: add a factory to `Set` to instantiate a `HashSet` by default
[nit.git] / lib / core / collection / hash_collection.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2009 Jean Privat <jean@pryen.org>
4 #
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
11 # another product.
12
13 # Introduce `HashMap` and `HashSet`.
14 module hash_collection
15
16 import array
17
redef class Map[K, V]
	# Factory of the default implementation of `Map`: a new empty `HashMap[K, V]`
	new do
		return new HashMap[K, V]
	end
end
22
redef class Set[E]
	# Get an instance of `HashSet[E]`, the default implementation
	new do
		return new HashSet[E]
	end
end
27
# A HashCollection is an array of HashNode[K] indexed by the K hash value
#
# Besides the buckets, nodes are chained from `first_item` to `last_item`
# in the order they were stored.
private abstract class HashCollection[K]
	# The kind of node stored in the collection
	type N: HashNode[K]

	# Used to store items (each cell is the head of a chain of nodes)
	var array: NativeArray[nullable N] is noautoinit

	# Size of `_array`
	var capacity: Int = 0

	# Number of items in the collection
	var the_length: Int = 0

	# First added item (used to visit items in nice order)
	var first_item: nullable N = null

	# Last added item (used to visit items in nice order)
	var last_item: nullable N = null

	# The last key accessed (used for cache)
	#
	# It is reset to `null` each time the cache is invalidated.
	var last_accessed_key: nullable Object = null

	# The last node accessed (used for cache)
	var last_accessed_node: nullable N = null

	# Return the index of the key k
	#
	# `null` is always associated with the index 0.
	# For any other key, `_capacity` is used as a divisor and thus must not be 0.
	fun index_at(k: nullable Object): Int
	do
		if k == null then return 0

		var i = k.hash % _capacity
		if i < 0 then i = - i
		return i
	end

	# Return the node associated with the key
	#
	# Return `null` if there is no such node.
	# The result (found or not) is remembered in the one-entry cache.
	fun node_at(k: nullable Object): nullable N
	do
		if _the_length == 0 then return null
		# cache: `is` is used instead of `==` because it is a faster filter (even if not exact)
		#
		# A `null` key is never answered from the cache: `null` is also the value
		# `_last_accessed_key` is reset to on invalidation, so it cannot be
		# distinguished from a real cached `null` key.
		if k != null and k.is_same_instance(_last_accessed_key) then return _last_accessed_node

		var res = node_at_idx(index_at(k), k)
		_last_accessed_key = k
		_last_accessed_node = res
		return res
	end

	# Return the node associated with the key (but with the index already known)
	#
	# Return `null` if there is no such node.
	# The cache is neither read nor updated.
	fun node_at_idx(i: Int, k: nullable Object): nullable N
	do
		if _the_length == 0 then return null
		var c = _array[i]
		while c != null do
			var ck = c._key
			if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enough yet
				break
			end
			c = c._next_in_bucklet
		end
		return c
	end

	# Add a new node at a given index
	#
	# No check is done on the key of `node`: nothing here prevents a key
	# from being stored twice.
	# The structure is enlarged when it becomes too full.
	fun store(index: Int, node: N)
	do
		# Store the item in the list
		if _first_item == null then
			_first_item = node
		else
			_last_item._next_item = node
		end
		node._prev_item = _last_item
		node._next_item = null
		_last_item = node

		# Then store it in the array (at the head of the bucket)
		var next = _array[index]
		_array[index] = node
		node._next_in_bucklet = next
		if next != null then next._prev_in_bucklet = node

		_last_accessed_key = node._key
		_last_accessed_node = node

		# Enlarge if needed
		var l = _the_length
		_the_length = l + 1

		# Magic values determined empirically
		# We do not want to enlarge too much
		# We also want a odd capacity so that the modulo is more distributive
		l = (l + 5) * 2 + 1
		if l >= _capacity then
			enlarge(l * 3 / 2 + 1)
		end
	end

	# Remove the node associated with the key
	#
	# Do nothing if there is no such node.
	fun remove_node(k: nullable Object)
	do
		if _the_length == 0 then return
		var i = index_at(k)
		var node = node_at_idx(i, k)
		if node == null then return

		# Remove the item in the list
		var prev = node._prev_item
		var next = node._next_item
		if prev != null then
			prev._next_item = next
		else
			_first_item = next
		end
		if next != null then
			next._prev_item = prev
		else
			_last_item = prev
		end

		# Remove the item in the array
		_the_length -= 1
		prev = node._prev_in_bucklet
		next = node._next_in_bucklet
		if prev != null then
			prev._next_in_bucklet = next
		else
			_array[i] = next
		end
		if next != null then
			next._prev_in_bucklet = prev
		end

		# Invalidate the cache (and do not keep the removed node reachable from it)
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Clear the whole structure
	#
	# The capacity is kept; only the content is dropped.
	fun raz
	do
		var i = _capacity - 1
		while i >= 0 do
			_array[i] = null
			i -= 1
		end
		_the_length = 0
		_first_item = null
		_last_item = null
		_last_accessed_key = null
		_last_accessed_node = null
	end

	# Force a capacity
	#
	# `cap` is raised to `_the_length + 1` if it is smaller.
	# Do nothing if the resulting capacity is not greater than the current one.
	# Otherwise a new array is allocated and all the nodes are put again in it.
	fun enlarge(cap: Int)
	do
		# get a new capacity
		if cap < _the_length + 1 then cap = _the_length + 1
		if cap <= _capacity then return
		_capacity = cap
		_last_accessed_key = null
		_last_accessed_node = null

		# get a new array
		var new_array = new NativeArray[nullable N](cap)
		_array = new_array

		# Reput items in the array
		var node = _first_item
		while node != null do
			var index = index_at(node._key)
			# Then store it in the array
			var next = new_array[index]
			new_array[index] = node
			node._prev_in_bucklet = null
			node._next_in_bucklet = next
			if next != null then next._prev_in_bucklet = node
			node = node._next_item
		end
	end
end
197
# A node of a `HashCollection`
#
# A node carries a key and four links: two for the list of all the items,
# two for the bucket the node is stored in.
private abstract class HashNode[K]
	# The kind of node this node is linked to
	type N: HashNode[K]

	# The key carried by the node
	var key: K

	# Previous node in the list of all the items
	var prev_item: nullable N = null

	# Next node in the list of all the items
	var next_item: nullable N = null

	# Previous node in the same bucket
	var prev_in_bucklet: nullable N = null

	# Next node in the same bucket
	var next_in_bucklet: nullable N = null
end
206
# A `Map` implemented with a hash table.
#
# ~~~
# var map = new HashMap[nullable String, Int]
# map[null] = 0
# map["one"] = 1
# map["two"] = 2
#
# assert map[null] == 0
# assert map["one"] == 1
# assert map.keys.has("two")
# assert map.values.length == 3
# ~~~
class HashMap[K, V]
	super Map[K, V]
	super HashCollection[K]

	redef type N: HashMapNode[K, V] is fixed

	# Start with an empty map without any storage.
	init
	do
		_the_length = 0
		_capacity = 0
	end

	# Build a map filled with the entries of `coll`.
	init from(coll: Map[K, V]) do
		init
		recover_with(coll)
	end

	redef fun length do return _the_length

	redef fun is_empty do return _the_length == 0

	redef fun has_key(k)
	do
		var node = node_at(k)
		return node != null
	end

	redef fun [](key)
	do
		var node = node_at(key)
		if node != null then return node._value
		return provide_default_value(key)
	end

	redef fun get_or_null(key)
	do
		var node = node_at(key)
		if node == null then return null
		return node._value
	end

	redef fun []=(key, v)
	do
		if _capacity == 0 then enlarge(17) # 17 because magic in `store`
		var index = index_at(key)
		var node = node_at_idx(index, key)
		if node == null then
			# Unknown key: a new node is needed
			store(index, new HashMapNode[K, V](key, v))
			return
		end
		# Known key: the existing node is updated in place
		node._key = key
		node._value = v
	end

	redef fun clear do raz

	redef fun iterator do return new HashMapIterator[K,V](self)

	redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self) is lazy
	redef var values: RemovableCollection[V] = new HashMapValues[K, V](self) is lazy
end
283
# View of the keys of a HashMap
#
# The view is live: it reads and modifies the original map.
private class HashMapKeys[K, V]
	super RemovableCollection[K]

	# The original map
	var map: HashMap[K, V]

	redef fun length do return map.length

	redef fun is_empty do return map.is_empty

	redef fun first do return map._first_item._key

	redef fun has(k)
	do
		var node = map.node_at(k)
		return node != null
	end

	# A key is present at most once
	redef fun count(k)
	do
		if has(k) then return 1
		return 0
	end

	redef fun has_only(k)
	do
		if has(k) and length == 1 then return true
		return is_empty
	end

	redef fun iterator do return new MapKeysIterator[K, V](map.iterator)

	redef fun clear do map.clear

	# Since a key is present at most once, `remove` and `remove_all` do the same thing
	redef fun remove(key) do map.remove_node(key)
	redef fun remove_all(key) do map.remove_node(key)
end
304
# View of the values of a HashMap
#
# The view is live: it reads and modifies the original map.
# Searches walk the list of all the nodes, from the first added one.
private class HashMapValues[K, V]
	super RemovableCollection[V]

	# The original map
	var map: HashMap[K, V]

	redef fun length do return map.length

	redef fun is_empty do return map.is_empty

	redef fun first do return map._first_item._value

	redef fun iterator do return new MapValuesIterator[K, V](map.iterator)

	redef fun clear do map.clear

	redef fun has(item)
	do
		var node = map._first_item
		while node != null do
			if node._value == item then return true
			node = node._next_item
		end
		return false
	end

	redef fun has_only(item)
	do
		var node = map._first_item
		while node != null do
			if node._value != item then return false
			node = node._next_item
		end
		return true
	end

	redef fun count(item)
	do
		var total = 0
		var node = map._first_item
		while node != null do
			if node._value == item then
				total += 1
			end
			node = node._next_item
		end
		return total
	end

	# Remove the first entry (in the order of the nodes) whose value is `item`
	redef fun remove(item)
	do
		var owner = map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
				return
			end
			node = node._next_item
		end
	end

	# Remove all the entries whose value is `item`
	redef fun remove_all(item)
	do
		var owner = map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
			end
			# `remove_node` does not touch the `_next_item` of the removed node:
			# the walk can continue from it
			node = node._next_item
		end
	end
end
375
# A node of a `HashMap`: a key (from `HashNode`) with its associated value
private class HashMapNode[K, V]
	super HashNode[K]

	# The value associated with the key
	var value: V

	redef type N: HashMapNode[K, V]
end
381
# A `MapIterator` over a `HashMap`.
#
# Entries are visited by following the `_next_item` links from the
# `_first_item` of the map.
private class HashMapIterator[K, V]
	super MapIterator[K, V]

	# The map to iterate on
	var map: HashMap[K, V]

	# The current node
	var node: nullable HashMapNode[K, V] = null

	# Start the iteration on the first item of `map`.
	init
	do
		# Only `node` has to be set here.
		# The former `_map = map` just assigned the attribute to itself.
		_node = _map._first_item
	end

	redef fun is_ok do return _node != null

	redef fun item
	do
		assert is_ok
		return _node._value
	end

	#redef fun item=(value)
	#do
	#	assert is_ok
	#	_node.second = value
	#end

	redef fun key
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end
423
# A `Set` implemented with a hash table.
#
# Items of such a set require a working `hash` method.
#
# NOTE(review): the previous comment also stated that items cannot be null,
# although `index_at` explicitly associates `null` with the index 0 -- to be confirmed.
class HashSet[E]
	super Set[E]
	super HashCollection[E]

	redef type N: HashSetNode[E] is fixed

	# Start with an empty set without any storage.
	init
	do
		_the_length = 0
		_capacity = 0
	end

	# Build a set filled with the items of `coll`.
	init from(coll: Collection[E]) do
		init
		add_all(coll)
	end

	redef fun new_set do return new HashSet[E]

	redef fun length do return _the_length

	redef fun is_empty do return _the_length == 0

	redef fun first
	do
		assert _the_length > 0
		return _first_item._key
	end

	redef fun has(item)
	do
		var node = node_at(item)
		return node != null
	end

	redef fun add(item)
	do
		if _capacity == 0 then enlarge(17) # 17 because magic in `store`
		var index = index_at(item)
		var node = node_at_idx(index, item)
		if node == null then
			# Unknown item: a new node is needed
			store(index, new HashSetNode[E](item))
			return
		end
		# Equal item already present: its node now holds the given instance
		node._key = item
	end

	redef fun remove(item) do remove_node(item)

	redef fun clear do raz

	redef fun iterator do return new HashSetIterator[E](self)
end
479
# A node of a `HashSet`: the key (from `HashNode`) is the item itself
private class HashSetNode[E]
	super HashNode[E]

	redef type N: HashSetNode[E]
end
484
# An `Iterator` over a `HashSet`.
#
# Items are visited by following the `_next_item` links from the
# `_first_item` of the set.
private class HashSetIterator[E]
	super Iterator[E]

	# The set to iterate on
	var set: HashSet[E]

	# The position in the internal map storage
	var node: nullable HashSetNode[E] = null

	# Start the iteration on the first item of `set`.
	init
	do
		_node = _set._first_item
	end

	redef fun is_ok
	do
		return _node != null
	end

	redef fun item
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end