cae46fd0fb834737b72ff553682237ca24ef1daf
[nit.git] / lib / standard / collection / hash_collection.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2009 Jean Privat <jean@pryen.org>
4 #
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
11 # another product.
12
13 # Introduce Hashmap and Hashset.
14 module hash_collection
15
16 import array
17
18 # A HashCollection is an array of HashNode[K] indexed by the K hash value
19 private abstract class HashCollection[K]
20 type N: HashNode[K]
21
22 var array: nullable NativeArray[nullable N] = null # Used to store items
23 var capacity: Int = 0 # Size of _array
24 var the_length: Int = 0 # Number of items in the map
25
26 var first_item: nullable N = null # First added item (used to visit items in nice order)
27 var last_item: nullable N = null # Last added item (same)
28
29 # The last key accessed (used for cache)
30 var last_accessed_key: nullable K = null
31
32 # The last node accessed (used for cache)
33 var last_accessed_node: nullable N = null
34
35 # Return the index of the key k
36 fun index_at(k: K): Int
37 do
38 if k == null then return 0
39
40 var i = k.hash % _capacity
41 if i < 0 then i = - i
42 return i
43 end
44
45 # Return the node associated with the key
46 fun node_at(k: K): nullable N
47 do
48 # cache: `is` is used instead of `==` because it is a faster filter (even if not exact)
49 if k.is_same_instance(_last_accessed_key) then return _last_accessed_node
50
51 var res = node_at_idx(index_at(k), k)
52 _last_accessed_key = k
53 _last_accessed_node = res
54 return res
55 end
56
57 # Return the node associated with the key (but with the index already known)
58 fun node_at_idx(i: Int, k: K): nullable N
59 do
60 var c = _array[i]
61 while c != null do
62 var ck = c._key
63 if ck.is_same_instance(k) or ck == k then # FIXME prefilter because the compiler is not smart enought yet
64 break
65 end
66 c = c._next_in_bucklet
67 end
68 return c
69 end
70
71 # Add a new node at a given index
72 fun store(index: Int, node: N)
73 do
74 # Store the item in the list
75 if _first_item == null then
76 _first_item = node
77 else
78 _last_item._next_item = node
79 end
80 node._prev_item = _last_item
81 node._next_item = null
82 _last_item = node
83
84 # Then store it in the array
85 var next = _array[index]
86 _array[index] = node
87 node._next_in_bucklet = next
88 if next != null then next._prev_in_bucklet = node
89
90 _last_accessed_key = node._key
91 _last_accessed_node = node
92
93 # Enlarge if needed
94 var l = _the_length
95 _the_length = l + 1
96
97 # Magic values determined empirically
98 # We do not want to enlarge too much
99 # We also want a odd capacity so that the modulo is more distributive
100 l = (l + 5) * 2 + 1
101 if l >= _capacity then
102 enlarge(l * 3 / 2 + 1)
103 end
104 end
105
106 # Remove the node assosiated with the key
107 fun remove_node(k: K)
108 do
109 var i = index_at(k)
110 var node = node_at_idx(i, k)
111 if node == null then return
112
113 # Remove the item in the list
114 var prev = node._prev_item
115 var next = node._next_item
116 if prev != null then
117 prev._next_item = next
118 else
119 _first_item = next
120 end
121 if next != null then
122 next._prev_item = prev
123 else
124 _last_item = prev
125 end
126
127 # Remove the item in the array
128 _the_length -= 1
129 prev = node._prev_in_bucklet
130 next = node._next_in_bucklet
131 if prev != null then
132 prev._next_in_bucklet = next
133 else
134 _array[i] = next
135 end
136 if next != null then
137 next._prev_in_bucklet = prev
138 end
139
140 _last_accessed_key = null
141 end
142
143 # Clear the whole structure
144 fun raz
145 do
146 var i = _capacity - 1
147 while i >= 0 do
148 _array[i] = null
149 i -= 1
150 end
151 _the_length = 0
152 _first_item = null
153 _last_item = null
154 _last_accessed_key = null
155 end
156
157 # Force a capacity
158 fun enlarge(cap: Int)
159 do
160 var old_cap = _capacity
161 # get a new capacity
162 if cap < _the_length + 1 then cap = _the_length + 1
163 if cap <= _capacity then return
164 _capacity = cap
165 _last_accessed_key = null
166
167 # get a new array
168 var new_array = new NativeArray[nullable N](cap)
169 _array = new_array
170
171 # clean the new array
172 var i = cap - 1
173 while i >=0 do
174 new_array[i] = null
175 i -= 1
176 end
177
178 if _capacity <= old_cap then return
179
180 # Reput items in the array
181 var node = _first_item
182 while node != null do
183 var index = index_at(node._key)
184 # Then store it in the array
185 var next = new_array[index]
186 new_array[index] = node
187 node._prev_in_bucklet = null
188 node._next_in_bucklet = next
189 if next != null then next._prev_in_bucklet = node
190 node = node._next_item
191 end
192 end
193 end
194
195 private abstract class HashNode[K]
196 var key: K
197 type N: HashNode[K]
198 var next_item: nullable N = null
199 var prev_item: nullable N = null
200 var prev_in_bucklet: nullable N = null
201 var next_in_bucklet: nullable N = null
202 end
203
204 # A `Map` implemented with a hash table.
205 #
206 # ~~~
207 # var map = new HashMap[nullable String, Int]
208 # map[null] = 0
209 # map["one"] = 1
210 # map["two"] = 2
211 #
212 # assert map[null] == 0
213 # assert map["one"] == 1
214 # assert map.keys.has("two")
215 # assert map.values.length == 3
216 # ~~~
217 class HashMap[K, V]
218 super Map[K, V]
219 super HashCollection[K]
220
221 redef type N: HashMapNode[K, V] is fixed
222
223 redef fun [](key)
224 do
225 var c = node_at(key)
226 if c == null then
227 return provide_default_value(key)
228 else
229 return c._value
230 end
231 end
232
233 redef fun iterator: HashMapIterator[K, V] do return new HashMapIterator[K,V](self)
234
235 redef fun length do return _the_length
236
237 redef fun is_empty do return _the_length == 0
238
239 redef fun []=(key, v)
240 do
241 var i = index_at(key)
242 var c = node_at_idx(i, key)
243 if c != null then
244 c._key = key
245 c._value = v
246 else
247 store(i, new HashMapNode[K, V](key, v))
248 end
249 end
250
251 redef fun clear do raz
252
253 init
254 do
255 _capacity = 0
256 _the_length = 0
257 enlarge(0)
258 end
259
260 redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self)
261 redef var values: RemovableCollection[V] = new HashMapValues[K, V](self)
262 end
263
264 # View of the keys of a HashMap
265 private class HashMapKeys[K, V]
266 super RemovableCollection[K]
267 # The original map
268 var map: HashMap[K, V]
269
270 redef fun count(k) do if self.has(k) then return 1 else return 0
271 redef fun first do return self.map._first_item._key
272 redef fun has(k) do return self.map.node_at(k) != null
273 redef fun has_only(k) do return (self.has(k) and self.length == 1) or self.is_empty
274 redef fun is_empty do return self.map.is_empty
275 redef fun length do return self.map.length
276
277 redef fun iterator do return new MapKeysIterator[K, V](self.map.iterator)
278
279 redef fun clear do self.map.clear
280
281 redef fun remove(key) do self.map.remove_node(key)
282 redef fun remove_all(key) do self.map.remove_node(key)
283 end
284
285 # View of the values of a Map
286 private class HashMapValues[K, V]
287 super RemovableCollection[V]
288 # The original map
289 var map: HashMap[K, V]
290
291 redef fun count(item)
292 do
293 var nb = 0
294 var c = self.map._first_item
295 while c != null do
296 if c._value == item then nb += 1
297 c = c._next_item
298 end
299 return nb
300 end
301 redef fun first do return self.map._first_item._value
302
303 redef fun has(item)
304 do
305 var c = self.map._first_item
306 while c != null do
307 if c._value == item then return true
308 c = c._next_item
309 end
310 return false
311 end
312
313 redef fun has_only(item)
314 do
315 var c = self.map._first_item
316 while c != null do
317 if c._value != item then return false
318 c = c._next_item
319 end
320 return true
321 end
322
323 redef fun is_empty do return self.map.is_empty
324 redef fun length do return self.map.length
325
326 redef fun iterator do return new MapValuesIterator[K, V](self.map.iterator)
327
328 redef fun clear do self.map.clear
329
330 redef fun remove(item)
331 do
332 var map = self.map
333 var c = map._first_item
334 while c != null do
335 if c._value == item then
336 map.remove_node(c._key)
337 return
338 end
339 c = c._next_item
340 end
341 end
342
343 redef fun remove_all(item)
344 do
345 var map = self.map
346 var c = map._first_item
347 while c != null do
348 if c._value == item then
349 map.remove_node(c._key)
350 end
351 c = c._next_item
352 end
353 end
354 end
355
356 private class HashMapNode[K, V]
357 super HashNode[K]
358 redef type N: HashMapNode[K, V]
359 var value: V
360 end
361
362 # A `MapIterator` over a `HashMap`.
363 class HashMapIterator[K, V]
364 super MapIterator[K, V]
365 redef fun is_ok do return _node != null
366
367 redef fun item
368 do
369 assert is_ok
370 return _node._value
371 end
372
373 #redef fun item=(value)
374 #do
375 # assert is_ok
376 # _node.second = value
377 #end
378
379 redef fun key
380 do
381 assert is_ok
382 return _node._key
383 end
384
385 redef fun next
386 do
387 assert is_ok
388 _node = _node._next_item
389 end
390
391 # The map to iterate on
392 private var map: HashMap[K, V]
393
394 # The current node
395 private var node: nullable HashMapNode[K, V] = null
396
397 init
398 do
399 _map = map
400 _node = _map._first_item
401 end
402 end
403
404 # A `Set` implemented with a hash table.
405 # Keys of such a map cannot be null and require a working `hash` method
406 class HashSet[E: Object]
407 super Set[E]
408 super HashCollection[E]
409
410 redef type N: HashSetNode[E] is fixed
411
412 redef fun length do return _the_length
413
414 redef fun is_empty do return _the_length == 0
415
416 redef fun first
417 do
418 assert _the_length > 0
419 return _first_item._key
420 end
421
422 redef fun has(item)
423 do
424 return node_at(item) != null
425 end
426
427 redef fun add(item)
428 do
429 var i = index_at(item)
430 var c = node_at_idx(i, item)
431 if c != null then
432 c._key = item
433 else
434 store(i,new HashSetNode[E](item))
435 end
436 end
437
438 redef fun remove(item) do remove_node(item)
439
440 redef fun clear do raz
441
442 redef fun iterator do return new HashSetIterator[E](self)
443
444 init
445 do
446 _capacity = 0
447 _the_length = 0
448 enlarge(0)
449 end
450
451 # Build a list filled with the items of `coll`.
452 init from(coll: Collection[E]) do
453 init
454 add_all(coll)
455 end
456
457 redef fun new_set do return new HashSet[E]
458 end
459
460 private class HashSetNode[E: Object]
461 super HashNode[E]
462 redef type N: HashSetNode[E]
463 end
464
465 private class HashSetIterator[E: Object]
466 super Iterator[E]
467 redef fun is_ok do return _node != null
468
469 redef fun item
470 do
471 assert is_ok
472 return _node._key
473 end
474
475 redef fun next
476 do
477 assert is_ok
478 _node = _node._next_item
479 end
480
481 # The set to iterate on
482 var set: HashSet[E]
483
484 # The position in the internal map storage
485 var node: nullable HashSetNode[E] = null
486
487 init
488 do
489 _node = _set._first_item
490 end
491 end
492