8e27c806f4fcc5568c17d67c8f1d592be6b38b9f
[nit.git] / lib / standard / collection / hash_collection.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2004-2009 Jean Privat <jean@pryen.org>
4 #
5 # This file is free software, which comes along with NIT. This software is
6 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
7 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
8 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
9 # is kept unaltered, and a notification of the changes is added.
10 # You are allowed to redistribute it and sell it, alone or is a part of
11 # another product.
12
13 # Introduce `HashMap` and `HashSet`.
14 module hash_collection
15
16 import array
17
# A HashCollection is an array of HashNode[K] indexed by the K hash value
#
# Nodes whose keys fall on the same index are chained together through
# `_next_in_bucklet` and `_prev_in_bucklet`.
# All the nodes are also linked through `_next_item` and `_prev_item`,
# in the order they were stored, so that they can be visited in that order.
private abstract class HashCollection[K: Object, N: HashNode[Object]]

	# Used to store items: `_array[i]` is the first node of the chain at index `i`
	private var array: nullable NativeArray[nullable N] = null

	# Size of `_array`
	private var capacity: Int = 0

	# Number of items in the collection
	private var the_length: Int = 0

	# First added item (used to visit items in nice order)
	private var first_item: nullable N = null

	# Last added item (used to visit items in nice order)
	private var last_item: nullable N = null

	# The last key accessed (used for cache)
	private var last_accessed_key: nullable K = null

	# The last node accessed (used for cache)
	#
	# It is null when the cached access did not find any node.
	private var last_accessed_node: nullable N = null

	# Return the index of the key `k` in `_array`
	#
	# `_capacity` must not be 0 since it is used as a divisor.
	fun index_at(k: K): Int
	do
		var i = k.hash % _capacity
		# Fold a negative remainder back to a valid index
		if i < 0 then i = - i
		return i
	end

	# Return the node associated with the key `k`, or null if there is none
	#
	# The result (even a null one) is kept in a one-entry cache.
	fun node_at(k: K): nullable N
	do
		# cache: identity is used instead of `==` because it is a faster filter (even if not exact)
		if k.is_same_instance(_last_accessed_key) then return _last_accessed_node

		var res = node_at_idx(index_at(k), k)
		_last_accessed_key = k
		_last_accessed_node = res
		return res
	end

	# Return the node associated with the key `k` (but with the index `i` already known)
	#
	# Return null if no node of the chain at index `i` has a key equal to `k`.
	fun node_at_idx(i: Int, k: K): nullable N
	do
		var c = _array[i]
		while c != null do
			var ck = c._key
			# FIXME: the identity test is a prefilter because the compiler is not smart enough yet
			if ck.is_same_instance(k) or ck == k then return c
			c = c._next_in_bucklet
		end
		return null
	end

	# Add a new node at a given index
	#
	# The node becomes the last of the visit order and the first of the chain at `index`.
	# No check is done on an already stored equal key: callers look it up first.
	fun store(index: Int, node: N)
	do
		# Store the item in the list
		if _first_item == null then
			_first_item = node
		else
			_last_item._next_item = node
		end
		node._prev_item = _last_item
		node._next_item = null
		_last_item = node

		# Then store it in the array
		var next = _array[index]
		_array[index] = node
		node._next_in_bucklet = next
		if next != null then next._prev_in_bucklet = node

		_last_accessed_key = node._key
		_last_accessed_node = node

		# Enlarge if needed
		var l = _the_length
		_the_length = l + 1

		# Magic values determined empirically
		# We do not want to enlarge too much
		# We also want a odd capacity so that the modulo is more distributive
		l = (l + 5) * 2 + 1
		if l >= _capacity then
			enlarge(l * 3 / 2 + 1)
		end
	end

	# Remove the node associated with the key `k`
	#
	# Do nothing if there is no such node.
	# The links of the removed node itself are left untouched.
	fun remove_node(k: K)
	do
		var i = index_at(k)
		var node = node_at_idx(i, k)
		if node == null then return

		# Remove the item in the list
		var prev = node._prev_item
		var next = node._next_item
		if prev != null then
			prev._next_item = next
		else
			_first_item = next
		end
		if next != null then
			next._prev_item = prev
		else
			_last_item = prev
		end

		# Remove the item in the array
		_the_length -= 1
		prev = node._prev_in_bucklet
		next = node._next_in_bucklet
		if prev != null then
			prev._next_in_bucklet = next
		else
			_array[i] = next
		end
		if next != null then
			next._prev_in_bucklet = prev
		end

		# Invalidate the cache
		_last_accessed_key = null
	end

	# Clear the whole structure
	#
	# The capacity is kept.
	fun raz
	do
		var i = _capacity - 1
		while i >= 0 do
			_array[i] = null
			i -= 1
		end
		_the_length = 0
		_first_item = null
		_last_item = null
		_last_accessed_key = null
	end

	# Force a capacity
	#
	# The requested `cap` is raised to at least `_the_length + 1`.
	# Nothing is done if the resulting capacity is not greater than the
	# current one: the structure never shrinks.
	# Otherwise a new array is allocated and all the nodes are put in it.
	fun enlarge(cap: Int)
	do
		# get a new capacity
		if cap < _the_length + 1 then cap = _the_length + 1
		if cap <= _capacity then return
		_capacity = cap
		_last_accessed_key = null

		# get a new array
		var new_array = new NativeArray[nullable N](cap)
		_array = new_array

		# clean the new array
		var i = cap - 1
		while i >= 0 do
			new_array[i] = null
			i -= 1
		end

		# Reput items in the array, following the visit order
		var node = _first_item
		while node != null do
			var index = index_at(node._key)
			# Then store it in the array
			var next = new_array[index]
			new_array[index] = node
			node._prev_in_bucklet = null
			node._next_in_bucklet = next
			if next != null then next._prev_in_bucklet = node
			node = node._next_item
		end
	end
end
191
# A node of a `HashCollection`
#
# Each node belongs to two doubly linked lists: the list of all the stored
# nodes and the chain of the nodes that share the same index.
private abstract class HashNode[K: Object]
	# The type of the nodes this node is linked with
	type N: HashNode[K]

	# The key held by the node
	private var key: K

	# The previous node in the list of all the stored nodes
	private var prev_item: nullable N = null

	# The next node in the list of all the stored nodes
	private var next_item: nullable N = null

	# The previous node in the chain of the nodes at the same index
	private var prev_in_bucklet: nullable N = null

	# The next node in the chain of the nodes at the same index
	private var next_in_bucklet: nullable N = null
end
200
# A `Map` implemented with a hash table.
#
# Keys of such a map cannot be null and require a working `hash` method.
class HashMap[K: Object, V]
	super Map[K, V]
	super HashCollection[K, HashMapNode[K, V]]

	# Return the value associated with `key`.
	# When there is no such key, the result of `provide_default_value` is returned.
	redef fun [](key)
	do
		var node = node_at(key)
		if node != null then return node._value
		return provide_default_value(key)
	end

	redef fun iterator: HashMapIterator[K, V]
	do
		return new HashMapIterator[K,V](self)
	end

	redef fun length
	do
		return _the_length
	end

	redef fun is_empty
	do
		return _the_length == 0
	end

	# Associate `v` with `key`.
	# When an equal key is already stored, both its key and its value are replaced.
	redef fun []=(key, v)
	do
		var idx = index_at(key)
		var node = node_at_idx(idx, key)
		if node == null then
			store(idx, new HashMapNode[K, V](key, v))
		else
			node._key = key
			node._value = v
		end
	end

	redef fun clear
	do
		raz
	end

	# Build an empty map
	init
	do
		_capacity = 0
		_the_length = 0
		# Allocate the initial storage
		enlarge(0)
	end

	# View of the keys, backed by this map
	redef var keys: RemovableCollection[K] = new HashMapKeys[K, V](self)

	# View of the values, backed by this map
	redef var values: RemovableCollection[V] = new HashMapValues[K, V](self)
end
247
# View of the keys of a `HashMap`
#
# Removals done through the view are applied to the original map.
private class HashMapKeys[K: Object, V]
	super RemovableCollection[K]

	# The original map
	var map: HashMap[K, V]

	# A key is stored at most once
	redef fun count(k)
	do
		if has(k) then return 1
		return 0
	end

	redef fun first
	do
		return map._first_item._key
	end

	redef fun has(k)
	do
		return map.node_at(k) != null
	end

	redef fun has_only(k)
	do
		if has(k) and length == 1 then return true
		return is_empty
	end

	redef fun is_empty
	do
		return map.is_empty
	end

	redef fun length
	do
		return map.length
	end

	redef fun iterator
	do
		return new MapKeysIterator[K, V](map.iterator)
	end

	redef fun clear
	do
		map.clear
	end

	redef fun remove(key)
	do
		map.remove_node(key)
	end

	redef fun remove_all(key)
	do
		map.remove_node(key)
	end
end
268
# View of the values of a `HashMap`
#
# Searches walk the nodes of the original map from `_first_item`,
# following `_next_item`.
private class HashMapValues[K: Object, V]
	super RemovableCollection[V]

	# The original map
	var map: HashMap[K, V]

	redef fun count(item)
	do
		var total = 0
		var node = map._first_item
		while node != null do
			if node._value == item then total += 1
			node = node._next_item
		end
		return total
	end

	redef fun first
	do
		return map._first_item._value
	end

	redef fun has(item)
	do
		var node = map._first_item
		while node != null do
			if node._value == item then return true
			node = node._next_item
		end
		return false
	end

	# True also when there is no value at all
	redef fun has_only(item)
	do
		var node = map._first_item
		while node != null do
			if node._value != item then return false
			node = node._next_item
		end
		return true
	end

	redef fun is_empty
	do
		return map.is_empty
	end

	redef fun length
	do
		return map.length
	end

	redef fun iterator
	do
		return new MapValuesIterator[K, V](map.iterator)
	end

	redef fun clear
	do
		map.clear
	end

	# Remove the first entry (in visit order) whose value is `item`
	redef fun remove(item)
	do
		var owner = map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
				return
			end
			node = node._next_item
		end
	end

	# Remove all the entries whose value is `item`
	redef fun remove_all(item)
	do
		var owner = map
		var node = owner._first_item
		while node != null do
			if node._value == item then
				owner.remove_node(node._key)
			end
			# `remove_node` does not alter the links of the removed node,
			# so the walk can go on from it
			node = node._next_item
		end
	end
end
339
# A node of a `HashMap`: a key and its associated value
private class HashMapNode[K: Object, V]
	super HashNode[K]

	# Nodes of a map are only linked with other nodes of a map
	redef type N: HashMapNode[K, V]

	# The value associated with the key
	private var value: V
end
345
# Iterator over the entries of a `HashMap`
#
# Entries are visited from the `_first_item` of the map, following `_next_item`.
class HashMapIterator[K: Object, V]
	super MapIterator[K, V]

	# The map to iterate on
	private var map: HashMap[K, V]

	# The current node
	private var node: nullable HashMapNode[K, V] = null

	# Start the iteration on the first node of the map
	init
	do
		_map = map
		_node = _map._first_item
	end

	redef fun is_ok
	do
		return _node != null
	end

	redef fun item
	do
		assert is_ok
		return _node._value
	end

	#redef fun item=(value)
	#do
	#	assert is_ok
	#	_node.second = value
	#end

	redef fun key
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end
386
# A `Set` implemented with a hash table.
#
# Items of such a set cannot be null and require a working `hash` method.
class HashSet[E: Object]
	super Set[E]
	super HashCollection[E, HashSetNode[E]]

	redef fun length
	do
		return _the_length
	end

	redef fun is_empty
	do
		return _the_length == 0
	end

	# The set must not be empty
	redef fun first
	do
		assert _the_length > 0
		return _first_item._key
	end

	redef fun has(item)
	do
		return node_at(item) != null
	end

	# Add `item` to the set.
	# When an equal item is already stored, it is replaced by `item`.
	redef fun add(item)
	do
		var idx = index_at(item)
		var node = node_at_idx(idx, item)
		if node == null then
			store(idx, new HashSetNode[E](item))
		else
			node._key = item
		end
	end

	redef fun remove(item)
	do
		remove_node(item)
	end

	redef fun clear
	do
		raz
	end

	redef fun iterator
	do
		return new HashSetIterator[E](self)
	end

	# Build an empty set
	init
	do
		_capacity = 0
		_the_length = 0
		# Allocate the initial storage
		enlarge(0)
	end

	# Build a set filled with the items of `coll`.
	init from(coll: Collection[E])
	do
		init
		add_all(coll)
	end

	redef fun new_set
	do
		return new HashSet[E]
	end
end
440
# A node of a `HashSet`: the key is the stored item
private class HashSetNode[E: Object]
	super HashNode[E]

	# Nodes of a set are only linked with other nodes of a set
	redef type N: HashSetNode[E]
end
445
# Iterator over the items of a `HashSet`
#
# Items are visited from the `_first_item` of the set, following `_next_item`.
private class HashSetIterator[E: Object]
	super Iterator[E]

	# The set to iterate on
	private var set: HashSet[E]

	# The position in the internal map storage
	private var node: nullable HashSetNode[E] = null

	# Start the iteration on the first node of the set
	init
	do
		_node = _set._first_item
	end

	redef fun is_ok
	do
		return _node != null
	end

	redef fun item
	do
		assert is_ok
		return _node._key
	end

	redef fun next
	do
		assert is_ok
		_node = _node._next_item
	end
end
473