README: document nit_env.sh
[nit.git] / lib / more_collections.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Highly specific, but useful, collections-related classes.
16 module more_collections is serialize
17
18 import serialization
19
20 # Simple way to store an `HashMap[K, Array[V]]`
21 #
22 # Unlike standard HashMap, MultiHashMap provides a new
23 # empty array on the first access on a unknown key.
24 #
25 # var m = new MultiHashMap[String, Char]
26 # assert not m.has_key("four")
27 # m["four"].add('i')
28 # m["four"].add('i')
29 # m["four"].add('i')
30 # m["four"].add('i')
31 # assert m.has_key("four")
32 # assert m["four"] == ['i', 'i', 'i', 'i']
33 # assert m["zzz"] == new Array[Char]
34 class MultiHashMap[K, V]
35 super HashMap[K, Array[V]]
36
37 # Add `v` to the array associated with `k`.
38 # If there is no array associated, then create it.
39 fun add_one(k: K, v: V)
40 do
41 var x = self.get_or_null(k)
42 if x != null then
43 x.add(v)
44 else
45 self[k] = [v]
46 end
47 end
48
49 redef fun provide_default_value(key) do
50 var res = new Array[V]
51 self[key] = res
52 return res
53 end
54 end
55
56 # Simple way to store an `HashMap[K1, HashMap[K2, V]]`
57 #
58 # ~~~~
59 # var hm2 = new HashMap2[Int, String, Float]
60 # hm2[1, "one"] = 1.0
61 # hm2[2, "two"] = 2.0
62 # assert hm2[1, "one"] == 1.0
63 # assert hm2[2, "not-two"] == null
64 # ~~~~
65 class HashMap2[K1, K2, V]
66
67 private var level1 = new HashMap[K1, HashMap[K2, V]]
68
69 # Return the value associated to the keys `k1` and `k2`.
70 # Return `null` if no such a value.
71 fun [](k1: K1, k2: K2): nullable V
72 do
73 var level1 = self.level1
74 var level2 = level1.get_or_null(k1)
75 if level2 == null then return null
76 return level2.get_or_null(k2)
77 end
78
79 # Set `v` the value associated to the keys `k1` and `k2`.
80 fun []=(k1: K1, k2: K2, v: V)
81 do
82 var level1 = self.level1
83 var level2 = level1.get_or_null(k1)
84 if level2 == null then
85 level2 = new HashMap[K2, V]
86 level1[k1] = level2
87 end
88 level2[k2] = v
89 end
90
91 # Remove the item at `k1` and `k2`
92 fun remove_at(k1: K1, k2: K2)
93 do
94 var level1 = self.level1
95 var level2 = level1.get_or_null(k1)
96 if level2 == null then return
97 level2.keys.remove(k2)
98 end
99
100 # Is there a value at `k1, k2`?
101 fun has(k1: K1, k2: K2): Bool
102 do
103 if not level1.keys.has(k1) then return false
104 return level1[k1].keys.has(k2)
105 end
106
107 # Remove all items
108 fun clear do level1.clear
109 end
110
111 # Simple way to store an `HashMap[K1, HashMap[K2, HashMap[K3, V]]]`
112 #
113 # ~~~~
114 # var hm3 = new HashMap3[Int, String, Int, Float]
115 # hm3[1, "one", 11] = 1.0
116 # hm3[2, "two", 22] = 2.0
117 # assert hm3[1, "one", 11] == 1.0
118 # assert hm3[2, "not-two", 22] == null
119 # ~~~~
120 class HashMap3[K1, K2, K3, V]
121
122 private var level1 = new HashMap[K1, HashMap2[K2, K3, V]]
123
124 # Return the value associated to the keys `k1`, `k2`, and `k3`.
125 # Return `null` if no such a value.
126 fun [](k1: K1, k2: K2, k3: K3): nullable V
127 do
128 var level1 = self.level1
129 var level2 = level1.get_or_null(k1)
130 if level2 == null then return null
131 return level2[k2, k3]
132 end
133
134 # Set `v` the value associated to the keys `k1`, `k2`, and `k3`.
135 fun []=(k1: K1, k2: K2, k3: K3, v: V)
136 do
137 var level1 = self.level1
138 var level2 = level1.get_or_null(k1)
139 if level2 == null then
140 level2 = new HashMap2[K2, K3, V]
141 level1[k1] = level2
142 end
143 level2[k2, k3] = v
144 end
145
146 # Remove the item at `k1`, `k2` and `k3`
147 fun remove_at(k1: K1, k2: K2, k3: K3)
148 do
149 var level1 = self.level1
150 var level2 = level1.get_or_null(k1)
151 if level2 == null then return
152 level2.remove_at(k2, k3)
153 end
154
155 # Is there a value at `k1, k2, k3`?
156 fun has(k1: K1, k2: K2, k3: K3): Bool
157 do
158 if not level1.keys.has(k1) then return false
159 return level1[k1].has(k2, k3)
160 end
161
162 # Remove all items
163 fun clear do level1.clear
164 end
165
166 # A map with a default value.
167 #
168 # ~~~~
169 # var dm = new DefaultMap[String, Int](10)
170 # assert dm["a"] == 10
171 # ~~~~
172 #
173 # The default value is used when the key is not present.
174 # And getting a default value does not register the key.
175 #
176 # ~~~~
177 # assert dm["a"] == 10
178 # assert dm.length == 0
179 # assert dm.has_key("a") == false
180 # ~~~~
181 #
182 # It also means that removed key retrieve the default value.
183 #
184 # ~~~~
185 # dm["a"] = 2
186 # assert dm["a"] == 2
187 # dm.keys.remove("a")
188 # assert dm["a"] == 10
189 # ~~~~
190 #
191 # Warning: the default value is used as is, so using mutable object might
192 # cause side-effects.
193 #
194 # ~~~~
195 # var dma = new DefaultMap[String, Array[Int]](new Array[Int])
196 #
197 # dma["a"].add(65)
198 # assert dma["a"] == [65]
199 # assert dma.default == [65]
200 # assert dma["c"] == [65]
201 #
202 # dma["b"] += [66]
203 # assert dma["b"] == [65, 66]
204 # assert dma.default == [65]
205 # ~~~~
206 class DefaultMap[K, V]
207 super HashMap[K, V]
208
209 # The default value.
210 var default: V
211
212 redef fun provide_default_value(key) do return default
213 end
214
215 # An unrolled linked list
216 #
217 # A sequence implemented as a linked list of arrays.
218 #
219 # This data structure is similar to the `List` but it can benefit from
220 # better cache performance, lower data overhead for the nodes metadata and
221 # it spares the GC to allocate many small nodes.
222 class UnrolledList[E]
223 super Sequence[E]
224
225 # Desired capacity for each nodes
226 #
227 # By default at `32`, it can be increased for very large lists.
228 #
229 # It can be modified anytime, but newly created nodes may still be assigned
230 # the same capacity of old nodes when created by `insert`.
231 var nodes_length = 32 is writable
232
233 private var head_node: UnrolledNode[E] = new UnrolledNode[E](nodes_length)
234
235 private var tail_node: UnrolledNode[E] = head_node
236
237 redef var length = 0
238
239 redef fun clear
240 do
241 head_node = new UnrolledNode[E](nodes_length)
242 tail_node = head_node
243 length = 0
244 end
245
246 # Out parameter of `node_at`
247 private var index_within_node = 0
248
249 private fun node_at(index: Int): UnrolledNode[E]
250 do
251 assert index >= 0 and index < length
252
253 var node = head_node
254 while index >= node.length do
255 index -= node.length
256 node = node.next.as(not null)
257 end
258
259 index_within_node = index
260 return node
261 end
262
263 private fun insert_node(node: UnrolledNode[E], prev, next: nullable UnrolledNode[E])
264 do
265 if prev != null then
266 prev.next = node
267 else head_node = node
268
269 if next != null then
270 next.prev = node
271 else tail_node = node
272
273 node.next = next
274 node.prev = prev
275 end
276
277 redef fun [](index)
278 do
279 var node = node_at(index)
280 index = index_within_node + node.head_index
281 return node.items[index]
282 end
283
284 redef fun []=(index, value)
285 do
286 var node = node_at(index)
287 index = index_within_node + node.head_index
288 node.items[index] = value
289 end
290
291 redef fun push(item)
292 do
293 var node = tail_node
294 if not node.full then
295 if node.tail_index < node.capacity then
296 # There's room at the tail
297 node.tail_index += 1
298 else
299 # Move everything over by `d`
300 assert node.head_index > 0
301 var d = node.head_index / 2 + 1
302 node.move_head(node.length, d)
303 for i in d.times do node.items[node.tail_index - i] = null
304 node.head_index -= d
305 node.tail_index += -d+1
306 end
307 node.items[node.tail_index-1] = item
308 else
309 # New node!
310 node = new UnrolledNode[E](nodes_length)
311 insert_node(node, tail_node, null)
312 node.tail_index = 1
313 node.items[0] = item
314 end
315 length += 1
316 end
317
318 redef fun unshift(item)
319 do
320 var node = head_node
321 if not node.full then
322 if node.head_index > 0 then
323 # There's room at the head
324 node.head_index -= 1
325 else
326 # Move everything over by `d`
327 assert node.tail_index < node.capacity
328 var d = (node.capacity-node.tail_index) / 2 + 1
329 node.move_tail(0, d)
330 for i in d.times do node.items[node.head_index+i] = null
331 node.head_index += d-1
332 node.tail_index += d
333 end
334 node.items[node.head_index] = item
335 else
336 # New node!
337 node = new UnrolledNode[E](nodes_length)
338 insert_node(node, null, head_node)
339 node.head_index = node.capacity-1
340 node.tail_index = node.capacity
341 node.items[node.capacity-1] = item
342 end
343 length += 1
344 end
345
346 redef fun pop
347 do
348 assert not_empty
349
350 var node = tail_node
351 while node.length == 0 do
352 # Delete empty node
353 var nullable_node = node.prev
354 assert is_not_empty: nullable_node != null
355 node = nullable_node
356 node.next = null
357 self.tail_node = node
358 end
359
360 var item = node.items[node.tail_index-1]
361 node.tail_index -= 1
362 length -= 1
363 return item
364 end
365
366 redef fun shift
367 do
368 assert not_empty
369
370 var node = head_node
371 while node.length == 0 do
372 # Delete empty node
373 var nullable_node = node.next
374 assert is_not_empty: nullable_node != null
375 node = nullable_node
376 node.prev = null
377 self.head_node = node
378 end
379
380 var item = node.items[node.head_index]
381 node.head_index += 1
382 length -= 1
383 return item
384 end
385
386 redef fun insert(item, index)
387 do
388 if index == length then
389 push item
390 return
391 end
392
393 var node = node_at(index)
394 index = index_within_node
395 if node.full then
396 # Move half to a new node
397 var new_node = new UnrolledNode[E](nodes_length.max(node.capacity))
398
399 # Plug in the new node
400 var next_node = node.next
401 insert_node(new_node, node, next_node)
402
403 # Move items at and after `index` to the new node
404 var to_displace = node.length-index
405 var offset = (new_node.capacity-to_displace)/2
406 for i in [0..to_displace[ do
407 new_node.items[offset+i] = node.items[index+i]
408 node.items[index+i] = null
409 end
410 new_node.head_index = offset
411 new_node.tail_index = offset + to_displace
412 node.tail_index -= to_displace
413
414 # Store `item`
415 if index > node.capacity / 2 then
416 new_node.items[offset-1] = item
417 new_node.head_index -= 1
418 else
419 node.items[node.head_index+index] = item
420 node.tail_index += 1
421 end
422 else
423 if node.tail_index < node.capacity then
424 # Move items towards the tail
425 node.move_tail(index, 1)
426 node.tail_index += 1
427 node.items[node.head_index + index] = item
428 else
429 # Move items towards the head
430 node.move_head(index, 1)
431 node.items[node.head_index + index-1] = item
432 node.head_index -= 1
433 end
434 end
435 length += 1
436 end
437
438 redef fun remove_at(index)
439 do
440 var node = node_at(index)
441 index = index_within_node + node.head_index
442
443 # Shift left all the elements after `index`
444 for i in [index+1 .. node.tail_index[ do
445 node.items[i-1] = node.items[i]
446 end
447 node.tail_index -= 1
448 node.items[node.tail_index] = null
449
450 length -= 1
451
452 var next_node = node.next
453 var prev_node = node.prev
454 if node.is_empty and (next_node != null or prev_node != null) then
455 # Empty and non-head or tail node, delete
456 if next_node != null then
457 next_node.prev = node.prev
458 else tail_node = prev_node.as(not null)
459
460 if prev_node != null then
461 prev_node.next = node.next
462 else head_node = next_node.as(not null)
463 end
464 end
465
466 redef fun iterator do return new UnrolledIterator[E](self)
467 end
468
469 # Node composing an `UnrolledList`
470 #
471 # Stores the elements in the `items` array. The elements in the `items` array
472 # begin at `head_index` and end right before `tail_index`. The data is contiguous,
473 # but there can be empty cells at the beginning and the end of the array.
474 private class UnrolledNode[E]
475
476 var prev: nullable UnrolledNode[E] = null
477
478 var next: nullable UnrolledNode[E] = null
479
480 # Desired length of `items`
481 var capacity: Int
482
483 # `Array` of items in this node, filled with `null`
484 var items = new Array[nullable E].filled_with(null, capacity) is lazy
485
486 # Index of the first element in `items`
487 var head_index = 0
488
489 # Index after the last element in `items`
490 var tail_index = 0
491
492 fun length: Int do return tail_index - head_index
493
494 fun full: Bool do return length == capacity
495
496 fun is_empty: Bool do return tail_index == head_index
497
498 # Move towards the head all elements before `index` of `displace` cells
499 fun move_tail(index, displace: Int)
500 do
501 for i in [tail_index-1..head_index+index].step(-1) do
502 items[i+displace] = items[i]
503 end
504 end
505
506 # Move towards the tail all elements at and after `index` of `displace` cells
507 fun move_head(index, displace: Int)
508 do
509 for i in [head_index..head_index+index[ do
510 items[i-displace] = items[i]
511 end
512 end
513 end
514
515 private class UnrolledIterator[E]
516 super IndexedIterator[E]
517
518 var list: UnrolledList[E]
519
520 var node: nullable UnrolledNode[E] = list.head_node is lazy
521
522 # Index of the current `item`
523 redef var index = 0
524
525 # Index within the current `node`
526 var index_in_node: Int = node.head_index is lazy
527
528 redef fun item do return node.items[index_in_node]
529
530 redef fun is_ok do return index < list.length
531
532 redef fun next
533 do
534 index += 1
535 index_in_node += 1
536
537 if index_in_node >= node.tail_index then
538 node = node.next
539 if node != null then index_in_node = node.head_index
540 end
541 end
542 end