examples: annotate examples
[nit.git] / lib / more_collections.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Highly specific, but useful, collections-related classes.
16 module more_collections is serialize
17
18 import serialization
19 import poset
20
21 # Simple way to store an `HashMap[K, Array[V]]`
22 #
23 # Unlike standard HashMap, MultiHashMap provides a new
24 # empty array on the first access on a unknown key.
25 #
26 # var m = new MultiHashMap[String, Char]
27 # assert not m.has_key("four")
28 # m["four"].add('i')
29 # m["four"].add('i')
30 # m["four"].add('i')
31 # m["four"].add('i')
32 # assert m.has_key("four")
33 # assert m["four"] == ['i', 'i', 'i', 'i']
34 # assert m["zzz"] == new Array[Char]
35 class MultiHashMap[K, V]
36 super HashMap[K, Array[V]]
37
38 # Add `v` to the array associated with `k`.
39 #
40 # If there is no array associated, then create it.
41 #
42 # For the inverse operation, see `remove_one`.
43 #
44 # ```
45 # var m = new MultiHashMap[String, Char]
46 # m.add_one("four", 'i')
47 # m.add_one("four", 'i')
48 # m.add_one("four", 'i')
49 # m.add_one("four", 'i')
50 # assert m.has_key("four")
51 # assert m["four"] == ['i', 'i', 'i', 'i']
52 # ```
53 fun add_one(k: K, v: V)
54 do
55 var x = self.get_or_null(k)
56 if x != null then
57 x.add(v)
58 else
59 self[k] = [v]
60 end
61 end
62
63 redef fun provide_default_value(key) do
64 var res = new Array[V]
65 self[key] = res
66 return res
67 end
68
69 # Remove an occurrence of `v` from the array associated with `k`.
70 #
71 # If the associated array does not contain `v`, do nothing. If the
72 # associated array only contain one element and this element is `v`, remove
73 # the key `k`.
74 #
75 # In a nutshell, does the inverse operation of `add_one`.
76 #
77 # ```
78 # var m = new MultiHashMap[String, Char]
79 # m["four"] = ['4', 'i', 'i', 'i', 'i']
80 # m.remove_one("four", 'i')
81 # assert m["four"] == ['4', 'i', 'i', 'i']
82 #
83 # m = new MultiHashMap[String, Char]
84 # m.add_one("one", '1')
85 # m.remove_one("one", '?')
86 # assert m["one"] == ['1']
87 # m.remove_one("one", '1')
88 # assert not m.has_key("one")
89 # assert m["one"] == new Array[Char]
90 #
91 # m = new MultiHashMap[String, Char]
92 # m.add_one("one", '1')
93 # m.remove_one("two", '2')
94 # assert not m.has_key("two")
95 # assert m["one"] == ['1']
96 # assert m["two"] == new Array[Char]
97 # ```
98 fun remove_one(k: K, v: V)
99 do
100 var x = get_or_null(k)
101 if x != null then
102 x.remove(v)
103 if x.is_empty then keys.remove(k)
104 end
105 end
106
107 # Search the values in `pe.greaters` from the most smaller elements that have a value.
108 #
109 # Elements without values are ignored.
110 #
111 # Basically, values defined in nearest greater elements of `pe` are inherited.
112 #
113 # ~~~
114 # var pos = new POSet[String]
115 # pos.add_chain(["E", "D", "C", "B", "A"])
116 # pos.add_chain(["D", "X", "B"])
117 #
118 # var map = new MultiHashMap[String, String]
119 # map["A"].append(["a", "1"])
120 # map["C"].append(["c", "2"])
121 # map["X"].append(["x", "2"])
122 # map["E"].add "e"
123 #
124 # assert map.lookup_joined_values(pos["B"]).has_exactly(["a", "1"])
125 # assert map.lookup_joined_values(pos["C"]).has_exactly(["c", "2"])
126 # assert map.lookup_joined_values(pos["D"]).has_exactly(["c", "x", "2"])
127 # ~~~
128 fun lookup_joined_values(pe: POSetElement[K]): Set[V]
129 do
130 var res = new Set[V]
131 for k in pe.poset.select_smallest(filter_keys(pe.greaters)) do res.add_all self[k]
132 return res
133
134 end
135 end
136
137 # Simple way to store an `HashMap[K1, HashMap[K2, V]]`
138 #
139 # ~~~~
140 # var hm2 = new HashMap2[Int, String, Float]
141 # hm2[1, "one"] = 1.0
142 # hm2[2, "two"] = 2.0
143 # assert hm2[1, "one"] == 1.0
144 # assert hm2[2, "not-two"] == null
145 # ~~~~
146 class HashMap2[K1, K2, V]
147
148 private var level1 = new HashMap[K1, HashMap[K2, V]]
149
150 # Return the value associated to the keys `k1` and `k2`.
151 # Return `null` if no such a value.
152 fun [](k1: K1, k2: K2): nullable V
153 do
154 var level1 = self.level1
155 var level2 = level1.get_or_null(k1)
156 if level2 == null then return null
157 return level2.get_or_null(k2)
158 end
159
160 # Set `v` the value associated to the keys `k1` and `k2`.
161 fun []=(k1: K1, k2: K2, v: V)
162 do
163 var level1 = self.level1
164 var level2 = level1.get_or_null(k1)
165 if level2 == null then
166 level2 = new HashMap[K2, V]
167 level1[k1] = level2
168 end
169 level2[k2] = v
170 end
171
172 # Remove the item at `k1` and `k2`
173 fun remove_at(k1: K1, k2: K2)
174 do
175 var level1 = self.level1
176 var level2 = level1.get_or_null(k1)
177 if level2 == null then return
178 level2.keys.remove(k2)
179 end
180
181 # Is there a value at `k1, k2`?
182 fun has(k1: K1, k2: K2): Bool
183 do
184 if not level1.keys.has(k1) then return false
185 return level1[k1].keys.has(k2)
186 end
187
188 # Remove all items
189 fun clear do level1.clear
190 end
191
192 # Simple way to store an `HashMap[K1, HashMap[K2, HashMap[K3, V]]]`
193 #
194 # ~~~~
195 # var hm3 = new HashMap3[Int, String, Int, Float]
196 # hm3[1, "one", 11] = 1.0
197 # hm3[2, "two", 22] = 2.0
198 # assert hm3[1, "one", 11] == 1.0
199 # assert hm3[2, "not-two", 22] == null
200 # ~~~~
201 class HashMap3[K1, K2, K3, V]
202
203 private var level1 = new HashMap[K1, HashMap2[K2, K3, V]]
204
205 # Return the value associated to the keys `k1`, `k2`, and `k3`.
206 # Return `null` if no such a value.
207 fun [](k1: K1, k2: K2, k3: K3): nullable V
208 do
209 var level1 = self.level1
210 var level2 = level1.get_or_null(k1)
211 if level2 == null then return null
212 return level2[k2, k3]
213 end
214
215 # Set `v` the value associated to the keys `k1`, `k2`, and `k3`.
216 fun []=(k1: K1, k2: K2, k3: K3, v: V)
217 do
218 var level1 = self.level1
219 var level2 = level1.get_or_null(k1)
220 if level2 == null then
221 level2 = new HashMap2[K2, K3, V]
222 level1[k1] = level2
223 end
224 level2[k2, k3] = v
225 end
226
227 # Remove the item at `k1`, `k2` and `k3`
228 fun remove_at(k1: K1, k2: K2, k3: K3)
229 do
230 var level1 = self.level1
231 var level2 = level1.get_or_null(k1)
232 if level2 == null then return
233 level2.remove_at(k2, k3)
234 end
235
236 # Is there a value at `k1, k2, k3`?
237 fun has(k1: K1, k2: K2, k3: K3): Bool
238 do
239 if not level1.keys.has(k1) then return false
240 return level1[k1].has(k2, k3)
241 end
242
243 # Remove all items
244 fun clear do level1.clear
245 end
246
247 # Simple way to store an `HashMap[K1, HashMap[K2, HashMap[K3, HashMap[K4, V]]]]`
248 #
249 # ~~~~
250 # var hm4 = new HashMap4[Int, String, Int, String, Float]
251 # hm4[1, "one", 11, "un"] = 1.0
252 # hm4[2, "two", 22, "deux"] = 2.0
253 # assert hm4[1, "one", 11, "un"] == 1.0
254 # assert hm4[2, "not-two", 22, "deux"] == null
255 # ~~~~
256 class HashMap4[K1, K2, K3, K4, V]
257
258 private var level1 = new HashMap[K1, HashMap3[K2, K3, K4, V]]
259
260 # Return the value associated to the keys `k1`, `k2`, `k3` and `k4`.
261 # Return `null` if no such a value.
262 fun [](k1: K1, k2: K2, k3: K3, k4: K4): nullable V
263 do
264 var level1 = self.level1
265 var level2 = level1.get_or_null(k1)
266 if level2 == null then return null
267 return level2[k2, k3, k4]
268 end
269
270 # Set `v` the value associated to the keys `k1`, `k2`, `k3` and `k4`.
271 fun []=(k1: K1, k2: K2, k3: K3, k4: K4, v: V)
272 do
273 var level1 = self.level1
274 var level2 = level1.get_or_null(k1)
275 if level2 == null then
276 level2 = new HashMap3[K2, K3, K4, V]
277 level1[k1] = level2
278 end
279 level2[k2, k3, k4] = v
280 end
281
282 # Remove the item at `k1`, `k2`, `k3` and `k4`
283 fun remove_at(k1: K1, k2: K2, k3: K3, k4: K4)
284 do
285 var level1 = self.level1
286 var level2 = level1.get_or_null(k1)
287 if level2 == null then return
288 level2.remove_at(k2, k3, k4)
289 end
290
291 # Is there a value at `k1, k2, k3, k4`?
292 fun has(k1: K1, k2: K2, k3: K3, k4: K4): Bool
293 do
294 if not level1.keys.has(k1) then return false
295 return level1[k1].has(k2, k3, k4)
296 end
297
298 # Remove all items
299 fun clear do level1.clear
300 end
301
302 # A map with a default value.
303 #
304 # ~~~~
305 # var dm = new DefaultMap[String, Int](10)
306 # assert dm["a"] == 10
307 # ~~~~
308 #
309 # The default value is used when the key is not present.
310 # And getting a default value does not register the key.
311 #
312 # ~~~~
313 # assert dm["a"] == 10
314 # assert dm.length == 0
315 # assert dm.has_key("a") == false
316 # ~~~~
317 #
318 # It also means that removed key retrieve the default value.
319 #
320 # ~~~~
321 # dm["a"] = 2
322 # assert dm["a"] == 2
323 # dm.keys.remove("a")
324 # assert dm["a"] == 10
325 # ~~~~
326 #
327 # Warning: the default value is used as is, so using mutable object might
328 # cause side-effects.
329 #
330 # ~~~~
331 # var dma = new DefaultMap[String, Array[Int]](new Array[Int])
332 #
333 # dma["a"].add(65)
334 # assert dma["a"] == [65]
335 # assert dma.default == [65]
336 # assert dma["c"] == [65]
337 #
338 # dma["b"] += [66]
339 # assert dma["b"] == [65, 66]
340 # assert dma.default == [65]
341 # ~~~~
342 class DefaultMap[K, V]
343 super HashMap[K, V]
344
345 # The default value.
346 var default: V
347
348 redef fun provide_default_value(key) do return default
349 end
350
351 # An unrolled linked list
352 #
353 # A sequence implemented as a linked list of arrays.
354 #
355 # This data structure is similar to the `List` but it can benefit from
356 # better cache performance, lower data overhead for the nodes metadata and
357 # it spares the GC to allocate many small nodes.
358 class UnrolledList[E]
359 super Sequence[E]
360
361 # Desired capacity for each nodes
362 #
363 # By default at `32`, it can be increased for very large lists.
364 #
365 # It can be modified anytime, but newly created nodes may still be assigned
366 # the same capacity of old nodes when created by `insert`.
367 var nodes_length = 32 is writable
368
369 private var head_node: UnrolledNode[E] = new UnrolledNode[E](nodes_length)
370
371 private var tail_node: UnrolledNode[E] = head_node
372
373 redef var length = 0
374
375 redef fun clear
376 do
377 head_node = new UnrolledNode[E](nodes_length)
378 tail_node = head_node
379 length = 0
380 end
381
382 # Out parameter of `node_at`
383 private var index_within_node = 0
384
385 private fun node_at(index: Int): UnrolledNode[E]
386 do
387 assert index >= 0 and index < length
388
389 var node = head_node
390 while index >= node.length do
391 index -= node.length
392 node = node.next.as(not null)
393 end
394
395 index_within_node = index
396 return node
397 end
398
399 private fun insert_node(node: UnrolledNode[E], prev, next: nullable UnrolledNode[E])
400 do
401 if prev != null then
402 prev.next = node
403 else head_node = node
404
405 if next != null then
406 next.prev = node
407 else tail_node = node
408
409 node.next = next
410 node.prev = prev
411 end
412
413 redef fun [](index)
414 do
415 var node = node_at(index)
416 index = index_within_node + node.head_index
417 return node.items[index]
418 end
419
420 redef fun []=(index, value)
421 do
422 var node = node_at(index)
423 index = index_within_node + node.head_index
424 node.items[index] = value
425 end
426
427 redef fun push(item)
428 do
429 var node = tail_node
430 if not node.full then
431 if node.tail_index < node.capacity then
432 # There's room at the tail
433 node.tail_index += 1
434 else
435 # Move everything over by `d`
436 assert node.head_index > 0
437 var d = node.head_index / 2 + 1
438 node.move_head(node.length, d)
439 for i in d.times do node.items[node.tail_index - i] = null
440 node.head_index -= d
441 node.tail_index += -d+1
442 end
443 node.items[node.tail_index-1] = item
444 else
445 # New node!
446 node = new UnrolledNode[E](nodes_length)
447 insert_node(node, tail_node, null)
448 node.tail_index = 1
449 node.items[0] = item
450 end
451 length += 1
452 end
453
454 redef fun unshift(item)
455 do
456 var node = head_node
457 if not node.full then
458 if node.head_index > 0 then
459 # There's room at the head
460 node.head_index -= 1
461 else
462 # Move everything over by `d`
463 assert node.tail_index < node.capacity
464 var d = (node.capacity-node.tail_index) / 2 + 1
465 node.move_tail(0, d)
466 for i in d.times do node.items[node.head_index+i] = null
467 node.head_index += d-1
468 node.tail_index += d
469 end
470 node.items[node.head_index] = item
471 else
472 # New node!
473 node = new UnrolledNode[E](nodes_length)
474 insert_node(node, null, head_node)
475 node.head_index = node.capacity-1
476 node.tail_index = node.capacity
477 node.items[node.capacity-1] = item
478 end
479 length += 1
480 end
481
482 redef fun pop
483 do
484 assert not_empty
485
486 var node = tail_node
487 while node.length == 0 do
488 # Delete empty node
489 var nullable_node = node.prev
490 assert is_not_empty: nullable_node != null
491 node = nullable_node
492 node.next = null
493 self.tail_node = node
494 end
495
496 var item = node.items[node.tail_index-1]
497 node.tail_index -= 1
498 length -= 1
499 return item
500 end
501
502 redef fun shift
503 do
504 assert not_empty
505
506 var node = head_node
507 while node.length == 0 do
508 # Delete empty node
509 var nullable_node = node.next
510 assert is_not_empty: nullable_node != null
511 node = nullable_node
512 node.prev = null
513 self.head_node = node
514 end
515
516 var item = node.items[node.head_index]
517 node.head_index += 1
518 length -= 1
519 return item
520 end
521
522 redef fun insert(item, index)
523 do
524 if index == length then
525 push item
526 return
527 end
528
529 var node = node_at(index)
530 index = index_within_node
531 if node.full then
532 # Move half to a new node
533 var new_node = new UnrolledNode[E](nodes_length.max(node.capacity))
534
535 # Plug in the new node
536 var next_node = node.next
537 insert_node(new_node, node, next_node)
538
539 # Move items at and after `index` to the new node
540 var to_displace = node.length-index
541 var offset = (new_node.capacity-to_displace)/2
542 for i in [0..to_displace[ do
543 new_node.items[offset+i] = node.items[index+i]
544 node.items[index+i] = null
545 end
546 new_node.head_index = offset
547 new_node.tail_index = offset + to_displace
548 node.tail_index -= to_displace
549
550 # Store `item`
551 if index > node.capacity / 2 then
552 new_node.items[offset-1] = item
553 new_node.head_index -= 1
554 else
555 node.items[node.head_index+index] = item
556 node.tail_index += 1
557 end
558 else
559 if node.tail_index < node.capacity then
560 # Move items towards the tail
561 node.move_tail(index, 1)
562 node.tail_index += 1
563 node.items[node.head_index + index] = item
564 else
565 # Move items towards the head
566 node.move_head(index, 1)
567 node.items[node.head_index + index-1] = item
568 node.head_index -= 1
569 end
570 end
571 length += 1
572 end
573
574 redef fun remove_at(index)
575 do
576 var node = node_at(index)
577 index = index_within_node + node.head_index
578
579 # Shift left all the elements after `index`
580 for i in [index+1 .. node.tail_index[ do
581 node.items[i-1] = node.items[i]
582 end
583 node.tail_index -= 1
584 node.items[node.tail_index] = null
585
586 length -= 1
587
588 var next_node = node.next
589 var prev_node = node.prev
590 if node.is_empty and (next_node != null or prev_node != null) then
591 # Empty and non-head or tail node, delete
592 if next_node != null then
593 next_node.prev = node.prev
594 else tail_node = prev_node.as(not null)
595
596 if prev_node != null then
597 prev_node.next = node.next
598 else head_node = next_node.as(not null)
599 end
600 end
601
602 redef fun iterator do return new UnrolledIterator[E](self)
603 end
604
605 # Node composing an `UnrolledList`
606 #
607 # Stores the elements in the `items` array. The elements in the `items` array
608 # begin at `head_index` and end right before `tail_index`. The data is contiguous,
609 # but there can be empty cells at the beginning and the end of the array.
610 private class UnrolledNode[E]
611
612 var prev: nullable UnrolledNode[E] = null
613
614 var next: nullable UnrolledNode[E] = null
615
616 # Desired length of `items`
617 var capacity: Int
618
619 # `Array` of items in this node, filled with `null`
620 var items = new Array[nullable E].filled_with(null, capacity) is lazy
621
622 # Index of the first element in `items`
623 var head_index = 0
624
625 # Index after the last element in `items`
626 var tail_index = 0
627
628 fun length: Int do return tail_index - head_index
629
630 fun full: Bool do return length == capacity
631
632 fun is_empty: Bool do return tail_index == head_index
633
634 # Move towards the head all elements before `index` of `displace` cells
635 fun move_tail(index, displace: Int)
636 do
637 for i in [tail_index-1..head_index+index].step(-1) do
638 items[i+displace] = items[i]
639 end
640 end
641
642 # Move towards the tail all elements at and after `index` of `displace` cells
643 fun move_head(index, displace: Int)
644 do
645 for i in [head_index..head_index+index[ do
646 items[i-displace] = items[i]
647 end
648 end
649 end
650
651 private class UnrolledIterator[E]
652 super IndexedIterator[E]
653
654 var list: UnrolledList[E]
655
656 var node: nullable UnrolledNode[E] = list.head_node is lazy
657
658 # Index of the current `item`
659 redef var index = 0
660
661 # Index within the current `node`
662 var index_in_node: Int = node.head_index is lazy
663
664 redef fun item do return node.items[index_in_node]
665
666 redef fun is_ok do return index < list.length
667
668 redef fun next
669 do
670 index += 1
671 index_in_node += 1
672
673 if index_in_node >= node.tail_index then
674 node = node.next
675 if node != null then index_in_node = node.head_index
676 end
677 end
678 end
679
680 # Keep track of the best elements according to a distance value.
681 #
682 # ~~~
683 # var bests = new BestDistance[String](5)
684 # bests.update(10, "Too big")
685 # assert bests.best_items.is_empty
686 # bests.update(5, "Just fine")
687 # bests.update(5, "Another one")
688 # assert bests.best_items.has_exactly(["Just fine", "Another one"])
689 # bests.update(2, "A better one")
690 # bests.update(4, "Not good enough")
691 # assert bests.best_distance == 2
692 # assert bests.best_items.has_exactly(["A better one"])
693 # ~~~
694 class BestDistance[E]
695 # Current smallest distance
696 var best_distance: Int is writable
697
698 # Known elements with the smallest distance
699 var best_items = new Set[E] is writable
700
701 # Register a `candidate` with a `distance`
702 #
703 # * To high, it is ignored.
704 # * Equal to the current best, it is added
705 # * Better that them, is is the new best element
706 #
707 # Return `true` if the candidate is kept (alone or with other)
708 # returns `false` if the candidate is ignored.
709 fun update(distance: Int, candidate: E): Bool
710 do
711 if distance > best_distance then return false
712 if distance < best_distance then
713 best_distance = distance
714 best_items.clear
715 end
716 best_items.add candidate
717 return true
718 end
719 end