bench/string: fix typo in shell script
[nit.git] / lib / more_collections.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Highly specific, but useful, collections-related classes.
16 module more_collections is serialize
17
18 import serialization
19
20 # Simple way to store an `HashMap[K, Array[V]]`
21 #
22 # Unlike standard HashMap, MultiHashMap provides a new
23 # empty array on the first access on a unknown key.
24 #
25 # var m = new MultiHashMap[String, Char]
26 # assert not m.has_key("four")
27 # m["four"].add('i')
28 # m["four"].add('i')
29 # m["four"].add('i')
30 # m["four"].add('i')
31 # assert m.has_key("four")
32 # assert m["four"] == ['i', 'i', 'i', 'i']
33 # assert m["zzz"] == new Array[Char]
34 class MultiHashMap[K, V]
35 super HashMap[K, Array[V]]
36
37 # Add `v` to the array associated with `k`.
38 #
39 # If there is no array associated, then create it.
40 #
41 # For the inverse operation, see `remove_one`.
42 #
43 # ```
44 # var m = new MultiHashMap[String, Char]
45 # m.add_one("four", 'i')
46 # m.add_one("four", 'i')
47 # m.add_one("four", 'i')
48 # m.add_one("four", 'i')
49 # assert m.has_key("four")
50 # assert m["four"] == ['i', 'i', 'i', 'i']
51 # ```
52 fun add_one(k: K, v: V)
53 do
54 var x = self.get_or_null(k)
55 if x != null then
56 x.add(v)
57 else
58 self[k] = [v]
59 end
60 end
61
62 redef fun provide_default_value(key) do
63 var res = new Array[V]
64 self[key] = res
65 return res
66 end
67
68 # Remove an occurrence of `v` from the array associated with `k`.
69 #
70 # If the associated array does not contain `v`, do nothing. If the
71 # associated array only contain one element and this element is `v`, remove
72 # the key `k`.
73 #
74 # In a nutshell, does the inverse operation of `add_one`.
75 #
76 # ```
77 # var m = new MultiHashMap[String, Char]
78 # m["four"] = ['4', 'i', 'i', 'i', 'i']
79 # m.remove_one("four", 'i')
80 # assert m["four"] == ['4', 'i', 'i', 'i']
81 #
82 # m = new MultiHashMap[String, Char]
83 # m.add_one("one", '1')
84 # m.remove_one("one", '?')
85 # assert m["one"] == ['1']
86 # m.remove_one("one", '1')
87 # assert not m.has_key("one")
88 # assert m["one"] == new Array[Char]
89 #
90 # m = new MultiHashMap[String, Char]
91 # m.add_one("one", '1')
92 # m.remove_one("two", '2')
93 # assert not m.has_key("two")
94 # assert m["one"] == ['1']
95 # assert m["two"] == new Array[Char]
96 # ```
97 fun remove_one(k: K, v: V)
98 do
99 var x = get_or_null(k)
100 if x != null then
101 x.remove(v)
102 if x.is_empty then keys.remove(k)
103 end
104 end
105 end
106
107 # Simple way to store an `HashMap[K1, HashMap[K2, V]]`
108 #
109 # ~~~~
110 # var hm2 = new HashMap2[Int, String, Float]
111 # hm2[1, "one"] = 1.0
112 # hm2[2, "two"] = 2.0
113 # assert hm2[1, "one"] == 1.0
114 # assert hm2[2, "not-two"] == null
115 # ~~~~
116 class HashMap2[K1, K2, V]
117
118 private var level1 = new HashMap[K1, HashMap[K2, V]]
119
120 # Return the value associated to the keys `k1` and `k2`.
121 # Return `null` if no such a value.
122 fun [](k1: K1, k2: K2): nullable V
123 do
124 var level1 = self.level1
125 var level2 = level1.get_or_null(k1)
126 if level2 == null then return null
127 return level2.get_or_null(k2)
128 end
129
130 # Set `v` the value associated to the keys `k1` and `k2`.
131 fun []=(k1: K1, k2: K2, v: V)
132 do
133 var level1 = self.level1
134 var level2 = level1.get_or_null(k1)
135 if level2 == null then
136 level2 = new HashMap[K2, V]
137 level1[k1] = level2
138 end
139 level2[k2] = v
140 end
141
142 # Remove the item at `k1` and `k2`
143 fun remove_at(k1: K1, k2: K2)
144 do
145 var level1 = self.level1
146 var level2 = level1.get_or_null(k1)
147 if level2 == null then return
148 level2.keys.remove(k2)
149 end
150
151 # Is there a value at `k1, k2`?
152 fun has(k1: K1, k2: K2): Bool
153 do
154 if not level1.keys.has(k1) then return false
155 return level1[k1].keys.has(k2)
156 end
157
158 # Remove all items
159 fun clear do level1.clear
160 end
161
162 # Simple way to store an `HashMap[K1, HashMap[K2, HashMap[K3, V]]]`
163 #
164 # ~~~~
165 # var hm3 = new HashMap3[Int, String, Int, Float]
166 # hm3[1, "one", 11] = 1.0
167 # hm3[2, "two", 22] = 2.0
168 # assert hm3[1, "one", 11] == 1.0
169 # assert hm3[2, "not-two", 22] == null
170 # ~~~~
171 class HashMap3[K1, K2, K3, V]
172
173 private var level1 = new HashMap[K1, HashMap2[K2, K3, V]]
174
175 # Return the value associated to the keys `k1`, `k2`, and `k3`.
176 # Return `null` if no such a value.
177 fun [](k1: K1, k2: K2, k3: K3): nullable V
178 do
179 var level1 = self.level1
180 var level2 = level1.get_or_null(k1)
181 if level2 == null then return null
182 return level2[k2, k3]
183 end
184
185 # Set `v` the value associated to the keys `k1`, `k2`, and `k3`.
186 fun []=(k1: K1, k2: K2, k3: K3, v: V)
187 do
188 var level1 = self.level1
189 var level2 = level1.get_or_null(k1)
190 if level2 == null then
191 level2 = new HashMap2[K2, K3, V]
192 level1[k1] = level2
193 end
194 level2[k2, k3] = v
195 end
196
197 # Remove the item at `k1`, `k2` and `k3`
198 fun remove_at(k1: K1, k2: K2, k3: K3)
199 do
200 var level1 = self.level1
201 var level2 = level1.get_or_null(k1)
202 if level2 == null then return
203 level2.remove_at(k2, k3)
204 end
205
206 # Is there a value at `k1, k2, k3`?
207 fun has(k1: K1, k2: K2, k3: K3): Bool
208 do
209 if not level1.keys.has(k1) then return false
210 return level1[k1].has(k2, k3)
211 end
212
213 # Remove all items
214 fun clear do level1.clear
215 end
216
217 # A map with a default value.
218 #
219 # ~~~~
220 # var dm = new DefaultMap[String, Int](10)
221 # assert dm["a"] == 10
222 # ~~~~
223 #
224 # The default value is used when the key is not present.
225 # And getting a default value does not register the key.
226 #
227 # ~~~~
228 # assert dm["a"] == 10
229 # assert dm.length == 0
230 # assert dm.has_key("a") == false
231 # ~~~~
232 #
233 # It also means that removed key retrieve the default value.
234 #
235 # ~~~~
236 # dm["a"] = 2
237 # assert dm["a"] == 2
238 # dm.keys.remove("a")
239 # assert dm["a"] == 10
240 # ~~~~
241 #
242 # Warning: the default value is used as is, so using mutable object might
243 # cause side-effects.
244 #
245 # ~~~~
246 # var dma = new DefaultMap[String, Array[Int]](new Array[Int])
247 #
248 # dma["a"].add(65)
249 # assert dma["a"] == [65]
250 # assert dma.default == [65]
251 # assert dma["c"] == [65]
252 #
253 # dma["b"] += [66]
254 # assert dma["b"] == [65, 66]
255 # assert dma.default == [65]
256 # ~~~~
257 class DefaultMap[K, V]
258 super HashMap[K, V]
259
260 # The default value.
261 var default: V
262
263 redef fun provide_default_value(key) do return default
264 end
265
266 # An unrolled linked list
267 #
268 # A sequence implemented as a linked list of arrays.
269 #
270 # This data structure is similar to the `List` but it can benefit from
271 # better cache performance, lower data overhead for the nodes metadata and
272 # it spares the GC to allocate many small nodes.
273 class UnrolledList[E]
274 super Sequence[E]
275
276 # Desired capacity for each nodes
277 #
278 # By default at `32`, it can be increased for very large lists.
279 #
280 # It can be modified anytime, but newly created nodes may still be assigned
281 # the same capacity of old nodes when created by `insert`.
282 var nodes_length = 32 is writable
283
284 private var head_node: UnrolledNode[E] = new UnrolledNode[E](nodes_length)
285
286 private var tail_node: UnrolledNode[E] = head_node
287
288 redef var length = 0
289
290 redef fun clear
291 do
292 head_node = new UnrolledNode[E](nodes_length)
293 tail_node = head_node
294 length = 0
295 end
296
297 # Out parameter of `node_at`
298 private var index_within_node = 0
299
300 private fun node_at(index: Int): UnrolledNode[E]
301 do
302 assert index >= 0 and index < length
303
304 var node = head_node
305 while index >= node.length do
306 index -= node.length
307 node = node.next.as(not null)
308 end
309
310 index_within_node = index
311 return node
312 end
313
314 private fun insert_node(node: UnrolledNode[E], prev, next: nullable UnrolledNode[E])
315 do
316 if prev != null then
317 prev.next = node
318 else head_node = node
319
320 if next != null then
321 next.prev = node
322 else tail_node = node
323
324 node.next = next
325 node.prev = prev
326 end
327
328 redef fun [](index)
329 do
330 var node = node_at(index)
331 index = index_within_node + node.head_index
332 return node.items[index]
333 end
334
335 redef fun []=(index, value)
336 do
337 var node = node_at(index)
338 index = index_within_node + node.head_index
339 node.items[index] = value
340 end
341
342 redef fun push(item)
343 do
344 var node = tail_node
345 if not node.full then
346 if node.tail_index < node.capacity then
347 # There's room at the tail
348 node.tail_index += 1
349 else
350 # Move everything over by `d`
351 assert node.head_index > 0
352 var d = node.head_index / 2 + 1
353 node.move_head(node.length, d)
354 for i in d.times do node.items[node.tail_index - i] = null
355 node.head_index -= d
356 node.tail_index += -d+1
357 end
358 node.items[node.tail_index-1] = item
359 else
360 # New node!
361 node = new UnrolledNode[E](nodes_length)
362 insert_node(node, tail_node, null)
363 node.tail_index = 1
364 node.items[0] = item
365 end
366 length += 1
367 end
368
369 redef fun unshift(item)
370 do
371 var node = head_node
372 if not node.full then
373 if node.head_index > 0 then
374 # There's room at the head
375 node.head_index -= 1
376 else
377 # Move everything over by `d`
378 assert node.tail_index < node.capacity
379 var d = (node.capacity-node.tail_index) / 2 + 1
380 node.move_tail(0, d)
381 for i in d.times do node.items[node.head_index+i] = null
382 node.head_index += d-1
383 node.tail_index += d
384 end
385 node.items[node.head_index] = item
386 else
387 # New node!
388 node = new UnrolledNode[E](nodes_length)
389 insert_node(node, null, head_node)
390 node.head_index = node.capacity-1
391 node.tail_index = node.capacity
392 node.items[node.capacity-1] = item
393 end
394 length += 1
395 end
396
397 redef fun pop
398 do
399 assert not_empty
400
401 var node = tail_node
402 while node.length == 0 do
403 # Delete empty node
404 var nullable_node = node.prev
405 assert is_not_empty: nullable_node != null
406 node = nullable_node
407 node.next = null
408 self.tail_node = node
409 end
410
411 var item = node.items[node.tail_index-1]
412 node.tail_index -= 1
413 length -= 1
414 return item
415 end
416
417 redef fun shift
418 do
419 assert not_empty
420
421 var node = head_node
422 while node.length == 0 do
423 # Delete empty node
424 var nullable_node = node.next
425 assert is_not_empty: nullable_node != null
426 node = nullable_node
427 node.prev = null
428 self.head_node = node
429 end
430
431 var item = node.items[node.head_index]
432 node.head_index += 1
433 length -= 1
434 return item
435 end
436
437 redef fun insert(item, index)
438 do
439 if index == length then
440 push item
441 return
442 end
443
444 var node = node_at(index)
445 index = index_within_node
446 if node.full then
447 # Move half to a new node
448 var new_node = new UnrolledNode[E](nodes_length.max(node.capacity))
449
450 # Plug in the new node
451 var next_node = node.next
452 insert_node(new_node, node, next_node)
453
454 # Move items at and after `index` to the new node
455 var to_displace = node.length-index
456 var offset = (new_node.capacity-to_displace)/2
457 for i in [0..to_displace[ do
458 new_node.items[offset+i] = node.items[index+i]
459 node.items[index+i] = null
460 end
461 new_node.head_index = offset
462 new_node.tail_index = offset + to_displace
463 node.tail_index -= to_displace
464
465 # Store `item`
466 if index > node.capacity / 2 then
467 new_node.items[offset-1] = item
468 new_node.head_index -= 1
469 else
470 node.items[node.head_index+index] = item
471 node.tail_index += 1
472 end
473 else
474 if node.tail_index < node.capacity then
475 # Move items towards the tail
476 node.move_tail(index, 1)
477 node.tail_index += 1
478 node.items[node.head_index + index] = item
479 else
480 # Move items towards the head
481 node.move_head(index, 1)
482 node.items[node.head_index + index-1] = item
483 node.head_index -= 1
484 end
485 end
486 length += 1
487 end
488
489 redef fun remove_at(index)
490 do
491 var node = node_at(index)
492 index = index_within_node + node.head_index
493
494 # Shift left all the elements after `index`
495 for i in [index+1 .. node.tail_index[ do
496 node.items[i-1] = node.items[i]
497 end
498 node.tail_index -= 1
499 node.items[node.tail_index] = null
500
501 length -= 1
502
503 var next_node = node.next
504 var prev_node = node.prev
505 if node.is_empty and (next_node != null or prev_node != null) then
506 # Empty and non-head or tail node, delete
507 if next_node != null then
508 next_node.prev = node.prev
509 else tail_node = prev_node.as(not null)
510
511 if prev_node != null then
512 prev_node.next = node.next
513 else head_node = next_node.as(not null)
514 end
515 end
516
517 redef fun iterator do return new UnrolledIterator[E](self)
518 end
519
520 # Node composing an `UnrolledList`
521 #
522 # Stores the elements in the `items` array. The elements in the `items` array
523 # begin at `head_index` and end right before `tail_index`. The data is contiguous,
524 # but there can be empty cells at the beginning and the end of the array.
525 private class UnrolledNode[E]
526
527 var prev: nullable UnrolledNode[E] = null
528
529 var next: nullable UnrolledNode[E] = null
530
531 # Desired length of `items`
532 var capacity: Int
533
534 # `Array` of items in this node, filled with `null`
535 var items = new Array[nullable E].filled_with(null, capacity) is lazy
536
537 # Index of the first element in `items`
538 var head_index = 0
539
540 # Index after the last element in `items`
541 var tail_index = 0
542
543 fun length: Int do return tail_index - head_index
544
545 fun full: Bool do return length == capacity
546
547 fun is_empty: Bool do return tail_index == head_index
548
549 # Move towards the head all elements before `index` of `displace` cells
550 fun move_tail(index, displace: Int)
551 do
552 for i in [tail_index-1..head_index+index].step(-1) do
553 items[i+displace] = items[i]
554 end
555 end
556
557 # Move towards the tail all elements at and after `index` of `displace` cells
558 fun move_head(index, displace: Int)
559 do
560 for i in [head_index..head_index+index[ do
561 items[i-displace] = items[i]
562 end
563 end
564 end
565
566 private class UnrolledIterator[E]
567 super IndexedIterator[E]
568
569 var list: UnrolledList[E]
570
571 var node: nullable UnrolledNode[E] = list.head_node is lazy
572
573 # Index of the current `item`
574 redef var index = 0
575
576 # Index within the current `node`
577 var index_in_node: Int = node.head_index is lazy
578
579 redef fun item do return node.items[index_in_node]
580
581 redef fun is_ok do return index < list.length
582
583 redef fun next
584 do
585 index += 1
586 index_in_node += 1
587
588 if index_in_node >= node.tail_index then
589 node = node.next
590 if node != null then index_in_node = node.head_index
591 end
592 end
593 end
594
595 # Keep track of the best elements according to a distance value.
596 #
597 # ~~~
598 # var bests = new BestDistance[String](5)
599 # bests.update(10, "Too big")
600 # assert bests.best_items.is_empty
601 # bests.update(5, "Just fine")
602 # bests.update(5, "Another one")
603 # assert bests.best_items.has_exactly(["Just fine", "Another one"])
604 # bests.update(2, "A better one")
605 # bests.update(4, "Not good enough")
606 # assert bests.best_distance == 2
607 # assert bests.best_items.has_exactly(["A better one"])
608 # ~~~
609 class BestDistance[E]
610 # Current smallest distance
611 var best_distance: Int is writable
612
613 # Known elements with the smallest distance
614 var best_items = new Set[E] is writable
615
616 # Register a `candidate` with a `distance`
617 #
618 # * To high, it is ignored.
619 # * Equal to the current best, it is added
620 # * Better that them, is is the new best element
621 #
622 # Return `true` if the candidate is kept (alone or with other)
623 # returns `false` if the candidate is ignored.
624 fun update(distance: Int, candidate: E): Bool
625 do
626 if distance > best_distance then return false
627 if distance < best_distance then
628 best_distance = distance
629 best_items.clear
630 end
631 best_items.add candidate
632 return true
633 end
634 end