976def94d707093095ae974a966e51c7e1aaa5ae
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var left: nullable RopeNode
55 # Right child of the node
56 var right: nullable RopeNode
57
58 init(l: nullable RopeNode, r: nullable RopeNode)
59 do
60 left = l
61 right = r
62 if l != null then length += l.length
63 if r != null then length += r.length
64 end
65
66 end
67
68 # Leaf of a Rope, contains a FlatString
69 private abstract class Leaf
70 super RopeNode
71
72 # Encapsulated FlatString in the leaf node
73 var str: FlatText
74
75 end
76
77 private class StringLeaf
78 super Leaf
79
80 init(val: FlatString) do
81 self.str = val
82 length = str.length
83 end
84
85 end
86
87 # Basic structure, binary tree with a root node.
88 #
89 # Also shared services by subsequent implementations.
90 abstract class Rope
91 super Text
92
93 # Root node, entry point of a Rope.
94 private var root: RopeNode
95
96 # Cached version of self as a flat String
97 private var str_representation: nullable NativeString = null
98
99 # Empty Rope
100 init do from("")
101
102 # Creates a new Rope with `s` as root
103 init from(s: String) do
104 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
105 end
106
107 private init from_root(r: RopeNode)
108 do
109 root = r
110 end
111
112 redef fun length do return root.length
113
114 # Iterator on the nodes of the rope, in forward postfix order
115 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
116
117 # Iterator on the leaves of the rope, forward order
118 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
119
120 # Iterator on the substrings from 0, in forward order
121 redef fun substrings do return new SubstringsIterator(self, 0)
122
123 # Iterator on the substrings, starting at position `from`, in forward order
124 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
125
126 # Iterator on the nodes of the rope, in backwards postfix order
127 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
128
129 # Iterator on the leaves of the rope, backwards order
130 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
131
132 # Iterator on the substrings, in reverse order
133 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
134
135 # Iterator on the substrings, in reverse order, starting iteration at position `from`
136 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
137
138 redef fun output
139 do
140 for i in substrings do
141 i.output
142 end
143 end
144
145 redef fun to_cstring
146 do
147 if str_representation != null then return str_representation.as(not null)
148
149 var native_final_str = calloc_string(length + 1)
150
151 native_final_str[length] = '\0'
152
153 if self.length == 0 then
154 str_representation = native_final_str
155 return native_final_str
156 end
157
158 var offset = 0
159
160 for i in substrings do
161 var str = i.flatten
162 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
163 offset += i.length
164 end
165
166 str_representation = native_final_str
167
168 return native_final_str
169 end
170
171 # Path to the Leaf for `position`
172 private fun node_at(position: Int): Path
173 do
174 assert position >= 0 and position < length
175 return get_node_from(root.as(not null), 0, position, new List[PathElement])
176 end
177
178 # Builds the path to Leaf at position `seek_pos`
179 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
180 do
181 assert curr_pos >= 0
182 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
183 node = node.as(Concat)
184
185 if node.left != null then
186 var next_pos = curr_pos + node.left.length
187 stack.add(new PathElement(node))
188 if next_pos > seek_pos then
189 stack.last.left = true
190 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
191 end
192 stack.last.right = true
193 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
194 else
195 var vis = new PathElement(node)
196 vis.right = true
197 stack.add(vis)
198 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
199 end
200 end
201
202 redef fun ==(o)
203 do
204 if not o isa Text then return false
205 if o.length != self.length then return false
206 var oit = o.chars.iterator
207 for i in self.chars.iterator do
208 if i != oit.item then return false
209 oit.next
210 end
211 return true
212 end
213 end
214
215 # Rope that cannot be modified
216 class RopeString
217 super Rope
218 super String
219
220 redef fun to_s do return self
221
222 redef fun empty do return once new RopeString.from("")
223
224 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
225
226 redef fun reversed
227 do
228 var ret = empty.as(RopeString)
229 for i in substrings do
230 ret = ret.prepend(i.reversed.to_s).as(RopeString)
231 end
232 return ret
233 end
234
235 redef fun to_upper
236 do
237 var ret = empty
238 for i in substrings do
239 ret += i.to_upper
240 end
241 return ret
242 end
243
244 redef fun to_lower
245 do
246 var ret = empty
247 for i in substrings do
248 ret += i.to_lower
249 end
250 return ret
251 end
252
253 redef fun +(o) do return insert_at(o.to_s, length)
254
255 redef fun *(n)
256 do
257 var ret = new RopeString.from("")
258 for i in [0..n[ do
259 ret = (ret + self).as(RopeString)
260 end
261 return ret
262 end
263
264 # Inserts a String `str` at position `pos`
265 redef fun insert_at(str, pos)
266 do
267 if str.length == 0 then return self
268 if self.length == 0 then return new RopeString.from(str)
269
270 assert pos >= 0 and pos <= length
271
272 if pos == length then return append(str).as(RopeString)
273
274 var path = node_at(pos)
275
276 var last_concat: Concat
277
278 if path.offset == 0 then
279 if str isa FlatString then
280 last_concat = new Concat(new StringLeaf(str), path.leaf)
281 else
282 last_concat = new Concat(str.as(RopeString).root, path.leaf)
283 end
284 else if path.offset == path.leaf.length then
285 if str isa FlatString then
286 last_concat = new Concat(path.leaf, new StringLeaf(str))
287 else
288 last_concat = new Concat(path.leaf, str.as(RopeString).root)
289 end
290 else
291 var s = path.leaf.str
292 var l_half = s.substring(0, s.length - path.offset)
293 var r_half = s.substring_from(s.length - path.offset)
294 var cct: Concat
295 var ll = new StringLeaf(l_half.as(FlatString))
296 if str isa FlatString then
297 cct = new Concat(ll, new StringLeaf(str))
298 else
299 cct = new Concat(ll, str.as(RopeString).root)
300 end
301 last_concat = new Concat(cct, new StringLeaf(r_half.as(FlatString)))
302 end
303
304 for i in path.stack.reverse_iterator do
305 if i.left then
306 last_concat = new Concat(last_concat, i.node.right)
307 else
308 last_concat = new Concat(i.node.left, last_concat)
309 end
310 end
311
312 return new RopeString.from_root(last_concat)
313 end
314
315 # Adds `s` at the beginning of self
316 redef fun prepend(s) do return insert_at(s, 0)
317
318 # Adds `s` at the end of self
319 redef fun append(s)
320 do
321 if self.is_empty then return s
322 return new RopeString.from_root(append_to_path(root,s))
323 end
324
325 # Builds a new path from root to the rightmost node with s appended
326 private fun append_to_path(node: RopeNode, s: String): RopeNode
327 do
328 var cct: Concat
329 if node isa Leaf then
330 if s isa FlatString then
331 cct = new Concat(node, new StringLeaf(s))
332 else
333 cct = new Concat(node, s.as(RopeString).root)
334 end
335 else
336 var n = node.as(Concat)
337 var right = n.right
338 var lft = n.left.as(not null)
339 if right == null then
340 if s isa FlatString then
341 cct = new Concat(lft, new StringLeaf(s))
342 else
343 cct = new Concat(lft, s.as(RopeString).root)
344 end
345 else
346 cct = new Concat(lft, append_to_path(right, s))
347 end
348 end
349 return cct
350 end
351
352 # O(log(n))
353 #
354 # var rope = new RopeString.from("abcd")
355 # assert rope.substring(1, 2) == "bc"
356 # assert rope.substring(-1, 2) == "a"
357 # assert rope.substring(1, 0) == ""
358 # assert rope.substring(2, 5) == "cd"
359 #
360 redef fun substring(pos, len)
361 do
362 if pos < 0 then
363 len += pos
364 pos = 0
365 end
366
367 if pos + len > length then len = length - pos
368
369 if len <= 0 then return new RopeString.from("")
370
371 var path = node_at(pos)
372
373 var lf = path.leaf
374 var offset = path.offset
375
376 if path.leaf.str.length - offset > len then lf = new StringLeaf(lf.str.substring(offset,len).as(FlatString)) else lf = new StringLeaf(lf.str.substring_from(offset).as(FlatString))
377
378 var nod: RopeNode = lf
379
380 for i in path.stack.reverse_iterator do
381 if i.right then continue
382 nod = new Concat(nod, i.node.right)
383 end
384
385 var ret = new RopeString
386 ret.root = nod
387
388 path = ret.node_at(len-1)
389
390 offset = path.offset
391 nod = new StringLeaf(path.leaf.str.substring(0, offset+1).as(FlatString))
392
393 for i in path.stack.reverse_iterator do
394 if i.left then continue
395 nod = new Concat(i.node.left, nod)
396 end
397
398 ret.root = nod
399
400 return ret
401 end
402 end
403
404 redef class FlatString
405
406 redef fun append(s) do return (new RopeString.from(self)) + s
407
408 redef fun prepend(s) do return (new RopeString.from(self)).prepend(s)
409
410 redef fun insert_at(s, pos)
411 do
412 if pos == 0 then return prepend(s)
413 if pos == length then return append(s)
414
415 var l = substring(0,pos)
416 var r = substring_from(pos)
417 var ret: String = new RopeString.from(l)
418 ret += s
419 return ret + r
420 end
421
422 end
423
424 private class RopeStringChars
425 super SequenceRead[Char]
426
427 var tgt: Rope
428
429 redef fun [](pos)
430 do
431 assert pos < tgt.length
432 var path = tgt.node_at(pos)
433 return path.leaf.str.chars[path.offset]
434 end
435
436 redef fun iterator do return iterator_from(0)
437
438 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
439
440 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
441
442 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
443 end
444
445 # Used to iterate on a Rope
446 private class IteratorElement
447
448 init(e: RopeNode)
449 do
450 if e isa Leaf then
451 left = true
452 right = true
453 end
454 node = e
455 end
456
457 # The node being visited
458 var node: RopeNode
459 # If the node has a left child, was it visited ?
460 var left = false
461 # If the node has a right child, was it visited ?
462 var right = false
463 # Was the current node visited ?
464 var done = false
465 end
466
467 # Simple Postfix iterator on the nodes of a Rope
468 private class Postfix
469 super IndexedIterator[RopeNode]
470
471 # Target Rope to iterate on
472 var target: Rope
473
474 # Current position in Rope
475 var pos: Int
476
477 # Visited nodes
478 var stack = new List[IteratorElement]
479
480 init from(tgt: Rope, pos: Int)
481 do
482 self.target = tgt
483 self.pos = pos
484 if pos < 0 or pos >= tgt.length then return
485 var path = tgt.node_at(pos)
486 self.pos -= path.offset
487 for i in path.stack do
488 var item = new IteratorElement(i.node)
489 item.left = true
490 if i.right then item.right = true
491 stack.push item
492 end
493 var item = new IteratorElement(path.leaf)
494 item.done = true
495 stack.push item
496 end
497
498 redef fun item
499 do
500 assert is_ok
501 return stack.last.node
502 end
503
504 redef fun is_ok do return not stack.is_empty
505
506 redef fun index do return pos
507
508 redef fun next do
509 if stack.is_empty then return
510 if pos > target.length-1 then
511 stack.clear
512 return
513 end
514 var lst = stack.last
515 if lst.done then
516 if lst.node isa Leaf then
517 pos += lst.node.length
518 end
519 stack.pop
520 next
521 return
522 end
523 if not lst.left then
524 lst.left = true
525 var nod = lst.node
526 if nod isa Concat and nod.left != null then
527 stack.push(new IteratorElement(nod.left.as(not null)))
528 next
529 return
530 end
531 end
532 if not lst.right then
533 lst.right = true
534 var nod = lst.node
535 if nod isa Concat and nod.right != null then
536 stack.push(new IteratorElement(nod.right.as(not null)))
537 next
538 return
539 end
540 end
541 lst.done = true
542 end
543 end
544
545 # Iterates on the leaves (substrings) of the Rope
546 class LeavesIterator
547 super IndexedIterator[Leaf]
548
549 private var nodes: Postfix
550
551 init(tgt: Rope, pos: Int)
552 do
553 nodes = tgt.postfix(pos)
554 end
555
556 redef fun is_ok do return nodes.is_ok
557
558 redef fun item
559 do
560 assert is_ok
561 return nodes.item.as(Leaf)
562 end
563
564 redef fun index do return nodes.index
565
566 redef fun next
567 do
568 while nodes.is_ok do
569 nodes.next
570 if nodes.is_ok and nodes.item isa Leaf then break
571 end
572 end
573 end
574
575 # Uses the leaves and calculates a new substring on each iteration
576 class SubstringsIterator
577 super IndexedIterator[Text]
578
579 private var nodes: IndexedIterator[Leaf]
580
581 # Current position in Rope
582 var pos: Int
583
584 # Current substring, computed from the current Leaf and indexes
585 var substring: Text
586
587 init(tgt: Rope, pos: Int)
588 do
589 nodes = tgt.leaves(pos)
590 self.pos = pos
591 if pos < 0 or pos >= tgt.length then return
592 make_substring
593 end
594
595 # Compute the bounds of the current substring and makes the substring
596 private fun make_substring
597 do
598 substring = nodes.item.str
599 var min = 0
600 var length = substring.length
601 if nodes.index < pos then
602 min = pos - nodes.index
603 end
604 substring = substring.substring(min, length)
605 end
606
607 redef fun is_ok do return nodes.is_ok
608
609 redef fun item
610 do
611 assert is_ok
612 return substring
613 end
614
615 redef fun index do return pos
616
617 redef fun next
618 do
619 pos += substring.length
620 nodes.next
621 if nodes.is_ok then make_substring
622 end
623
624 end
625
626 class RopeCharIterator
627 super IndexedIterator[Char]
628
629 var substrings: IndexedIterator[Text]
630
631 var pos: Int
632
633 var max: Int
634
635 var substr_iter: IndexedIterator[Char]
636
637 init(tgt: Rope, from: Int)
638 do
639 substrings = tgt.substrings_from(from)
640 max = tgt.length - 1
641 if not substrings.is_ok then
642 pos = tgt.length
643 return
644 end
645 pos = from
646 substr_iter = substrings.item.chars.iterator
647 end
648
649 redef fun item do return substr_iter.item
650
651 redef fun is_ok do return pos <= max
652
653 redef fun index do return pos
654
655 redef fun next
656 do
657 pos += 1
658 if substr_iter.is_ok then
659 substr_iter.next
660 end
661 if not substr_iter.is_ok then
662 substrings.next
663 if substrings.is_ok then
664 substr_iter = substrings.item.chars.iterator
665 end
666 end
667 end
668 end
669
670 private class ReversePostfix
671 super IndexedIterator[RopeNode]
672
673 var target: Rope
674
675 var pos: Int
676
677 var min = 0
678
679 var stack = new List[IteratorElement]
680
681 init from(tgt: Rope, pos: Int)
682 do
683 self.pos = pos
684 target = tgt
685 if pos < 0 or pos >= tgt.length then return
686 var path = tgt.node_at(pos)
687 self.pos -= path.offset
688 for i in path.stack do
689 var elemt = new IteratorElement(i.node)
690 elemt.right = true
691 if i.left then
692 elemt.left = true
693 end
694 stack.push elemt
695 end
696 stack.push(new IteratorElement(path.leaf))
697 stack.last.done = true
698 end
699
700 redef fun item do
701 assert is_ok
702 return stack.last.node
703 end
704
705 redef fun is_ok do return not stack.is_empty
706
707 redef fun index do return pos
708
709 redef fun next
710 do
711 if stack.is_empty then return
712 if pos < min then
713 stack.clear
714 return
715 end
716 var lst = stack.last
717 if lst.done then
718 stack.pop
719 next
720 return
721 end
722 if not lst.right then
723 var nod = lst.node.as(Concat)
724 var rgt = nod.right
725 lst.right = true
726 if rgt != null then
727 stack.push(new IteratorElement(rgt))
728 next
729 return
730 end
731 end
732 if not lst.left then
733 var nod = lst.node.as(Concat)
734 var lft = nod.left
735 lst.left = true
736 if lft != null then
737 stack.push(new IteratorElement(lft))
738 next
739 return
740 end
741 end
742 if lst.node isa Leaf then pos -= lst.node.length
743 lst.done = true
744 end
745 end
746
747 private class ReverseLeavesIterator
748 super IndexedIterator[Leaf]
749
750 var nodes: ReversePostfix
751
752 init(tgt: Rope, from: Int)
753 do
754 nodes = tgt.reverse_postfix(from)
755 end
756
757 redef fun is_ok do return nodes.is_ok
758
759 redef fun item do
760 assert is_ok
761 return nodes.item.as(Leaf)
762 end
763
764 redef fun next do
765 while nodes.is_ok do
766 nodes.next
767 if nodes.is_ok then if nodes.item isa Leaf then break
768 end
769 end
770
771 redef fun index do return nodes.index
772
773 end
774
775 private class ReverseSubstringsIterator
776 super IndexedIterator[Text]
777
778 var leaves: ReverseLeavesIterator
779
780 var pos: Int
781
782 var str: Text
783
784 init(tgt: Rope, from: Int)
785 do
786 leaves = tgt.reverse_leaves(from)
787 pos = from
788 if not leaves.is_ok then return
789 str = leaves.item.str
790 make_substring
791 end
792
793 fun make_substring
794 do
795 if pos >= (leaves.index + str.length - 1) then return
796 str = str.substring(0, (pos - leaves.index + 1))
797 end
798
799 redef fun is_ok do return leaves.is_ok
800
801 redef fun item do
802 assert is_ok
803 return str
804 end
805
806 redef fun index do return pos
807
808 redef fun next do
809 pos -= str.length
810 leaves.next
811 if not leaves.is_ok then return
812 str = leaves.item.str
813 make_substring
814 end
815 end
816
817 private class ReverseRopeCharIterator
818 super IndexedIterator[Char]
819
820 var substrs: IndexedIterator[Text]
821
822 var pos: Int
823
824 var subiter: IndexedIterator[Char]
825
826 init(tgt: Rope, from: Int)
827 do
828 substrs = tgt.reverse_substrings_from(from)
829 if not substrs.is_ok then
830 pos = -1
831 return
832 end
833 subiter = substrs.item.chars.reverse_iterator
834 pos = from
835 end
836
837 redef fun is_ok do return pos >= 0
838
839 redef fun item do
840 assert is_ok
841 return subiter.item
842 end
843
844 redef fun index do return pos
845
846 redef fun next do
847 pos -= 1
848 if subiter.is_ok then subiter.next
849 if not subiter.is_ok then
850 if substrs.is_ok then substrs.next
851 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
852 end
853 end
854 end
855