lib/standard/ropes: Added to_leaf method on RopeNode.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47
48 # Transforms the current node to a Leaf.
49 # This might be costly to invoke since this produces a FlatString concatenation.
50 # Can be used internally to limit the growth of the Rope when working with small leaves.
51 fun to_leaf: Leaf is abstract
52 end
53
54 # Node that represents a concatenation between two nodes (of any RopeNode type)
55 private class Concat
56 super RopeNode
57
58 # Left child of the node
59 var left: nullable RopeNode
60 # Right child of the node
61 var right: nullable RopeNode
62
63 init(l: nullable RopeNode, r: nullable RopeNode)
64 do
65 left = l
66 right = r
67 if l != null then length += l.length
68 if r != null then length += r.length
69 end
70
71 redef fun to_leaf
72 do
73 if left == null then
74 if right == null then return new StringLeaf("".as(FlatString))
75 return right.to_leaf
76 end
77 if right == null then return left.as(not null).to_leaf
78 return new StringLeaf((left.to_leaf.str.as(FlatString) + right.to_leaf.str.as(FlatString)).as(FlatString))
79 end
80 end
81
82 # Leaf of a Rope, contains a FlatString
83 private abstract class Leaf
84 super RopeNode
85
86 # Encapsulated FlatString in the leaf node
87 var str: FlatText
88
89 end
90
91 private class StringLeaf
92 super Leaf
93
94 init(val: FlatString) do
95 self.str = val
96 length = str.length
97 end
98
99 redef fun to_leaf do return self
100 end
101
102 # Basic structure, binary tree with a root node.
103 #
104 # Also shared services by subsequent implementations.
105 abstract class Rope
106 super Text
107
108 # Root node, entry point of a Rope.
109 private var root: RopeNode
110
111 # Cached version of self as a flat String
112 private var str_representation: nullable NativeString = null
113
114 # Empty Rope
115 init do from("")
116
117 # Creates a new Rope with `s` as root
118 init from(s: String) do
119 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
120 end
121
122 private init from_root(r: RopeNode)
123 do
124 root = r
125 end
126
127 redef fun length do return root.length
128
129 # Iterator on the nodes of the rope, in forward postfix order
130 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
131
132 # Iterator on the leaves of the rope, forward order
133 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
134
135 # Iterator on the substrings from 0, in forward order
136 redef fun substrings do return new SubstringsIterator(self, 0)
137
138 # Iterator on the substrings, starting at position `from`, in forward order
139 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
140
141 # Iterator on the nodes of the rope, in backwards postfix order
142 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
143
144 # Iterator on the leaves of the rope, backwards order
145 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
146
147 # Iterator on the substrings, in reverse order
148 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
149
150 # Iterator on the substrings, in reverse order, starting iteration at position `from`
151 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
152
153 redef fun output
154 do
155 for i in substrings do
156 i.output
157 end
158 end
159
160 redef fun to_cstring
161 do
162 if str_representation != null then return str_representation.as(not null)
163
164 var native_final_str = calloc_string(length + 1)
165
166 native_final_str[length] = '\0'
167
168 if self.length == 0 then
169 str_representation = native_final_str
170 return native_final_str
171 end
172
173 var offset = 0
174
175 for i in substrings do
176 var str = i.flatten
177 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
178 offset += i.length
179 end
180
181 str_representation = native_final_str
182
183 return native_final_str
184 end
185
186 # Path to the Leaf for `position`
187 private fun node_at(position: Int): Path
188 do
189 assert position >= 0 and position < length
190 return get_node_from(root.as(not null), 0, position, new List[PathElement])
191 end
192
193 # Builds the path to Leaf at position `seek_pos`
194 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
195 do
196 assert curr_pos >= 0
197 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
198 node = node.as(Concat)
199
200 if node.left != null then
201 var next_pos = curr_pos + node.left.length
202 stack.add(new PathElement(node))
203 if next_pos > seek_pos then
204 stack.last.left = true
205 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
206 end
207 stack.last.right = true
208 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
209 else
210 var vis = new PathElement(node)
211 vis.right = true
212 stack.add(vis)
213 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
214 end
215 end
216
217 redef fun ==(o)
218 do
219 if not o isa Text then return false
220 if o.length != self.length then return false
221 var oit = o.chars.iterator
222 for i in self.chars.iterator do
223 if i != oit.item then return false
224 oit.next
225 end
226 return true
227 end
228 end
229
230 # Rope that cannot be modified
231 class RopeString
232 super Rope
233 super String
234
235 redef fun to_s do return self
236
237 redef fun empty do return once new RopeString.from("")
238
239 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
240
241 redef fun reversed
242 do
243 var ret = empty.as(RopeString)
244 for i in substrings do
245 ret = ret.prepend(i.reversed.to_s).as(RopeString)
246 end
247 return ret
248 end
249
250 redef fun to_upper
251 do
252 var ret = empty
253 for i in substrings do
254 ret += i.to_upper
255 end
256 return ret
257 end
258
259 redef fun to_lower
260 do
261 var ret = empty
262 for i in substrings do
263 ret += i.to_lower
264 end
265 return ret
266 end
267
268 redef fun +(o) do return insert_at(o.to_s, length)
269
270 redef fun *(n)
271 do
272 var ret = new RopeString.from("")
273 for i in [0..n[ do
274 ret = (ret + self).as(RopeString)
275 end
276 return ret
277 end
278
279 # Inserts a String `str` at position `pos`
280 redef fun insert_at(str, pos)
281 do
282 if str.length == 0 then return self
283 if self.length == 0 then return new RopeString.from(str)
284
285 assert pos >= 0 and pos <= length
286
287 if pos == length then return append(str).as(RopeString)
288
289 var path = node_at(pos)
290
291 var last_concat: Concat
292
293 if path.offset == 0 then
294 if str isa FlatString then
295 last_concat = new Concat(new StringLeaf(str), path.leaf)
296 else
297 last_concat = new Concat(str.as(RopeString).root, path.leaf)
298 end
299 else if path.offset == path.leaf.length then
300 if str isa FlatString then
301 last_concat = new Concat(path.leaf, new StringLeaf(str))
302 else
303 last_concat = new Concat(path.leaf, str.as(RopeString).root)
304 end
305 else
306 var s = path.leaf.str
307 var l_half = s.substring(0, s.length - path.offset)
308 var r_half = s.substring_from(s.length - path.offset)
309 var cct: Concat
310 var ll = new StringLeaf(l_half.as(FlatString))
311 if str isa FlatString then
312 cct = new Concat(ll, new StringLeaf(str))
313 else
314 cct = new Concat(ll, str.as(RopeString).root)
315 end
316 last_concat = new Concat(cct, new StringLeaf(r_half.as(FlatString)))
317 end
318
319 for i in path.stack.reverse_iterator do
320 if i.left then
321 last_concat = new Concat(last_concat, i.node.right)
322 else
323 last_concat = new Concat(i.node.left, last_concat)
324 end
325 end
326
327 return new RopeString.from_root(last_concat)
328 end
329
330 # Adds `s` at the beginning of self
331 redef fun prepend(s) do return insert_at(s, 0)
332
333 # Adds `s` at the end of self
334 redef fun append(s)
335 do
336 if self.is_empty then return s
337 return new RopeString.from_root(append_to_path(root,s))
338 end
339
340 # Builds a new path from root to the rightmost node with s appended
341 private fun append_to_path(node: RopeNode, s: String): RopeNode
342 do
343 var cct: Concat
344 if node isa Leaf then
345 if s isa FlatString then
346 cct = new Concat(node, new StringLeaf(s))
347 else
348 cct = new Concat(node, s.as(RopeString).root)
349 end
350 else
351 var n = node.as(Concat)
352 var right = n.right
353 var lft = n.left.as(not null)
354 if right == null then
355 if s isa FlatString then
356 cct = new Concat(lft, new StringLeaf(s))
357 else
358 cct = new Concat(lft, s.as(RopeString).root)
359 end
360 else
361 cct = new Concat(lft, append_to_path(right, s))
362 end
363 end
364 return cct
365 end
366
367 # O(log(n))
368 #
369 # var rope = new RopeString.from("abcd")
370 # assert rope.substring(1, 2) == "bc"
371 # assert rope.substring(-1, 2) == "a"
372 # assert rope.substring(1, 0) == ""
373 # assert rope.substring(2, 5) == "cd"
374 #
375 redef fun substring(pos, len)
376 do
377 if pos < 0 then
378 len += pos
379 pos = 0
380 end
381
382 if pos + len > length then len = length - pos
383
384 if len <= 0 then return new RopeString.from("")
385
386 var path = node_at(pos)
387
388 var lf = path.leaf
389 var offset = path.offset
390
391 if path.leaf.str.length - offset > len then lf = new StringLeaf(lf.str.substring(offset,len).as(FlatString)) else lf = new StringLeaf(lf.str.substring_from(offset).as(FlatString))
392
393 var nod: RopeNode = lf
394
395 for i in path.stack.reverse_iterator do
396 if i.right then continue
397 nod = new Concat(nod, i.node.right)
398 end
399
400 var ret = new RopeString
401 ret.root = nod
402
403 path = ret.node_at(len-1)
404
405 offset = path.offset
406 nod = new StringLeaf(path.leaf.str.substring(0, offset+1).as(FlatString))
407
408 for i in path.stack.reverse_iterator do
409 if i.left then continue
410 nod = new Concat(i.node.left, nod)
411 end
412
413 ret.root = nod
414
415 return ret
416 end
417 end
418
419 redef class FlatString
420
421 redef fun append(s) do return (new RopeString.from(self)) + s
422
423 redef fun prepend(s) do return (new RopeString.from(self)).prepend(s)
424
425 redef fun insert_at(s, pos)
426 do
427 if pos == 0 then return prepend(s)
428 if pos == length then return append(s)
429
430 var l = substring(0,pos)
431 var r = substring_from(pos)
432 var ret: String = new RopeString.from(l)
433 ret += s
434 return ret + r
435 end
436
437 end
438
439 private class RopeStringChars
440 super SequenceRead[Char]
441
442 var tgt: Rope
443
444 redef fun [](pos)
445 do
446 assert pos < tgt.length
447 var path = tgt.node_at(pos)
448 return path.leaf.str.chars[path.offset]
449 end
450
451 redef fun iterator do return iterator_from(0)
452
453 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
454
455 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
456
457 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
458 end
459
460 # Used to iterate on a Rope
461 private class IteratorElement
462
463 init(e: RopeNode)
464 do
465 if e isa Leaf then
466 left = true
467 right = true
468 end
469 node = e
470 end
471
472 # The node being visited
473 var node: RopeNode
474 # If the node has a left child, was it visited ?
475 var left = false
476 # If the node has a right child, was it visited ?
477 var right = false
478 # Was the current node visited ?
479 var done = false
480 end
481
482 # Simple Postfix iterator on the nodes of a Rope
483 private class Postfix
484 super IndexedIterator[RopeNode]
485
486 # Target Rope to iterate on
487 var target: Rope
488
489 # Current position in Rope
490 var pos: Int
491
492 # Visited nodes
493 var stack = new List[IteratorElement]
494
495 init from(tgt: Rope, pos: Int)
496 do
497 self.target = tgt
498 self.pos = pos
499 if pos < 0 or pos >= tgt.length then return
500 var path = tgt.node_at(pos)
501 self.pos -= path.offset
502 for i in path.stack do
503 var item = new IteratorElement(i.node)
504 item.left = true
505 if i.right then item.right = true
506 stack.push item
507 end
508 var item = new IteratorElement(path.leaf)
509 item.done = true
510 stack.push item
511 end
512
513 redef fun item
514 do
515 assert is_ok
516 return stack.last.node
517 end
518
519 redef fun is_ok do return not stack.is_empty
520
521 redef fun index do return pos
522
523 redef fun next do
524 if stack.is_empty then return
525 if pos > target.length-1 then
526 stack.clear
527 return
528 end
529 var lst = stack.last
530 if lst.done then
531 if lst.node isa Leaf then
532 pos += lst.node.length
533 end
534 stack.pop
535 next
536 return
537 end
538 if not lst.left then
539 lst.left = true
540 var nod = lst.node
541 if nod isa Concat and nod.left != null then
542 stack.push(new IteratorElement(nod.left.as(not null)))
543 next
544 return
545 end
546 end
547 if not lst.right then
548 lst.right = true
549 var nod = lst.node
550 if nod isa Concat and nod.right != null then
551 stack.push(new IteratorElement(nod.right.as(not null)))
552 next
553 return
554 end
555 end
556 lst.done = true
557 end
558 end
559
560 # Iterates on the leaves (substrings) of the Rope
561 class LeavesIterator
562 super IndexedIterator[Leaf]
563
564 private var nodes: Postfix
565
566 init(tgt: Rope, pos: Int)
567 do
568 nodes = tgt.postfix(pos)
569 end
570
571 redef fun is_ok do return nodes.is_ok
572
573 redef fun item
574 do
575 assert is_ok
576 return nodes.item.as(Leaf)
577 end
578
579 redef fun index do return nodes.index
580
581 redef fun next
582 do
583 while nodes.is_ok do
584 nodes.next
585 if nodes.is_ok and nodes.item isa Leaf then break
586 end
587 end
588 end
589
590 # Uses the leaves and calculates a new substring on each iteration
591 class SubstringsIterator
592 super IndexedIterator[Text]
593
594 private var nodes: IndexedIterator[Leaf]
595
596 # Current position in Rope
597 var pos: Int
598
599 # Current substring, computed from the current Leaf and indexes
600 var substring: Text
601
602 init(tgt: Rope, pos: Int)
603 do
604 nodes = tgt.leaves(pos)
605 self.pos = pos
606 if pos < 0 or pos >= tgt.length then return
607 make_substring
608 end
609
610 # Compute the bounds of the current substring and makes the substring
611 private fun make_substring
612 do
613 substring = nodes.item.str
614 var min = 0
615 var length = substring.length
616 if nodes.index < pos then
617 min = pos - nodes.index
618 end
619 substring = substring.substring(min, length)
620 end
621
622 redef fun is_ok do return nodes.is_ok
623
624 redef fun item
625 do
626 assert is_ok
627 return substring
628 end
629
630 redef fun index do return pos
631
632 redef fun next
633 do
634 pos += substring.length
635 nodes.next
636 if nodes.is_ok then make_substring
637 end
638
639 end
640
641 class RopeCharIterator
642 super IndexedIterator[Char]
643
644 var substrings: IndexedIterator[Text]
645
646 var pos: Int
647
648 var max: Int
649
650 var substr_iter: IndexedIterator[Char]
651
652 init(tgt: Rope, from: Int)
653 do
654 substrings = tgt.substrings_from(from)
655 max = tgt.length - 1
656 if not substrings.is_ok then
657 pos = tgt.length
658 return
659 end
660 pos = from
661 substr_iter = substrings.item.chars.iterator
662 end
663
664 redef fun item do return substr_iter.item
665
666 redef fun is_ok do return pos <= max
667
668 redef fun index do return pos
669
670 redef fun next
671 do
672 pos += 1
673 if substr_iter.is_ok then
674 substr_iter.next
675 end
676 if not substr_iter.is_ok then
677 substrings.next
678 if substrings.is_ok then
679 substr_iter = substrings.item.chars.iterator
680 end
681 end
682 end
683 end
684
685 private class ReversePostfix
686 super IndexedIterator[RopeNode]
687
688 var target: Rope
689
690 var pos: Int
691
692 var min = 0
693
694 var stack = new List[IteratorElement]
695
696 init from(tgt: Rope, pos: Int)
697 do
698 self.pos = pos
699 target = tgt
700 if pos < 0 or pos >= tgt.length then return
701 var path = tgt.node_at(pos)
702 self.pos -= path.offset
703 for i in path.stack do
704 var elemt = new IteratorElement(i.node)
705 elemt.right = true
706 if i.left then
707 elemt.left = true
708 end
709 stack.push elemt
710 end
711 stack.push(new IteratorElement(path.leaf))
712 stack.last.done = true
713 end
714
715 redef fun item do
716 assert is_ok
717 return stack.last.node
718 end
719
720 redef fun is_ok do return not stack.is_empty
721
722 redef fun index do return pos
723
724 redef fun next
725 do
726 if stack.is_empty then return
727 if pos < min then
728 stack.clear
729 return
730 end
731 var lst = stack.last
732 if lst.done then
733 stack.pop
734 next
735 return
736 end
737 if not lst.right then
738 var nod = lst.node.as(Concat)
739 var rgt = nod.right
740 lst.right = true
741 if rgt != null then
742 stack.push(new IteratorElement(rgt))
743 next
744 return
745 end
746 end
747 if not lst.left then
748 var nod = lst.node.as(Concat)
749 var lft = nod.left
750 lst.left = true
751 if lft != null then
752 stack.push(new IteratorElement(lft))
753 next
754 return
755 end
756 end
757 if lst.node isa Leaf then pos -= lst.node.length
758 lst.done = true
759 end
760 end
761
762 private class ReverseLeavesIterator
763 super IndexedIterator[Leaf]
764
765 var nodes: ReversePostfix
766
767 init(tgt: Rope, from: Int)
768 do
769 nodes = tgt.reverse_postfix(from)
770 end
771
772 redef fun is_ok do return nodes.is_ok
773
774 redef fun item do
775 assert is_ok
776 return nodes.item.as(Leaf)
777 end
778
779 redef fun next do
780 while nodes.is_ok do
781 nodes.next
782 if nodes.is_ok then if nodes.item isa Leaf then break
783 end
784 end
785
786 redef fun index do return nodes.index
787
788 end
789
790 private class ReverseSubstringsIterator
791 super IndexedIterator[Text]
792
793 var leaves: ReverseLeavesIterator
794
795 var pos: Int
796
797 var str: Text
798
799 init(tgt: Rope, from: Int)
800 do
801 leaves = tgt.reverse_leaves(from)
802 pos = from
803 if not leaves.is_ok then return
804 str = leaves.item.str
805 make_substring
806 end
807
808 fun make_substring
809 do
810 if pos >= (leaves.index + str.length - 1) then return
811 str = str.substring(0, (pos - leaves.index + 1))
812 end
813
814 redef fun is_ok do return leaves.is_ok
815
816 redef fun item do
817 assert is_ok
818 return str
819 end
820
821 redef fun index do return pos
822
823 redef fun next do
824 pos -= str.length
825 leaves.next
826 if not leaves.is_ok then return
827 str = leaves.item.str
828 make_substring
829 end
830 end
831
832 private class ReverseRopeCharIterator
833 super IndexedIterator[Char]
834
835 var substrs: IndexedIterator[Text]
836
837 var pos: Int
838
839 var subiter: IndexedIterator[Char]
840
841 init(tgt: Rope, from: Int)
842 do
843 substrs = tgt.reverse_substrings_from(from)
844 if not substrs.is_ok then
845 pos = -1
846 return
847 end
848 subiter = substrs.item.chars.reverse_iterator
849 pos = from
850 end
851
852 redef fun is_ok do return pos >= 0
853
854 redef fun item do
855 assert is_ok
856 return subiter.item
857 end
858
859 redef fun index do return pos
860
861 redef fun next do
862 pos -= 1
863 if subiter.is_ok then subiter.next
864 if not subiter.is_ok then
865 if substrs.is_ok then substrs.next
866 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
867 end
868 end
869 end
870