ebc33771107e1911d88a8c3eeb769c6b9a1a358c
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47
48 # Transforms the current node to a Leaf.
49 # This might be costly to invoke since this produces a FlatString concatenation.
50 # Can be used internally to limit the growth of the Rope when working with small leaves.
51 fun to_leaf: Leaf is abstract
52 end
53
54 # Node that represents a concatenation between two nodes (of any RopeNode type)
55 private class Concat
56 super RopeNode
57
58 # Left child of the node
59 var left: nullable RopeNode
60 # Right child of the node
61 var right: nullable RopeNode
62
63 init(l: nullable RopeNode, r: nullable RopeNode)
64 do
65 left = l
66 right = r
67 if l != null then length += l.length
68 if r != null then length += r.length
69 end
70
71 redef fun to_leaf
72 do
73 if left == null then
74 if right == null then return new StringLeaf("".as(FlatString))
75 return right.to_leaf
76 end
77 if right == null then return left.as(not null).to_leaf
78 return new StringLeaf((left.to_leaf.str.as(FlatString) + right.to_leaf.str.as(FlatString)).as(FlatString))
79 end
80 end
81
82 # Leaf of a Rope, contains a FlatString
83 private abstract class Leaf
84 super RopeNode
85
86 # Encapsulated FlatString in the leaf node
87 var str: FlatText
88
89 end
90
91 private class StringLeaf
92 super Leaf
93
94 init(val: FlatString) do
95 self.str = val
96 length = str.length
97 end
98
99 redef fun to_leaf do return self
100 end
101
102 # Used as a cache when using indexed access to a substring in the Rope
103 private class LeafCache
104 # Cached leaf
105 var leaf: Leaf
106 # Position in Rope
107 var pos: Int
108 end
109
110 # Basic structure, binary tree with a root node.
111 #
112 # Also shared services by subsequent implementations.
113 abstract class Rope
114 super Text
115
116 # Root node, entry point of a Rope.
117 private var root: RopeNode
118
119 # Cached version of self as a flat String
120 private var str_representation: nullable NativeString = null
121
122 private var leaf_cache: nullable LeafCache = null
123
124 # Empty Rope
125 init do from("")
126
127 # Creates a new Rope with `s` as root
128 init from(s: String) do
129 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
130 end
131
132 private init from_root(r: RopeNode)
133 do
134 root = r
135 end
136
137 redef fun length do return root.length
138
139 # Iterator on the nodes of the rope, in forward postfix order
140 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
141
142 # Iterator on the leaves of the rope, forward order
143 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
144
145 # Iterator on the substrings from 0, in forward order
146 redef fun substrings do return new SubstringsIterator(self, 0)
147
148 # Iterator on the substrings, starting at position `from`, in forward order
149 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
150
151 # Iterator on the nodes of the rope, in backwards postfix order
152 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
153
154 # Iterator on the leaves of the rope, backwards order
155 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
156
157 # Iterator on the substrings, in reverse order
158 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
159
160 # Iterator on the substrings, in reverse order, starting iteration at position `from`
161 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
162
163 redef fun output
164 do
165 for i in substrings do
166 i.output
167 end
168 end
169
170 redef fun to_cstring
171 do
172 if str_representation != null then return str_representation.as(not null)
173
174 var native_final_str = calloc_string(length + 1)
175
176 native_final_str[length] = '\0'
177
178 if self.length == 0 then
179 str_representation = native_final_str
180 return native_final_str
181 end
182
183 var offset = 0
184
185 for i in substrings do
186 var str = i.flatten
187 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
188 offset += i.length
189 end
190
191 str_representation = native_final_str
192
193 return native_final_str
194 end
195
196 # Path to the Leaf for `position`
197 private fun node_at(position: Int): Path
198 do
199 assert position >= 0 and position <= length
200 if position == length then
201 var st = new List[PathElement]
202 stack_to_end(root,st)
203 if not st.is_empty then
204 var lst = st.last
205 var lf = lst.node.right
206 if lf != null then
207 return new Path(lf.as(Leaf), lf.length, st)
208 else
209 lf = lst.node.left
210 return new Path(lf.as(Leaf), lf.length, st)
211 end
212 else
213 return new Path(root.as(Leaf), length, st)
214 end
215 end
216 return get_node_from(root, 0, position, new List[PathElement])
217 end
218
219 # Special case for when the required pos is length
220 private fun stack_to_end(nod: RopeNode, st: List[PathElement])
221 do
222 if nod isa Leaf then return
223 var n = nod.as(Concat)
224 var r = n.right
225 var ele = new PathElement(n)
226 ele.right = true
227 st.push(ele)
228 if r != null then
229 stack_to_end(r, st)
230 end
231 return
232 end
233
234 # Builds the path to Leaf at position `seek_pos`
235 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
236 do
237 assert curr_pos >= 0
238 if node isa Leaf then
239 self.leaf_cache = new LeafCache(node, curr_pos)
240 return new Path(node, seek_pos - curr_pos, stack)
241 end
242 node = node.as(Concat)
243
244 if node.left != null then
245 var next_pos = curr_pos + node.left.length
246 stack.add(new PathElement(node))
247 if next_pos > seek_pos then
248 stack.last.left = true
249 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
250 end
251 stack.last.right = true
252 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
253 else
254 var vis = new PathElement(node)
255 vis.right = true
256 stack.add(vis)
257 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
258 end
259 end
260
261 redef fun ==(o)
262 do
263 if not o isa Text then return false
264 if o.length != self.length then return false
265 var oit = o.chars.iterator
266 for i in self.chars.iterator do
267 if i != oit.item then return false
268 oit.next
269 end
270 return true
271 end
272 end
273
274 # Rope that cannot be modified
275 class RopeString
276 super Rope
277 super String
278
279 redef fun to_s do return self
280
281 redef fun empty do return once new RopeString.from("")
282
283 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
284
285 redef fun reversed
286 do
287 var ret = empty
288 for i in substrings do
289 ret = i.as(String).reversed + ret
290 end
291 return ret
292 end
293
294 redef fun to_upper
295 do
296 var ret = empty
297 for i in substrings do
298 ret += i.as(String).to_upper
299 end
300 return ret
301 end
302
303 redef fun to_lower
304 do
305 var ret = empty
306 for i in substrings do
307 ret += i.as(String).to_lower
308 end
309 return ret
310 end
311
312 redef fun +(o) do
313 if self.length == 0 then return o.to_s
314 if o.length == 0 then return self
315 var str = o.to_s
316 if str isa FlatString then
317 return new RopeString.from_root(new Concat(root, new StringLeaf(str)))
318 else if str isa RopeString then
319 return new RopeString.from_root(new Concat(root, str.root))
320 else
321 abort
322 end
323 end
324
325 redef fun *(n)
326 do
327 var ret = new RopeString.from("")
328 for i in [0..n[ do
329 ret = (ret + self).as(RopeString)
330 end
331 return ret
332 end
333
334 # Inserts a String `str` at position `pos`
335 redef fun insert_at(str, pos)
336 do
337 if str.length == 0 then return self
338 if self.length == 0 then return new RopeString.from(str)
339
340 assert pos >= 0 and pos <= length
341
342 var path = node_at(pos)
343
344 var last_concat: Concat
345
346 if path.offset == 0 then
347 if str isa FlatString then
348 last_concat = new Concat(new StringLeaf(str), path.leaf)
349 else
350 last_concat = new Concat(str.as(RopeString).root, path.leaf)
351 end
352 else if path.offset == path.leaf.length then
353 if str isa FlatString then
354 last_concat = new Concat(path.leaf, new StringLeaf(str))
355 else
356 last_concat = new Concat(path.leaf, str.as(RopeString).root)
357 end
358 else
359 var s = path.leaf.str
360 var l_half = s.substring(0, s.length - path.offset)
361 var r_half = s.substring_from(s.length - path.offset)
362 var cct: Concat
363 var ll = new StringLeaf(l_half.as(FlatString))
364 if str isa FlatString then
365 cct = new Concat(ll, new StringLeaf(str))
366 else
367 cct = new Concat(ll, str.as(RopeString).root)
368 end
369 last_concat = new Concat(cct, new StringLeaf(r_half.as(FlatString)))
370 end
371
372 for i in path.stack.reverse_iterator do
373 if i.left then
374 last_concat = new Concat(last_concat, i.node.right)
375 else
376 last_concat = new Concat(i.node.left, last_concat)
377 end
378 end
379
380 return new RopeString.from_root(last_concat)
381 end
382
383 # O(log(n))
384 #
385 # var rope = new RopeString.from("abcd")
386 # assert rope.substring(1, 2) == "bc"
387 # assert rope.substring(-1, 2) == "a"
388 # assert rope.substring(1, 0) == ""
389 # assert rope.substring(2, 5) == "cd"
390 #
391 redef fun substring(pos, len)
392 do
393 if pos < 0 then
394 len += pos
395 pos = 0
396 end
397
398 if pos + len > length then len = length - pos
399
400 if len <= 0 then return new RopeString.from("")
401
402 var path = node_at(pos)
403
404 var lf = path.leaf
405 var offset = path.offset
406
407 if path.leaf.str.length - offset > len then lf = new StringLeaf(lf.str.substring(offset,len).as(FlatString)) else lf = new StringLeaf(lf.str.substring_from(offset).as(FlatString))
408
409 var nod: RopeNode = lf
410
411 for i in path.stack.reverse_iterator do
412 if i.right then continue
413 nod = new Concat(nod, i.node.right)
414 end
415
416 var ret = new RopeString
417 ret.root = nod
418
419 path = ret.node_at(len-1)
420
421 offset = path.offset
422 nod = new StringLeaf(path.leaf.str.substring(0, offset+1).as(FlatString))
423
424 for i in path.stack.reverse_iterator do
425 if i.left then continue
426 nod = new Concat(i.node.left, nod)
427 end
428
429 ret.root = nod
430
431 return ret
432 end
433 end
434
435 redef class FlatString
436
437 redef fun insert_at(s, pos)
438 do
439
440 if pos == 0 then
441 var r = new RopeString.from(s)
442 return r + self
443 end
444 if pos == length then
445 var r = new RopeString.from(self)
446 return r + s
447 end
448
449 var l = substring(0,pos)
450 var r = substring_from(pos)
451 var ret: String = new RopeString.from(l)
452 ret += s
453 return ret + r
454 end
455
456 end
457
458 private class RopeStringChars
459 super SequenceRead[Char]
460
461 var tgt: Rope
462
463 redef fun [](pos)
464 do
465 assert pos < tgt.length
466 if tgt.leaf_cache != null and pos >= tgt.leaf_cache.pos and (tgt.leaf_cache.pos + tgt.leaf_cache.leaf.length) > pos then return tgt.leaf_cache.leaf.str.chars[pos - tgt.leaf_cache.pos]
467 var path = tgt.node_at(pos)
468 return path.leaf.str.chars[path.offset]
469 end
470
471 redef fun iterator do return iterator_from(0)
472
473 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
474
475 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
476
477 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
478 end
479
480 # Used to iterate on a Rope
481 private class IteratorElement
482
483 init(e: RopeNode)
484 do
485 if e isa Leaf then
486 left = true
487 right = true
488 end
489 node = e
490 end
491
492 # The node being visited
493 var node: RopeNode
494 # If the node has a left child, was it visited ?
495 var left = false
496 # If the node has a right child, was it visited ?
497 var right = false
498 # Was the current node visited ?
499 var done = false
500 end
501
502 # Simple Postfix iterator on the nodes of a Rope
503 private class Postfix
504 super IndexedIterator[RopeNode]
505
506 # Target Rope to iterate on
507 var target: Rope
508
509 # Current position in Rope
510 var pos: Int
511
512 # Visited nodes
513 var stack = new List[IteratorElement]
514
515 init from(tgt: Rope, pos: Int)
516 do
517 self.target = tgt
518 self.pos = pos
519 if pos < 0 or pos >= tgt.length then return
520 var path = tgt.node_at(pos)
521 self.pos -= path.offset
522 for i in path.stack do
523 var item = new IteratorElement(i.node)
524 item.left = true
525 if i.right then item.right = true
526 stack.push item
527 end
528 var item = new IteratorElement(path.leaf)
529 item.done = true
530 stack.push item
531 end
532
533 redef fun item
534 do
535 assert is_ok
536 return stack.last.node
537 end
538
539 redef fun is_ok do return not stack.is_empty
540
541 redef fun index do return pos
542
543 redef fun next do
544 if stack.is_empty then return
545 if pos > target.length-1 then
546 stack.clear
547 return
548 end
549 var lst = stack.last
550 if lst.done then
551 if lst.node isa Leaf then
552 pos += lst.node.length
553 end
554 stack.pop
555 next
556 return
557 end
558 if not lst.left then
559 lst.left = true
560 var nod = lst.node
561 if nod isa Concat and nod.left != null then
562 stack.push(new IteratorElement(nod.left.as(not null)))
563 next
564 return
565 end
566 end
567 if not lst.right then
568 lst.right = true
569 var nod = lst.node
570 if nod isa Concat and nod.right != null then
571 stack.push(new IteratorElement(nod.right.as(not null)))
572 next
573 return
574 end
575 end
576 lst.done = true
577 end
578 end
579
580 # Iterates on the leaves (substrings) of the Rope
581 class LeavesIterator
582 super IndexedIterator[Leaf]
583
584 private var nodes: Postfix
585
586 init(tgt: Rope, pos: Int)
587 do
588 nodes = tgt.postfix(pos)
589 end
590
591 redef fun is_ok do return nodes.is_ok
592
593 redef fun item
594 do
595 assert is_ok
596 return nodes.item.as(Leaf)
597 end
598
599 redef fun index do return nodes.index
600
601 redef fun next
602 do
603 while nodes.is_ok do
604 nodes.next
605 if nodes.is_ok and nodes.item isa Leaf then break
606 end
607 end
608 end
609
610 # Uses the leaves and calculates a new substring on each iteration
611 class SubstringsIterator
612 super IndexedIterator[Text]
613
614 private var nodes: IndexedIterator[Leaf]
615
616 # Current position in Rope
617 var pos: Int
618
619 # Current substring, computed from the current Leaf and indexes
620 var substring: Text
621
622 init(tgt: Rope, pos: Int)
623 do
624 nodes = tgt.leaves(pos)
625 self.pos = pos
626 if pos < 0 or pos >= tgt.length then return
627 make_substring
628 end
629
630 # Compute the bounds of the current substring and makes the substring
631 private fun make_substring
632 do
633 substring = nodes.item.str
634 var min = 0
635 var length = substring.length
636 if nodes.index < pos then
637 min = pos - nodes.index
638 end
639 substring = substring.substring(min, length)
640 end
641
642 redef fun is_ok do return nodes.is_ok
643
644 redef fun item
645 do
646 assert is_ok
647 return substring
648 end
649
650 redef fun index do return pos
651
652 redef fun next
653 do
654 pos += substring.length
655 nodes.next
656 if nodes.is_ok then make_substring
657 end
658
659 end
660
661 class RopeCharIterator
662 super IndexedIterator[Char]
663
664 var substrings: IndexedIterator[Text]
665
666 var pos: Int
667
668 var max: Int
669
670 var substr_iter: IndexedIterator[Char]
671
672 init(tgt: Rope, from: Int)
673 do
674 substrings = tgt.substrings_from(from)
675 max = tgt.length - 1
676 if not substrings.is_ok then
677 pos = tgt.length
678 return
679 end
680 pos = from
681 substr_iter = substrings.item.chars.iterator
682 end
683
684 redef fun item do return substr_iter.item
685
686 redef fun is_ok do return pos <= max
687
688 redef fun index do return pos
689
690 redef fun next
691 do
692 pos += 1
693 if substr_iter.is_ok then
694 substr_iter.next
695 end
696 if not substr_iter.is_ok then
697 substrings.next
698 if substrings.is_ok then
699 substr_iter = substrings.item.chars.iterator
700 end
701 end
702 end
703 end
704
705 private class ReversePostfix
706 super IndexedIterator[RopeNode]
707
708 var target: Rope
709
710 var pos: Int
711
712 var min = 0
713
714 var stack = new List[IteratorElement]
715
716 init from(tgt: Rope, pos: Int)
717 do
718 self.pos = pos
719 target = tgt
720 if pos < 0 or pos >= tgt.length then return
721 var path = tgt.node_at(pos)
722 self.pos -= path.offset
723 for i in path.stack do
724 var elemt = new IteratorElement(i.node)
725 elemt.right = true
726 if i.left then
727 elemt.left = true
728 end
729 stack.push elemt
730 end
731 stack.push(new IteratorElement(path.leaf))
732 stack.last.done = true
733 end
734
735 redef fun item do
736 assert is_ok
737 return stack.last.node
738 end
739
740 redef fun is_ok do return not stack.is_empty
741
742 redef fun index do return pos
743
744 redef fun next
745 do
746 if stack.is_empty then return
747 if pos < min then
748 stack.clear
749 return
750 end
751 var lst = stack.last
752 if lst.done then
753 stack.pop
754 next
755 return
756 end
757 if not lst.right then
758 var nod = lst.node.as(Concat)
759 var rgt = nod.right
760 lst.right = true
761 if rgt != null then
762 stack.push(new IteratorElement(rgt))
763 next
764 return
765 end
766 end
767 if not lst.left then
768 var nod = lst.node.as(Concat)
769 var lft = nod.left
770 lst.left = true
771 if lft != null then
772 stack.push(new IteratorElement(lft))
773 next
774 return
775 end
776 end
777 if lst.node isa Leaf then pos -= lst.node.length
778 lst.done = true
779 end
780 end
781
782 private class ReverseLeavesIterator
783 super IndexedIterator[Leaf]
784
785 var nodes: ReversePostfix
786
787 init(tgt: Rope, from: Int)
788 do
789 nodes = tgt.reverse_postfix(from)
790 end
791
792 redef fun is_ok do return nodes.is_ok
793
794 redef fun item do
795 assert is_ok
796 return nodes.item.as(Leaf)
797 end
798
799 redef fun next do
800 while nodes.is_ok do
801 nodes.next
802 if nodes.is_ok then if nodes.item isa Leaf then break
803 end
804 end
805
806 redef fun index do return nodes.index
807
808 end
809
810 private class ReverseSubstringsIterator
811 super IndexedIterator[Text]
812
813 var leaves: ReverseLeavesIterator
814
815 var pos: Int
816
817 var str: Text
818
819 init(tgt: Rope, from: Int)
820 do
821 leaves = tgt.reverse_leaves(from)
822 pos = from
823 if not leaves.is_ok then return
824 str = leaves.item.str
825 make_substring
826 end
827
828 fun make_substring
829 do
830 if pos >= (leaves.index + str.length - 1) then return
831 str = str.substring(0, (pos - leaves.index + 1))
832 end
833
834 redef fun is_ok do return leaves.is_ok
835
836 redef fun item do
837 assert is_ok
838 return str
839 end
840
841 redef fun index do return pos
842
843 redef fun next do
844 pos -= str.length
845 leaves.next
846 if not leaves.is_ok then return
847 str = leaves.item.str
848 make_substring
849 end
850 end
851
852 private class ReverseRopeCharIterator
853 super IndexedIterator[Char]
854
855 var substrs: IndexedIterator[Text]
856
857 var pos: Int
858
859 var subiter: IndexedIterator[Char]
860
861 init(tgt: Rope, from: Int)
862 do
863 substrs = tgt.reverse_substrings_from(from)
864 if not substrs.is_ok then
865 pos = -1
866 return
867 end
868 subiter = substrs.item.chars.reverse_iterator
869 pos = from
870 end
871
872 redef fun is_ok do return pos >= 0
873
874 redef fun item do
875 assert is_ok
876 return subiter.item
877 end
878
879 redef fun index do return pos
880
881 redef fun next do
882 pos -= 1
883 if subiter.is_ok then subiter.next
884 if not subiter.is_ok then
885 if substrs.is_ok then substrs.next
886 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
887 end
888 end
889 end
890