54e60502b10e569a28cc9f3b8364657c166dd599
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private abstract class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatText
82
83 end
84
85 private class StringLeaf
86 super Leaf
87
88 init(val: FlatString) do
89 self.str = val
90 length = str.length
91 end
92
93 end
94
95 # Basic structure, binary tree with a root node.
96 #
97 # Also shared services by subsequent implementations.
98 abstract class Rope
99 super Text
100
101 # Root node, entry point of a Rope.
102 private var root: RopeNode
103
104 # Cached version of self as a flat String
105 private var str_representation: nullable NativeString = null
106
107 # Empty Rope
108 init do from("")
109
110 # Creates a new Rope with `s` as root
111 init from(s: String) do
112 if s isa RopeString then root = s.root else root = new StringLeaf(s.as(FlatString))
113 end
114
115 private init from_root(r: RopeNode)
116 do
117 root = r
118 end
119
120 redef fun length do return root.length
121
122 # Iterator on the nodes of the rope, in forward postfix order
123 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
124
125 # Iterator on the leaves of the rope, forward order
126 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
127
128 # Iterator on the substrings from 0, in forward order
129 redef fun substrings do return new SubstringsIterator(self, 0)
130
131 # Iterator on the substrings, starting at position `from`, in forward order
132 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
133
134 # Iterator on the nodes of the rope, in backwards postfix order
135 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
136
137 # Iterator on the leaves of the rope, backwards order
138 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
139
140 # Iterator on the substrings, in reverse order
141 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
142
143 # Iterator on the substrings, in reverse order, starting iteration at position `from`
144 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
145
146 redef fun output
147 do
148 for i in substrings do
149 i.output
150 end
151 end
152
153 redef fun to_cstring
154 do
155 if str_representation != null then return str_representation.as(not null)
156
157 var native_final_str = calloc_string(length + 1)
158
159 native_final_str[length] = '\0'
160
161 if self.length == 0 then
162 str_representation = native_final_str
163 return native_final_str
164 end
165
166 var offset = 0
167
168 for i in substrings do
169 var str = i.flatten
170 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
171 offset += i.length
172 end
173
174 str_representation = native_final_str
175
176 return native_final_str
177 end
178
179 # Path to the Leaf for `position`
180 private fun node_at(position: Int): Path
181 do
182 assert position >= 0 and position < length
183 return get_node_from(root.as(not null), 0, position, new List[PathElement])
184 end
185
186 # Builds the path to Leaf at position `seek_pos`
187 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
188 do
189 assert curr_pos >= 0
190 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
191 node = node.as(Concat)
192
193 if node.left != null then
194 var next_pos = curr_pos + node.left.length
195 stack.add(new PathElement(node))
196 if next_pos > seek_pos then
197 stack.last.left = true
198 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
199 end
200 stack.last.right = true
201 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
202 else
203 var vis = new PathElement(node)
204 vis.right = true
205 stack.add(vis)
206 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
207 end
208 end
209
210 redef fun ==(o)
211 do
212 if not o isa Text then return false
213 if o.length != self.length then return false
214 var oit = o.chars.iterator
215 for i in self.chars.iterator do
216 if i != oit.item then return false
217 oit.next
218 end
219 return true
220 end
221 end
222
223 # Rope that cannot be modified
224 class RopeString
225 super Rope
226 super String
227
228 redef fun to_s do return self
229
230 redef fun empty do return once new RopeString.from("")
231
232 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
233
234 redef fun reversed
235 do
236 var ret = empty.as(RopeString)
237 for i in substrings do
238 ret = ret.prepend(i.reversed.to_s).as(RopeString)
239 end
240 return ret
241 end
242
243 redef fun to_upper
244 do
245 var ret = empty
246 for i in substrings do
247 ret += i.to_upper
248 end
249 return ret
250 end
251
252 redef fun to_lower
253 do
254 var ret = empty
255 for i in substrings do
256 ret += i.to_lower
257 end
258 return ret
259 end
260
261 redef fun +(o) do return insert_at(o.to_s, length)
262
263 redef fun *(n)
264 do
265 var ret = new RopeString.from("")
266 for i in [0..n[ do
267 ret = (ret + self).as(RopeString)
268 end
269 return ret
270 end
271
272 # Inserts a String `str` at position `pos`
273 redef fun insert_at(str, pos)
274 do
275 if str.length == 0 then return self
276 if self.length == 0 then return new RopeString.from(str)
277
278 assert pos >= 0 and pos <= length
279
280 if pos == length then return append(str).as(RopeString)
281
282 var path = node_at(pos)
283
284 var last_concat = new Concat
285
286 if path.offset == 0 then
287 last_concat.right = path.leaf
288 if str isa FlatString then last_concat.left = new StringLeaf(str) else last_concat.left = str.as(RopeString).root
289 else if path.offset == path.leaf.length then
290 if str isa FlatString then last_concat.right = new StringLeaf(str) else last_concat.right = str.as(RopeString).root
291 last_concat.left = path.leaf
292 else
293 var s = path.leaf.str
294 var l_half = s.substring(0, s.length - path.offset)
295 var r_half = s.substring_from(s.length - path.offset)
296 var cct = new Concat
297 cct.right = new StringLeaf(r_half.as(FlatString))
298 last_concat.left = new StringLeaf(l_half.as(FlatString))
299 if str isa FlatString then last_concat.right = new StringLeaf(str) else last_concat.right = str.as(RopeString).root
300 cct.left = last_concat
301 last_concat = cct
302 end
303
304 for i in path.stack.reverse_iterator do
305 var nod = new Concat
306 if i.left then
307 nod.right = i.node.right.as(not null)
308 nod.left = last_concat
309 else
310 nod.left = i.node.left.as(not null)
311 nod.right = last_concat
312 end
313 last_concat = nod
314 end
315
316 return new RopeString.from_root(last_concat)
317 end
318
319 # Adds `s` at the beginning of self
320 redef fun prepend(s) do return insert_at(s, 0)
321
322 # Adds `s` at the end of self
323 redef fun append(s)
324 do
325 if self.is_empty then return s
326 return new RopeString.from_root(append_to_path(root,s))
327 end
328
329 # Builds a new path from root to the rightmost node with s appended
330 private fun append_to_path(node: RopeNode, s: String): RopeNode
331 do
332 var cct = new Concat
333 if node isa Leaf then
334 cct.left = node
335 if s isa FlatString then cct.right = new StringLeaf(s) else cct.right = s.as(RopeString).root
336 else if node isa Concat then
337 var right = node.right
338 if node.left != null then cct.left = node.left.as(not null)
339 if right == null then
340 if s isa FlatString then cct.right = new StringLeaf(s) else cct.right = s.as(RopeString).root
341 else
342 cct.right = append_to_path(right, s)
343 end
344 end
345 return cct
346 end
347
348 # O(log(n))
349 #
350 # var rope = new RopeString.from("abcd")
351 # assert rope.substring(1, 2) == "bc"
352 # assert rope.substring(-1, 2) == "a"
353 # assert rope.substring(1, 0) == ""
354 # assert rope.substring(2, 5) == "cd"
355 #
356 redef fun substring(pos, len)
357 do
358 if pos < 0 then
359 len += pos
360 pos = 0
361 end
362
363 if pos + len > length then len = length - pos
364
365 if len <= 0 then return new RopeString.from("")
366
367 var path = node_at(pos)
368
369 var lf = path.leaf
370 var offset = path.offset
371
372 if path.leaf.str.length - offset > len then lf = new StringLeaf(lf.str.substring(offset,len).as(FlatString)) else lf = new StringLeaf(lf.str.substring_from(offset).as(FlatString))
373
374 var nod: RopeNode = lf
375
376 for i in path.stack.reverse_iterator do
377 if i.right then continue
378 var tmp = new Concat
379 tmp.left = nod
380 var r = i.node.right
381 if r != null then tmp.right = r
382 nod = tmp
383 end
384
385 var ret = new RopeString
386 ret.root = nod
387
388 path = ret.node_at(len-1)
389
390 offset = path.offset
391 nod = new StringLeaf(path.leaf.str.substring(0, offset+1).as(FlatString))
392
393 for i in path.stack.reverse_iterator do
394 if i.left then continue
395 var tmp = new Concat
396 tmp.right = nod
397 var l = i.node.left
398 if l != null then tmp.left = l
399 nod = tmp
400 end
401
402 ret.root = nod
403
404 return ret
405 end
406 end
407
408 redef class FlatString
409
410 redef fun append(s) do return (new RopeString.from(self)) + s
411
412 redef fun prepend(s) do return (new RopeString.from(self)).prepend(s)
413
414 redef fun insert_at(s, pos)
415 do
416 if pos == 0 then return prepend(s)
417 if pos == length then return append(s)
418
419 var l = substring(0,pos)
420 var r = substring_from(pos)
421 var ret: String = new RopeString.from(l)
422 ret += s
423 return ret + r
424 end
425
426 end
427
428 private class RopeStringChars
429 super SequenceRead[Char]
430
431 var tgt: Rope
432
433 redef fun [](pos)
434 do
435 assert pos < tgt.length
436 var path = tgt.node_at(pos)
437 return path.leaf.str.chars[path.offset]
438 end
439
440 redef fun iterator do return iterator_from(0)
441
442 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
443
444 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
445
446 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
447 end
448
449 # Used to iterate on a Rope
450 private class IteratorElement
451
452 init(e: RopeNode)
453 do
454 if e isa Leaf then
455 left = true
456 right = true
457 end
458 node = e
459 end
460
461 # The node being visited
462 var node: RopeNode
463 # If the node has a left child, was it visited ?
464 var left = false
465 # If the node has a right child, was it visited ?
466 var right = false
467 # Was the current node visited ?
468 var done = false
469 end
470
471 # Simple Postfix iterator on the nodes of a Rope
472 private class Postfix
473 super IndexedIterator[RopeNode]
474
475 # Target Rope to iterate on
476 var target: Rope
477
478 # Current position in Rope
479 var pos: Int
480
481 # Visited nodes
482 var stack = new List[IteratorElement]
483
484 init from(tgt: Rope, pos: Int)
485 do
486 self.target = tgt
487 self.pos = pos
488 if pos < 0 or pos >= tgt.length then return
489 var path = tgt.node_at(pos)
490 self.pos -= path.offset
491 for i in path.stack do
492 var item = new IteratorElement(i.node)
493 item.left = true
494 if i.right then item.right = true
495 stack.push item
496 end
497 var item = new IteratorElement(path.leaf)
498 item.done = true
499 stack.push item
500 end
501
502 redef fun item
503 do
504 assert is_ok
505 return stack.last.node
506 end
507
508 redef fun is_ok do return not stack.is_empty
509
510 redef fun index do return pos
511
512 redef fun next do
513 if stack.is_empty then return
514 if pos > target.length-1 then
515 stack.clear
516 return
517 end
518 var lst = stack.last
519 if lst.done then
520 if lst.node isa Leaf then
521 pos += lst.node.length
522 end
523 stack.pop
524 next
525 return
526 end
527 if not lst.left then
528 lst.left = true
529 var nod = lst.node
530 if nod isa Concat and nod.left != null then
531 stack.push(new IteratorElement(nod.left.as(not null)))
532 next
533 return
534 end
535 end
536 if not lst.right then
537 lst.right = true
538 var nod = lst.node
539 if nod isa Concat and nod.right != null then
540 stack.push(new IteratorElement(nod.right.as(not null)))
541 next
542 return
543 end
544 end
545 lst.done = true
546 end
547 end
548
549 # Iterates on the leaves (substrings) of the Rope
550 class LeavesIterator
551 super IndexedIterator[Leaf]
552
553 private var nodes: Postfix
554
555 init(tgt: Rope, pos: Int)
556 do
557 nodes = tgt.postfix(pos)
558 end
559
560 redef fun is_ok do return nodes.is_ok
561
562 redef fun item
563 do
564 assert is_ok
565 return nodes.item.as(Leaf)
566 end
567
568 redef fun index do return nodes.index
569
570 redef fun next
571 do
572 while nodes.is_ok do
573 nodes.next
574 if nodes.is_ok and nodes.item isa Leaf then break
575 end
576 end
577 end
578
579 # Uses the leaves and calculates a new substring on each iteration
580 class SubstringsIterator
581 super IndexedIterator[Text]
582
583 private var nodes: IndexedIterator[Leaf]
584
585 # Current position in Rope
586 var pos: Int
587
588 # Current substring, computed from the current Leaf and indexes
589 var substring: Text
590
591 init(tgt: Rope, pos: Int)
592 do
593 nodes = tgt.leaves(pos)
594 self.pos = pos
595 if pos < 0 or pos >= tgt.length then return
596 make_substring
597 end
598
599 # Compute the bounds of the current substring and makes the substring
600 private fun make_substring
601 do
602 substring = nodes.item.str
603 var min = 0
604 var length = substring.length
605 if nodes.index < pos then
606 min = pos - nodes.index
607 end
608 substring = substring.substring(min, length)
609 end
610
611 redef fun is_ok do return nodes.is_ok
612
613 redef fun item
614 do
615 assert is_ok
616 return substring
617 end
618
619 redef fun index do return pos
620
621 redef fun next
622 do
623 pos += substring.length
624 nodes.next
625 if nodes.is_ok then make_substring
626 end
627
628 end
629
630 class RopeCharIterator
631 super IndexedIterator[Char]
632
633 var substrings: IndexedIterator[Text]
634
635 var pos: Int
636
637 var max: Int
638
639 var substr_iter: IndexedIterator[Char]
640
641 init(tgt: Rope, from: Int)
642 do
643 substrings = tgt.substrings_from(from)
644 max = tgt.length - 1
645 if not substrings.is_ok then
646 pos = tgt.length
647 return
648 end
649 pos = from
650 substr_iter = substrings.item.chars.iterator
651 end
652
653 redef fun item do return substr_iter.item
654
655 redef fun is_ok do return pos <= max
656
657 redef fun index do return pos
658
659 redef fun next
660 do
661 pos += 1
662 if substr_iter.is_ok then
663 substr_iter.next
664 end
665 if not substr_iter.is_ok then
666 substrings.next
667 if substrings.is_ok then
668 substr_iter = substrings.item.chars.iterator
669 end
670 end
671 end
672 end
673
674 private class ReversePostfix
675 super IndexedIterator[RopeNode]
676
677 var target: Rope
678
679 var pos: Int
680
681 var min = 0
682
683 var stack = new List[IteratorElement]
684
685 init from(tgt: Rope, pos: Int)
686 do
687 self.pos = pos
688 target = tgt
689 if pos < 0 or pos >= tgt.length then return
690 var path = tgt.node_at(pos)
691 self.pos -= path.offset
692 for i in path.stack do
693 var elemt = new IteratorElement(i.node)
694 elemt.right = true
695 if i.left then
696 elemt.left = true
697 end
698 stack.push elemt
699 end
700 stack.push(new IteratorElement(path.leaf))
701 stack.last.done = true
702 end
703
704 redef fun item do
705 assert is_ok
706 return stack.last.node
707 end
708
709 redef fun is_ok do return not stack.is_empty
710
711 redef fun index do return pos
712
713 redef fun next
714 do
715 if stack.is_empty then return
716 if pos < min then
717 stack.clear
718 return
719 end
720 var lst = stack.last
721 if lst.done then
722 stack.pop
723 next
724 return
725 end
726 if not lst.right then
727 var nod = lst.node.as(Concat)
728 var rgt = nod.right
729 lst.right = true
730 if rgt != null then
731 stack.push(new IteratorElement(rgt))
732 next
733 return
734 end
735 end
736 if not lst.left then
737 var nod = lst.node.as(Concat)
738 var lft = nod.left
739 lst.left = true
740 if lft != null then
741 stack.push(new IteratorElement(lft))
742 next
743 return
744 end
745 end
746 if lst.node isa Leaf then pos -= lst.node.length
747 lst.done = true
748 end
749 end
750
751 private class ReverseLeavesIterator
752 super IndexedIterator[Leaf]
753
754 var nodes: ReversePostfix
755
756 init(tgt: Rope, from: Int)
757 do
758 nodes = tgt.reverse_postfix(from)
759 end
760
761 redef fun is_ok do return nodes.is_ok
762
763 redef fun item do
764 assert is_ok
765 return nodes.item.as(Leaf)
766 end
767
768 redef fun next do
769 while nodes.is_ok do
770 nodes.next
771 if nodes.is_ok then if nodes.item isa Leaf then break
772 end
773 end
774
775 redef fun index do return nodes.index
776
777 end
778
779 private class ReverseSubstringsIterator
780 super IndexedIterator[Text]
781
782 var leaves: ReverseLeavesIterator
783
784 var pos: Int
785
786 var str: Text
787
788 init(tgt: Rope, from: Int)
789 do
790 leaves = tgt.reverse_leaves(from)
791 pos = from
792 if not leaves.is_ok then return
793 str = leaves.item.str
794 make_substring
795 end
796
797 fun make_substring
798 do
799 if pos >= (leaves.index + str.length - 1) then return
800 str = str.substring(0, (pos - leaves.index + 1))
801 end
802
803 redef fun is_ok do return leaves.is_ok
804
805 redef fun item do
806 assert is_ok
807 return str
808 end
809
810 redef fun index do return pos
811
812 redef fun next do
813 pos -= str.length
814 leaves.next
815 if not leaves.is_ok then return
816 str = leaves.item.str
817 make_substring
818 end
819 end
820
821 private class ReverseRopeCharIterator
822 super IndexedIterator[Char]
823
824 var substrs: IndexedIterator[Text]
825
826 var pos: Int
827
828 var subiter: IndexedIterator[Char]
829
830 init(tgt: Rope, from: Int)
831 do
832 substrs = tgt.reverse_substrings_from(from)
833 if not substrs.is_ok then
834 pos = -1
835 return
836 end
837 subiter = substrs.item.chars.reverse_iterator
838 pos = from
839 end
840
841 redef fun is_ok do return pos >= 0
842
843 redef fun item do
844 assert is_ok
845 return subiter.item
846 end
847
848 redef fun index do return pos
849
850 redef fun next do
851 pos -= 1
852 if subiter.is_ok then subiter.next
853 if not subiter.is_ok then
854 if substrs.is_ok then substrs.next
855 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
856 end
857 end
858 end
859