45c1cbd0f8d084cc133e92f27b0beebfe53b0004
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Cached version of self as a flat String
100 private var str_representation: nullable NativeString = null
101
102 # Empty Rope
103 init do from("")
104
105 # Creates a new Rope with `s` as root
106 init from(s: String) do
107 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
108 end
109
110 private init from_root(r: RopeNode)
111 do
112 root = r
113 end
114
115 redef fun length do return root.length
116
117 # Iterator on the nodes of the rope, in forward postfix order
118 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
119
120 # Iterator on the leaves of the rope, forward order
121 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
122
123 # Iterator on the substrings from 0, in forward order
124 redef fun substrings do return new SubstringsIterator(self, 0)
125
126 # Iterator on the substrings, starting at position `from`, in forward order
127 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
128
129 # Iterator on the nodes of the rope, in backwards postfix order
130 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
131
132 # Iterator on the leaves of the rope, backwards order
133 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
134
135 # Iterator on the substrings, in reverse order
136 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
137
138 # Iterator on the substrings, in reverse order, starting iteration at position `from`
139 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
140
141 redef fun output
142 do
143 for i in substrings do
144 i.output
145 end
146 end
147
148 redef fun to_cstring
149 do
150 if str_representation != null then return str_representation.as(not null)
151
152 var native_final_str = calloc_string(length + 1)
153
154 native_final_str[length] = '\0'
155
156 if self.length == 0 then
157 str_representation = native_final_str
158 return native_final_str
159 end
160
161 var offset = 0
162
163 for i in substrings do
164 var str = i.flatten
165 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
166 offset += i.length
167 end
168
169 str_representation = native_final_str
170
171 return native_final_str
172 end
173
174 # Path to the Leaf for `position`
175 private fun node_at(position: Int): Path
176 do
177 assert position >= 0 and position < length
178 return get_node_from(root.as(not null), 0, position, new List[PathElement])
179 end
180
181 # Builds the path to Leaf at position `seek_pos`
182 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
183 do
184 assert curr_pos >= 0
185 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
186 node = node.as(Concat)
187
188 if node.left != null then
189 var next_pos = curr_pos + node.left.length
190 stack.add(new PathElement(node))
191 if next_pos > seek_pos then
192 stack.last.left = true
193 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
194 end
195 stack.last.right = true
196 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
197 else
198 var vis = new PathElement(node)
199 vis.right = true
200 stack.add(vis)
201 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
202 end
203 end
204
205 redef fun ==(o)
206 do
207 if not o isa Text then return false
208 if o.length != self.length then return false
209 var oit = o.chars.iterator
210 for i in self.chars.iterator do
211 if i != oit.item then return false
212 oit.next
213 end
214 return true
215 end
216 end
217
218 # Rope that cannot be modified
219 class RopeString
220 super Rope
221 super String
222
223 redef fun to_s do return self
224
225 redef fun empty do return once new RopeString.from("")
226
227 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
228
229 redef fun reversed
230 do
231 var ret = empty.as(RopeString)
232 for i in substrings do
233 ret = ret.prepend(i.reversed.to_s).as(RopeString)
234 end
235 return ret
236 end
237
238 redef fun to_upper
239 do
240 var ret = empty
241 for i in substrings do
242 ret += i.to_upper
243 end
244 return ret
245 end
246
247 redef fun to_lower
248 do
249 var ret = empty
250 for i in substrings do
251 ret += i.to_lower
252 end
253 return ret
254 end
255
256 redef fun +(o) do return insert_at(o.to_s, length)
257
258 redef fun *(n)
259 do
260 var ret = new RopeString.from("")
261 for i in [0..n[ do
262 ret = (ret + self).as(RopeString)
263 end
264 return ret
265 end
266
267 # Inserts a String `str` at position `pos`
268 redef fun insert_at(str, pos)
269 do
270 if str.length == 0 then return self
271 if self.length == 0 then return new RopeString.from(str)
272
273 assert pos >= 0 and pos <= length
274
275 if pos == length then return append(str).as(RopeString)
276
277 var path = node_at(pos)
278
279 var last_concat = new Concat
280
281 if path.offset == 0 then
282 last_concat.right = path.leaf
283 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
284 else if path.offset == path.leaf.length then
285 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
286 last_concat.left = path.leaf
287 else
288 var s = path.leaf.str
289 var l_half = s.substring(0, s.length - path.offset)
290 var r_half = s.substring_from(s.length - path.offset)
291 var cct = new Concat
292 cct.right = new Leaf(r_half.as(FlatString))
293 last_concat.left = new Leaf(l_half.as(FlatString))
294 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
295 cct.left = last_concat
296 last_concat = cct
297 end
298
299 for i in path.stack.reverse_iterator do
300 var nod = new Concat
301 if i.left then
302 nod.right = i.node.right.as(not null)
303 nod.left = last_concat
304 else
305 nod.left = i.node.left.as(not null)
306 nod.right = last_concat
307 end
308 last_concat = nod
309 end
310
311 return new RopeString.from_root(last_concat)
312 end
313
314 # Adds `s` at the beginning of self
315 redef fun prepend(s) do return insert_at(s, 0)
316
317 # Adds `s` at the end of self
318 redef fun append(s)
319 do
320 if self.is_empty then return s
321 return new RopeString.from_root(append_to_path(root,s))
322 end
323
324 # Builds a new path from root to the rightmost node with s appended
325 private fun append_to_path(node: RopeNode, s: String): RopeNode
326 do
327 var cct = new Concat
328 if node isa Leaf then
329 cct.left = node
330 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
331 else if node isa Concat then
332 var right = node.right
333 if node.left != null then cct.left = node.left.as(not null)
334 if right == null then
335 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
336 else
337 cct.right = append_to_path(right, s)
338 end
339 end
340 return cct
341 end
342
343 # O(log(n))
344 #
345 # var rope = new RopeString.from("abcd")
346 # assert rope.substring(1, 2) == "bc"
347 # assert rope.substring(-1, 2) == "a"
348 # assert rope.substring(1, 0) == ""
349 # assert rope.substring(2, 5) == "cd"
350 #
351 redef fun substring(pos, len)
352 do
353 if pos < 0 then
354 len += pos
355 pos = 0
356 end
357
358 if pos + len > length then len = length - pos
359
360 if len <= 0 then return new RopeString.from("")
361
362 var path = node_at(pos)
363
364 var lf = path.leaf
365 var offset = path.offset
366
367 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len).as(FlatString)) else lf = new Leaf(lf.str.substring_from(offset).as(FlatString))
368
369 var nod: RopeNode = lf
370
371 for i in path.stack.reverse_iterator do
372 if i.right then continue
373 var tmp = new Concat
374 tmp.left = nod
375 var r = i.node.right
376 if r != null then tmp.right = r
377 nod = tmp
378 end
379
380 var ret = new RopeString
381 ret.root = nod
382
383 path = ret.node_at(len-1)
384
385 offset = path.offset
386 nod = new Leaf(path.leaf.str.substring(0, offset+1).as(FlatString))
387
388 for i in path.stack.reverse_iterator do
389 if i.left then continue
390 var tmp = new Concat
391 tmp.right = nod
392 var l = i.node.left
393 if l != null then tmp.left = l
394 nod = tmp
395 end
396
397 ret.root = nod
398
399 return ret
400 end
401 end
402
403 redef class FlatString
404
405 redef fun append(s) do return (new RopeString.from(self)) + s
406
407 redef fun prepend(s) do return (new RopeString.from(self)).prepend(s)
408
409 redef fun insert_at(s, pos)
410 do
411 if pos == 0 then return prepend(s)
412 if pos == length then return append(s)
413
414 var l = substring(0,pos)
415 var r = substring_from(pos)
416 var ret: String = new RopeString.from(l)
417 ret += s
418 return ret + r
419 end
420
421 end
422
423 private class RopeStringChars
424 super SequenceRead[Char]
425
426 var tgt: Rope
427
428 redef fun [](pos)
429 do
430 assert pos < tgt.length
431 var path = tgt.node_at(pos)
432 return path.leaf.str.chars[path.offset]
433 end
434
435 redef fun iterator do return iterator_from(0)
436
437 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
438
439 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
440
441 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
442 end
443
444 # Used to iterate on a Rope
445 private class IteratorElement
446
447 init(e: RopeNode)
448 do
449 if e isa Leaf then
450 left = true
451 right = true
452 end
453 node = e
454 end
455
456 # The node being visited
457 var node: RopeNode
458 # If the node has a left child, was it visited ?
459 var left = false
460 # If the node has a right child, was it visited ?
461 var right = false
462 # Was the current node visited ?
463 var done = false
464 end
465
466 # Simple Postfix iterator on the nodes of a Rope
467 private class Postfix
468 super IndexedIterator[RopeNode]
469
470 # Target Rope to iterate on
471 var target: Rope
472
473 # Current position in Rope
474 var pos: Int
475
476 # Visited nodes
477 var stack = new List[IteratorElement]
478
479 init from(tgt: Rope, pos: Int)
480 do
481 self.target = tgt
482 self.pos = pos
483 if pos < 0 or pos >= tgt.length then return
484 var path = tgt.node_at(pos)
485 self.pos -= path.offset
486 for i in path.stack do
487 var item = new IteratorElement(i.node)
488 item.left = true
489 if i.right then item.right = true
490 stack.push item
491 end
492 var item = new IteratorElement(path.leaf)
493 item.done = true
494 stack.push item
495 end
496
497 redef fun item
498 do
499 assert is_ok
500 return stack.last.node
501 end
502
503 redef fun is_ok do return not stack.is_empty
504
505 redef fun index do return pos
506
507 redef fun next do
508 if stack.is_empty then return
509 if pos > target.length-1 then
510 stack.clear
511 return
512 end
513 var lst = stack.last
514 if lst.done then
515 if lst.node isa Leaf then
516 pos += lst.node.length
517 end
518 stack.pop
519 next
520 return
521 end
522 if not lst.left then
523 lst.left = true
524 var nod = lst.node
525 if nod isa Concat and nod.left != null then
526 stack.push(new IteratorElement(nod.left.as(not null)))
527 next
528 return
529 end
530 end
531 if not lst.right then
532 lst.right = true
533 var nod = lst.node
534 if nod isa Concat and nod.right != null then
535 stack.push(new IteratorElement(nod.right.as(not null)))
536 next
537 return
538 end
539 end
540 lst.done = true
541 end
542 end
543
544 # Iterates on the leaves (substrings) of the Rope
545 class LeavesIterator
546 super IndexedIterator[Leaf]
547
548 private var nodes: Postfix
549
550 init(tgt: Rope, pos: Int)
551 do
552 nodes = tgt.postfix(pos)
553 end
554
555 redef fun is_ok do return nodes.is_ok
556
557 redef fun item
558 do
559 assert is_ok
560 return nodes.item.as(Leaf)
561 end
562
563 redef fun index do return nodes.index
564
565 redef fun next
566 do
567 while nodes.is_ok do
568 nodes.next
569 if nodes.is_ok and nodes.item isa Leaf then break
570 end
571 end
572 end
573
574 # Uses the leaves and calculates a new substring on each iteration
575 class SubstringsIterator
576 super IndexedIterator[Text]
577
578 private var nodes: IndexedIterator[Leaf]
579
580 # Current position in Rope
581 var pos: Int
582
583 # Current substring, computed from the current Leaf and indexes
584 var substring: Text
585
586 init(tgt: Rope, pos: Int)
587 do
588 nodes = tgt.leaves(pos)
589 self.pos = pos
590 if pos < 0 or pos >= tgt.length then return
591 make_substring
592 end
593
594 # Compute the bounds of the current substring and makes the substring
595 private fun make_substring
596 do
597 substring = nodes.item.str
598 var min = 0
599 var length = substring.length
600 if nodes.index < pos then
601 min = pos - nodes.index
602 end
603 substring = substring.substring(min, length)
604 end
605
606 redef fun is_ok do return nodes.is_ok
607
608 redef fun item
609 do
610 assert is_ok
611 return substring
612 end
613
614 redef fun index do return pos
615
616 redef fun next
617 do
618 pos += substring.length
619 nodes.next
620 if nodes.is_ok then make_substring
621 end
622
623 end
624
625 class RopeCharIterator
626 super IndexedIterator[Char]
627
628 var substrings: IndexedIterator[Text]
629
630 var pos: Int
631
632 var max: Int
633
634 var substr_iter: IndexedIterator[Char]
635
636 init(tgt: Rope, from: Int)
637 do
638 substrings = tgt.substrings_from(from)
639 max = tgt.length - 1
640 if not substrings.is_ok then
641 pos = tgt.length
642 return
643 end
644 pos = from
645 substr_iter = substrings.item.chars.iterator
646 end
647
648 redef fun item do return substr_iter.item
649
650 redef fun is_ok do return pos <= max
651
652 redef fun index do return pos
653
654 redef fun next
655 do
656 pos += 1
657 if substr_iter.is_ok then
658 substr_iter.next
659 end
660 if not substr_iter.is_ok then
661 substrings.next
662 if substrings.is_ok then
663 substr_iter = substrings.item.chars.iterator
664 end
665 end
666 end
667 end
668
669 private class ReversePostfix
670 super IndexedIterator[RopeNode]
671
672 var target: Rope
673
674 var pos: Int
675
676 var min = 0
677
678 var stack = new List[IteratorElement]
679
680 init from(tgt: Rope, pos: Int)
681 do
682 self.pos = pos
683 target = tgt
684 if pos < 0 or pos >= tgt.length then return
685 var path = tgt.node_at(pos)
686 self.pos -= path.offset
687 for i in path.stack do
688 var elemt = new IteratorElement(i.node)
689 elemt.right = true
690 if i.left then
691 elemt.left = true
692 end
693 stack.push elemt
694 end
695 stack.push(new IteratorElement(path.leaf))
696 stack.last.done = true
697 end
698
699 redef fun item do
700 assert is_ok
701 return stack.last.node
702 end
703
704 redef fun is_ok do return not stack.is_empty
705
706 redef fun index do return pos
707
708 redef fun next
709 do
710 if stack.is_empty then return
711 if pos < min then
712 stack.clear
713 return
714 end
715 var lst = stack.last
716 if lst.done then
717 stack.pop
718 next
719 return
720 end
721 if not lst.right then
722 var nod = lst.node.as(Concat)
723 var rgt = nod.right
724 lst.right = true
725 if rgt != null then
726 stack.push(new IteratorElement(rgt))
727 next
728 return
729 end
730 end
731 if not lst.left then
732 var nod = lst.node.as(Concat)
733 var lft = nod.left
734 lst.left = true
735 if lft != null then
736 stack.push(new IteratorElement(lft))
737 next
738 return
739 end
740 end
741 if lst.node isa Leaf then pos -= lst.node.length
742 lst.done = true
743 end
744 end
745
746 private class ReverseLeavesIterator
747 super IndexedIterator[Leaf]
748
749 var nodes: ReversePostfix
750
751 init(tgt: Rope, from: Int)
752 do
753 nodes = tgt.reverse_postfix(from)
754 end
755
756 redef fun is_ok do return nodes.is_ok
757
758 redef fun item do
759 assert is_ok
760 return nodes.item.as(Leaf)
761 end
762
763 redef fun next do
764 while nodes.is_ok do
765 nodes.next
766 if nodes.is_ok then if nodes.item isa Leaf then break
767 end
768 end
769
770 redef fun index do return nodes.index
771
772 end
773
774 private class ReverseSubstringsIterator
775 super IndexedIterator[Text]
776
777 var leaves: ReverseLeavesIterator
778
779 var pos: Int
780
781 var str: Text
782
783 init(tgt: Rope, from: Int)
784 do
785 leaves = tgt.reverse_leaves(from)
786 pos = from
787 if not leaves.is_ok then return
788 str = leaves.item.str
789 make_substring
790 end
791
792 fun make_substring
793 do
794 if pos >= (leaves.index + str.length - 1) then return
795 str = str.substring(0, (pos - leaves.index + 1))
796 end
797
798 redef fun is_ok do return leaves.is_ok
799
800 redef fun item do
801 assert is_ok
802 return str
803 end
804
805 redef fun index do return pos
806
807 redef fun next do
808 pos -= str.length
809 leaves.next
810 if not leaves.is_ok then return
811 str = leaves.item.str
812 make_substring
813 end
814 end
815
816 private class ReverseRopeCharIterator
817 super IndexedIterator[Char]
818
819 var substrs: IndexedIterator[Text]
820
821 var pos: Int
822
823 var subiter: IndexedIterator[Char]
824
825 init(tgt: Rope, from: Int)
826 do
827 substrs = tgt.reverse_substrings_from(from)
828 if not substrs.is_ok then
829 pos = -1
830 return
831 end
832 subiter = substrs.item.chars.reverse_iterator
833 pos = from
834 end
835
836 redef fun is_ok do return pos >= 0
837
838 redef fun item do
839 assert is_ok
840 return subiter.item
841 end
842
843 redef fun index do return pos
844
845 redef fun next do
846 pos -= 1
847 if subiter.is_ok then subiter.next
848 if not subiter.is_ok then
849 if substrs.is_ok then substrs.next
850 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
851 end
852 end
853 end
854