e43508e81fbe10012e7b4aa4e646a8034cd8399d
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Cached version of self as a flat String
100 private var str_representation: nullable NativeString = null
101
102 # Empty Rope
103 init do from("")
104
105 # Creates a new Rope with `s` as root
106 init from(s: String) do
107 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
108 end
109
110 private init from_root(r: RopeNode)
111 do
112 root = r
113 end
114
115 redef fun length do return root.length
116
117 # Iterator on the nodes of the rope, in forward postfix order
118 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
119
120 # Iterator on the leaves of the rope, forward order
121 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
122
123 # Iterator on the substrings from 0, in forward order
124 redef fun substrings do return new SubstringsIterator(self, 0)
125
126 # Iterator on the substrings, starting at position `from`, in forward order
127 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
128
129 # Iterator on the nodes of the rope, in backwards postfix order
130 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
131
132 # Iterator on the leaves of the rope, backwards order
133 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
134
135 # Iterator on the substrings, in reverse order
136 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
137
138 # Iterator on the substrings, in reverse order, starting iteration at position `from`
139 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
140
141 redef fun output
142 do
143 for i in substrings do
144 i.output
145 end
146 end
147
148 redef fun to_cstring
149 do
150 if str_representation != null then return str_representation.as(not null)
151
152 var native_final_str = calloc_string(length + 1)
153
154 native_final_str[length] = '\0'
155
156 if self.length == 0 then
157 str_representation = native_final_str
158 return native_final_str
159 end
160
161 var offset = 0
162
163 for i in substrings do
164 var str = i.flatten
165 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
166 offset += i.length
167 end
168
169 str_representation = native_final_str
170
171 return native_final_str
172 end
173
174 # Path to the Leaf for `position`
175 private fun node_at(position: Int): Path
176 do
177 assert position >= 0 and position < length
178 return get_node_from(root.as(not null), 0, position, new List[PathElement])
179 end
180
181 # Builds the path to Leaf at position `seek_pos`
182 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
183 do
184 assert curr_pos >= 0
185 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
186 node = node.as(Concat)
187
188 if node.left != null then
189 var next_pos = curr_pos + node.left.length
190 stack.add(new PathElement(node))
191 if next_pos > seek_pos then
192 stack.last.left = true
193 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
194 end
195 stack.last.right = true
196 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
197 else
198 var vis = new PathElement(node)
199 vis.right = true
200 stack.add(vis)
201 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
202 end
203 end
204
205 redef fun ==(o)
206 do
207 if not o isa Text then return false
208 if o.length != self.length then return false
209 var oit = o.chars.iterator
210 for i in self.chars.iterator do
211 if i != oit.item then return false
212 oit.next
213 end
214 return true
215 end
216 end
217
218 # Rope that cannot be modified
219 class RopeString
220 super Rope
221 super String
222
223 redef fun to_s do return self
224
225 redef fun empty do return once new RopeString.from("")
226
227 redef var chars: SequenceRead[Char] = new RopeStringChars(self)
228
229 redef fun reversed
230 do
231 var ret = empty.as(RopeString)
232 for i in substrings do
233 ret = ret.prepend(i.reversed.to_s).as(RopeString)
234 end
235 return ret
236 end
237
238 redef fun to_upper
239 do
240 var ret = empty
241 for i in substrings do
242 ret += i.to_upper
243 end
244 return ret
245 end
246
247 redef fun to_lower
248 do
249 var ret = empty
250 for i in substrings do
251 ret += i.to_lower
252 end
253 return ret
254 end
255
256 redef fun +(o) do return insert_at(o.to_s, length)
257
258 redef fun *(n)
259 do
260 var ret = new RopeString.from("")
261 for i in [0..n[ do
262 ret = (ret + self).as(RopeString)
263 end
264 return ret
265 end
266
267 # Inserts a String `str` at position `pos`
268 fun insert_at(str: String, pos: Int): RopeString
269 do
270 if str.length == 0 then return self
271 if self.length == 0 then return new RopeString.from(str)
272
273 assert pos >= 0 and pos <= length
274
275 if pos == length then return append(str).as(RopeString)
276
277 var path = node_at(pos)
278
279 var last_concat = new Concat
280
281 if path.offset == 0 then
282 last_concat.right = path.leaf
283 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
284 else if path.offset == path.leaf.length then
285 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
286 last_concat.left = path.leaf
287 else
288 var s = path.leaf.str
289 var l_half = s.substring(0, s.length - path.offset)
290 var r_half = s.substring_from(s.length - path.offset)
291 var cct = new Concat
292 cct.right = new Leaf(r_half)
293 last_concat.left = new Leaf(l_half)
294 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
295 cct.left = last_concat
296 last_concat = cct
297 end
298
299 for i in path.stack.reverse_iterator do
300 var nod = new Concat
301 if i.left then
302 nod.right = i.node.right.as(not null)
303 nod.left = last_concat
304 else
305 nod.left = i.node.left.as(not null)
306 nod.right = last_concat
307 end
308 last_concat = nod
309 end
310
311 return new RopeString.from_root(last_concat)
312 end
313
314 # Adds `s` at the beginning of self
315 fun prepend(s: String): String do return insert_at(s, 0)
316
317 # Adds `s` at the end of self
318 fun append(s: String): String
319 do
320 if self.is_empty then return s
321 return new RopeString.from_root(append_to_path(root,s))
322 end
323
324 # Builds a new path from root to the rightmost node with s appended
325 private fun append_to_path(node: RopeNode, s: String): RopeNode
326 do
327 var cct = new Concat
328 if node isa Leaf then
329 cct.left = node
330 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
331 else if node isa Concat then
332 var right = node.right
333 if node.left != null then cct.left = node.left.as(not null)
334 if right == null then
335 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
336 else
337 cct.right = append_to_path(right, s)
338 end
339 end
340 return cct
341 end
342
343 # O(log(n))
344 #
345 # var rope = new RopeString.from("abcd")
346 # assert rope.substring(1, 2) == "bc"
347 # assert rope.substring(-1, 2) == "a"
348 # assert rope.substring(1, 0) == ""
349 # assert rope.substring(2, 5) == "cd"
350 #
351 redef fun substring(pos, len)
352 do
353 if pos < 0 then
354 len += pos
355 pos = 0
356 end
357
358 if pos + len > length then len = length - pos
359
360 if len <= 0 then return new RopeString.from("")
361
362 var path = node_at(pos)
363
364 var lf = path.leaf
365 var offset = path.offset
366
367 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))
368
369 var nod: RopeNode = lf
370
371 for i in path.stack.reverse_iterator do
372 if i.right then continue
373 var tmp = new Concat
374 tmp.left = nod
375 var r = i.node.right
376 if r != null then tmp.right = r
377 nod = tmp
378 end
379
380 var ret = new RopeString
381 ret.root = nod
382
383 path = ret.node_at(len-1)
384
385 offset = path.offset
386 nod = new Leaf(path.leaf.str.substring(0, offset+1))
387
388 for i in path.stack.reverse_iterator do
389 if i.left then continue
390 var tmp = new Concat
391 tmp.right = nod
392 var l = i.node.left
393 if l != null then tmp.left = l
394 nod = tmp
395 end
396
397 ret.root = nod
398
399 return ret
400 end
401 end
402
403 private class RopeStringChars
404 super SequenceRead[Char]
405
406 var tgt: Rope
407
408 redef fun [](pos)
409 do
410 assert pos < tgt.length
411 var path = tgt.node_at(pos)
412 return path.leaf.str.chars[path.offset]
413 end
414
415 redef fun iterator do return iterator_from(0)
416
417 redef fun iterator_from(pos) do return new RopeCharIterator(tgt, pos)
418
419 redef fun reverse_iterator do return reverse_iterator_from(tgt.length-1)
420
421 redef fun reverse_iterator_from(pos) do return new ReverseRopeCharIterator(tgt, pos)
422 end
423
424 # Used to iterate on a Rope
425 private class IteratorElement
426
427 init(e: RopeNode)
428 do
429 if e isa Leaf then
430 left = true
431 right = true
432 end
433 node = e
434 end
435
436 # The node being visited
437 var node: RopeNode
438 # If the node has a left child, was it visited ?
439 var left = false
440 # If the node has a right child, was it visited ?
441 var right = false
442 # Was the current node visited ?
443 var done = false
444 end
445
446 # Simple Postfix iterator on the nodes of a Rope
447 private class Postfix
448 super IndexedIterator[RopeNode]
449
450 # Target Rope to iterate on
451 var target: Rope
452
453 # Current position in Rope
454 var pos: Int
455
456 # Visited nodes
457 var stack = new List[IteratorElement]
458
459 init from(tgt: Rope, pos: Int)
460 do
461 self.target = tgt
462 self.pos = pos
463 if pos < 0 or pos >= tgt.length then return
464 var path = tgt.node_at(pos)
465 self.pos -= path.offset
466 for i in path.stack do
467 var item = new IteratorElement(i.node)
468 item.left = true
469 if i.right then item.right = true
470 stack.push item
471 end
472 var item = new IteratorElement(path.leaf)
473 item.done = true
474 stack.push item
475 end
476
477 redef fun item
478 do
479 assert is_ok
480 return stack.last.node
481 end
482
483 redef fun is_ok do return not stack.is_empty
484
485 redef fun index do return pos
486
487 redef fun next do
488 if stack.is_empty then return
489 if pos > target.length-1 then
490 stack.clear
491 return
492 end
493 var lst = stack.last
494 if lst.done then
495 if lst.node isa Leaf then
496 pos += lst.node.length
497 end
498 stack.pop
499 next
500 return
501 end
502 if not lst.left then
503 lst.left = true
504 var nod = lst.node
505 if nod isa Concat and nod.left != null then
506 stack.push(new IteratorElement(nod.left.as(not null)))
507 next
508 return
509 end
510 end
511 if not lst.right then
512 lst.right = true
513 var nod = lst.node
514 if nod isa Concat and nod.right != null then
515 stack.push(new IteratorElement(nod.right.as(not null)))
516 next
517 return
518 end
519 end
520 lst.done = true
521 end
522 end
523
524 # Iterates on the leaves (substrings) of the Rope
525 class LeavesIterator
526 super IndexedIterator[Leaf]
527
528 private var nodes: Postfix
529
530 init(tgt: Rope, pos: Int)
531 do
532 nodes = tgt.postfix(pos)
533 end
534
535 redef fun is_ok do return nodes.is_ok
536
537 redef fun item
538 do
539 assert is_ok
540 return nodes.item.as(Leaf)
541 end
542
543 redef fun index do return nodes.index
544
545 redef fun next
546 do
547 while nodes.is_ok do
548 nodes.next
549 if nodes.is_ok and nodes.item isa Leaf then break
550 end
551 end
552 end
553
554 # Uses the leaves and calculates a new substring on each iteration
555 class SubstringsIterator
556 super IndexedIterator[Text]
557
558 private var nodes: IndexedIterator[Leaf]
559
560 # Current position in Rope
561 var pos: Int
562
563 # Current substring, computed from the current Leaf and indexes
564 var substring: Text
565
566 init(tgt: Rope, pos: Int)
567 do
568 nodes = tgt.leaves(pos)
569 self.pos = pos
570 if pos < 0 or pos >= tgt.length then return
571 make_substring
572 end
573
574 # Compute the bounds of the current substring and makes the substring
575 private fun make_substring
576 do
577 substring = nodes.item.str
578 var min = 0
579 var length = substring.length
580 if nodes.index < pos then
581 min = pos - nodes.index
582 end
583 substring = substring.substring(min, length)
584 end
585
586 redef fun is_ok do return nodes.is_ok
587
588 redef fun item
589 do
590 assert is_ok
591 return substring
592 end
593
594 redef fun index do return pos
595
596 redef fun next
597 do
598 pos += substring.length
599 nodes.next
600 if nodes.is_ok then make_substring
601 end
602
603 end
604
605 class RopeCharIterator
606 super IndexedIterator[Char]
607
608 var substrings: IndexedIterator[Text]
609
610 var pos: Int
611
612 var max: Int
613
614 var substr_iter: IndexedIterator[Char]
615
616 init(tgt: Rope, from: Int)
617 do
618 substrings = tgt.substrings_from(from)
619 max = tgt.length - 1
620 if not substrings.is_ok then
621 pos = tgt.length
622 return
623 end
624 pos = from
625 substr_iter = substrings.item.chars.iterator
626 end
627
628 redef fun item do return substr_iter.item
629
630 redef fun is_ok do return pos <= max
631
632 redef fun index do return pos
633
634 redef fun next
635 do
636 pos += 1
637 if substr_iter.is_ok then
638 substr_iter.next
639 end
640 if not substr_iter.is_ok then
641 substrings.next
642 if substrings.is_ok then
643 substr_iter = substrings.item.chars.iterator
644 end
645 end
646 end
647 end
648
649 private class ReversePostfix
650 super IndexedIterator[RopeNode]
651
652 var target: Rope
653
654 var pos: Int
655
656 var min = 0
657
658 var stack = new List[IteratorElement]
659
660 init from(tgt: Rope, pos: Int)
661 do
662 self.pos = pos
663 target = tgt
664 if pos < 0 or pos >= tgt.length then return
665 var path = tgt.node_at(pos)
666 self.pos -= path.offset
667 for i in path.stack do
668 var elemt = new IteratorElement(i.node)
669 elemt.right = true
670 if i.left then
671 elemt.left = true
672 end
673 stack.push elemt
674 end
675 stack.push(new IteratorElement(path.leaf))
676 stack.last.done = true
677 end
678
679 redef fun item do
680 assert is_ok
681 return stack.last.node
682 end
683
684 redef fun is_ok do return not stack.is_empty
685
686 redef fun index do return pos
687
688 redef fun next
689 do
690 if stack.is_empty then return
691 if pos < min then
692 stack.clear
693 return
694 end
695 var lst = stack.last
696 if lst.done then
697 stack.pop
698 next
699 return
700 end
701 if not lst.right then
702 var nod = lst.node.as(Concat)
703 var rgt = nod.right
704 lst.right = true
705 if rgt != null then
706 stack.push(new IteratorElement(rgt))
707 next
708 return
709 end
710 end
711 if not lst.left then
712 var nod = lst.node.as(Concat)
713 var lft = nod.left
714 lst.left = true
715 if lft != null then
716 stack.push(new IteratorElement(lft))
717 next
718 return
719 end
720 end
721 if lst.node isa Leaf then pos -= lst.node.length
722 lst.done = true
723 end
724 end
725
726 private class ReverseLeavesIterator
727 super IndexedIterator[Leaf]
728
729 var nodes: ReversePostfix
730
731 init(tgt: Rope, from: Int)
732 do
733 nodes = tgt.reverse_postfix(from)
734 end
735
736 redef fun is_ok do return nodes.is_ok
737
738 redef fun item do
739 assert is_ok
740 return nodes.item.as(Leaf)
741 end
742
743 redef fun next do
744 while nodes.is_ok do
745 nodes.next
746 if nodes.is_ok then if nodes.item isa Leaf then break
747 end
748 end
749
750 redef fun index do return nodes.index
751
752 end
753
754 private class ReverseSubstringsIterator
755 super IndexedIterator[Text]
756
757 var leaves: ReverseLeavesIterator
758
759 var pos: Int
760
761 var str: Text
762
763 init(tgt: Rope, from: Int)
764 do
765 leaves = tgt.reverse_leaves(from)
766 pos = from
767 if not leaves.is_ok then return
768 str = leaves.item.str
769 make_substring
770 end
771
772 fun make_substring
773 do
774 if pos >= (leaves.index + str.length - 1) then return
775 str = str.substring(0, (pos - leaves.index + 1))
776 end
777
778 redef fun is_ok do return leaves.is_ok
779
780 redef fun item do
781 assert is_ok
782 return str
783 end
784
785 redef fun index do return pos
786
787 redef fun next do
788 pos -= str.length
789 leaves.next
790 if not leaves.is_ok then return
791 str = leaves.item.str
792 make_substring
793 end
794 end
795
796 private class ReverseRopeCharIterator
797 super IndexedIterator[Char]
798
799 var substrs: IndexedIterator[Text]
800
801 var pos: Int
802
803 var subiter: IndexedIterator[Char]
804
805 init(tgt: Rope, from: Int)
806 do
807 substrs = tgt.reverse_substrings_from(from)
808 if not substrs.is_ok then
809 pos = -1
810 return
811 end
812 subiter = substrs.item.chars.reverse_iterator
813 pos = from
814 end
815
816 redef fun is_ok do return pos >= 0
817
818 redef fun item do
819 assert is_ok
820 return subiter.item
821 end
822
823 redef fun index do return pos
824
825 redef fun next do
826 pos -= 1
827 if subiter.is_ok then subiter.next
828 if not subiter.is_ok then
829 if substrs.is_ok then substrs.next
830 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
831 end
832 end
833 end
834