b1aad2e2079588de865785bf9407c0f70dfcb3d3
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it if you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or as a part of
9 # another product.
10
11 # Nit implementation of the Ropes (see Ropes : An Alternative to Strings,
12 # SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995)
13 # Hans. J Boehm, Russ Atkinson and Michael Plass)
14 #
15 # A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data.
16 module ropes
17
18 intrude import string
19
20 # Used when searching for a particular node
21 # Returns the path to the node from the root of the rope
22 # Also, the node and the offset for seeked position in the rope
23 private class Path
24 # Leaf found
25 var leaf: Leaf
26 # Offset in leaf
27 var offset: Int
28 # Stack of the nodes traversed, and the path used
29 var stack: List[PathElement]
30 end
31
32 # An element for a Path, has the concat node and whether or not
33 # left or right child was visited.
34 private class PathElement
35 # Visited node
36 var node: Concat
37 # Was the left child visited ?
38 var left = false
39 # Was the right child visited ?
40 var right = false
41 end
42
43 # A node for a Rope
44 private abstract class RopeNode
45 # Length of the node
46 var length = 0
47 end
48
49 # Node that represents a concatenation between two nodes (of any RopeNode type)
50 private class Concat
51 super RopeNode
52
53 # Left child of the node
54 var _left: nullable RopeNode = null
55 # Right child of the node
56 var _right: nullable RopeNode = null
57
58 fun left: nullable RopeNode do return _left
59 fun right: nullable RopeNode do return _right
60
61 fun left=(l: RopeNode)
62 do
63 _left = l
64 length = l.length
65 if _right != null then length += _right.length
66 end
67
68 fun right=(r: RopeNode)
69 do
70 _right = r
71 length = r.length
72 if _left != null then length += _left.length
73 end
74 end
75
76 # Leaf of a Rope, contains a FlatString
77 private class Leaf
78 super RopeNode
79
80 # Encapsulated FlatString in the leaf node
81 var str: FlatString
82
83 init(val: FlatString) do
84 self.str = val
85 length = str.length
86 end
87
88 end
89
90 # Basic structure, binary tree with a root node.
91 #
92 # Also shared services by subsequent implementations.
93 abstract class Rope
94 super Text
95
96 # Root node, entry point of a Rope.
97 private var root: RopeNode
98
99 # Cached version of self as a flat String
100 private var str_representation: nullable NativeString = null
101
102 # Empty Rope
103 init do from("")
104
105 # Creates a new Rope with `s` as root
106 init from(s: String) do
107 if s isa RopeString then root = s.root else root = new Leaf(s.as(FlatString))
108 end
109
110 private init from_root(r: RopeNode)
111 do
112 root = r
113 end
114
115 redef fun length do return root.length
116
117 # Iterator on the nodes of the rope, in forward postfix order
118 private fun postfix(from: Int): Postfix do return new Postfix.from(self, from)
119
120 # Iterator on the leaves of the rope, forward order
121 private fun leaves(from: Int): LeavesIterator do return new LeavesIterator(self, from)
122
123 # Iterator on the substrings from 0, in forward order
124 fun substrings: IndexedIterator[Text] do return new SubstringsIterator(self, 0)
125
126 # Iterator on the substrings, starting at position `from`, in forward order
127 fun substrings_from(from: Int): IndexedIterator[Text] do return new SubstringsIterator(self, from)
128
129 # Iterator on the nodes of the rope, in backwards postfix order
130 private fun reverse_postfix(from: Int): ReversePostfix do return new ReversePostfix.from(self, from)
131
132 # Iterator on the leaves of the rope, backwards order
133 private fun reverse_leaves(from: Int): ReverseLeavesIterator do return new ReverseLeavesIterator(self,from)
134
135 # Iterator on the substrings, in reverse order
136 fun reverse_substrings: IndexedIterator[Text] do return new ReverseSubstringsIterator(self, length-1)
137
138 # Iterator on the substrings, in reverse order, starting iteration at position `from`
139 fun reverse_substrings_from(from: Int): IndexedIterator[Text] do return new ReverseSubstringsIterator(self, from)
140
141 redef fun output
142 do
143 for i in substrings do
144 i.output
145 end
146 end
147
148 redef fun to_cstring
149 do
150 if str_representation != null then return str_representation.as(not null)
151
152 var native_final_str = calloc_string(length + 1)
153
154 native_final_str[length] = '\0'
155
156 if self.length == 0 then
157 str_representation = native_final_str
158 return native_final_str
159 end
160
161 var offset = 0
162
163 for i in substrings do
164 var str = i.flatten
165 if str isa FlatString then str.items.copy_to(native_final_str, str.length, str.index_from, offset)
166 offset += i.length
167 end
168
169 str_representation = native_final_str
170
171 return native_final_str
172 end
173
174 # Path to the Leaf for `position`
175 private fun node_at(position: Int): Path
176 do
177 assert position >= 0 and position < length
178 return get_node_from(root.as(not null), 0, position, new List[PathElement])
179 end
180
181 # Builds the path to Leaf at position `seek_pos`
182 private fun get_node_from(node: RopeNode, curr_pos: Int, seek_pos: Int, stack: List[PathElement]): Path
183 do
184 assert curr_pos >= 0
185 if node isa Leaf then return new Path(node, seek_pos - curr_pos, stack)
186 node = node.as(Concat)
187
188 if node.left != null then
189 var next_pos = curr_pos + node.left.length
190 stack.add(new PathElement(node))
191 if next_pos > seek_pos then
192 stack.last.left = true
193 return get_node_from(node.left.as(not null), curr_pos, seek_pos, stack)
194 end
195 stack.last.right = true
196 return get_node_from(node.right.as(not null), next_pos, seek_pos, stack)
197 else
198 var vis = new PathElement(node)
199 vis.right = true
200 stack.add(vis)
201 return get_node_from(node.right.as(not null), curr_pos, seek_pos, stack)
202 end
203 end
204
205 redef fun ==(o)
206 do
207 if not o isa Text then return false
208 if o.length != self.length then return false
209 var oit = o.chars.iterator
210 for i in self.chars.iterator do
211 if i != oit.item then return false
212 oit.next
213 end
214 return true
215 end
216 end
217
218 # Rope that cannot be modified
219 class RopeString
220 super Rope
221 super String
222
223 redef fun to_s do return self
224
225 redef fun empty do return once new RopeString.from("")
226
227 redef fun reversed
228 do
229 var ret = empty.as(RopeString)
230 for i in substrings do
231 ret = ret.prepend(i.reversed.to_s).as(RopeString)
232 end
233 return ret
234 end
235
236 redef fun to_upper
237 do
238 var ret = empty
239 for i in substrings do
240 ret += i.to_upper
241 end
242 return ret
243 end
244
245 redef fun to_lower
246 do
247 var ret = empty
248 for i in substrings do
249 ret += i.to_lower
250 end
251 return ret
252 end
253
254 redef fun +(o) do return insert_at(o.to_s, length)
255
256 redef fun *(n)
257 do
258 var ret = new RopeString.from("")
259 for i in [0..n[ do
260 ret = (ret + self).as(RopeString)
261 end
262 return ret
263 end
264
265 # Inserts a String `str` at position `pos`
266 fun insert_at(str: String, pos: Int): RopeString
267 do
268 if str.length == 0 then return self
269 if self.length == 0 then return new RopeString.from(str)
270
271 assert pos >= 0 and pos <= length
272
273 if pos == length then return append(str).as(RopeString)
274
275 var path = node_at(pos)
276
277 var last_concat = new Concat
278
279 if path.offset == 0 then
280 last_concat.right = path.leaf
281 if str isa FlatString then last_concat.left = new Leaf(str) else last_concat.left = str.as(RopeString).root
282 else if path.offset == path.leaf.length then
283 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
284 last_concat.left = path.leaf
285 else
286 var s = path.leaf.str
287 var l_half = s.substring(0, s.length - path.offset)
288 var r_half = s.substring_from(s.length - path.offset)
289 var cct = new Concat
290 cct.right = new Leaf(r_half)
291 last_concat.left = new Leaf(l_half)
292 if str isa FlatString then last_concat.right = new Leaf(str) else last_concat.right = str.as(RopeString).root
293 cct.left = last_concat
294 last_concat = cct
295 end
296
297 for i in path.stack.reverse_iterator do
298 var nod = new Concat
299 if i.left then
300 nod.right = i.node.right.as(not null)
301 nod.left = last_concat
302 else
303 nod.left = i.node.left.as(not null)
304 nod.right = last_concat
305 end
306 last_concat = nod
307 end
308
309 return new RopeString.from_root(last_concat)
310 end
311
312 # Adds `s` at the beginning of self
313 fun prepend(s: String): String do return insert_at(s, 0)
314
315 # Adds `s` at the end of self
316 fun append(s: String): String
317 do
318 if self.is_empty then return s
319 return new RopeString.from_root(append_to_path(root,s))
320 end
321
322 # Builds a new path from root to the rightmost node with s appended
323 private fun append_to_path(node: RopeNode, s: String): RopeNode
324 do
325 var cct = new Concat
326 if node isa Leaf then
327 cct.left = node
328 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
329 else if node isa Concat then
330 var right = node.right
331 if node.left != null then cct.left = node.left.as(not null)
332 if right == null then
333 if s isa FlatString then cct.right = new Leaf(s) else cct.right = s.as(RopeString).root
334 else
335 cct.right = append_to_path(right, s)
336 end
337 end
338 return cct
339 end
340
341 # O(log(n))
342 #
343 # var rope = new RopeString.from("abcd")
344 # assert rope.substring(1, 2) == "bc"
345 # assert rope.substring(-1, 2) == "a"
346 # assert rope.substring(1, 0) == ""
347 # assert rope.substring(2, 5) == "cd"
348 #
349 redef fun substring(pos, len)
350 do
351 if pos < 0 then
352 len += pos
353 pos = 0
354 end
355
356 if pos + len > length then len = length - pos
357
358 if len <= 0 then return new RopeString.from("")
359
360 var path = node_at(pos)
361
362 var lf = path.leaf
363 var offset = path.offset
364
365 if path.leaf.str.length - offset > len then lf = new Leaf(lf.str.substring(offset,len)) else lf = new Leaf(lf.str.substring_from(offset))
366
367 var nod: RopeNode = lf
368
369 for i in path.stack.reverse_iterator do
370 if i.right then continue
371 var tmp = new Concat
372 tmp.left = nod
373 var r = i.node.right
374 if r != null then tmp.right = r
375 nod = tmp
376 end
377
378 var ret = new RopeString
379 ret.root = nod
380
381 path = ret.node_at(len-1)
382
383 offset = path.offset
384 nod = new Leaf(path.leaf.str.substring(0, offset+1))
385
386 for i in path.stack.reverse_iterator do
387 if i.left then continue
388 var tmp = new Concat
389 tmp.right = nod
390 var l = i.node.left
391 if l != null then tmp.left = l
392 nod = tmp
393 end
394
395 ret.root = nod
396
397 return ret
398 end
399 end
400
401 # Used to iterate on a Rope
402 private class IteratorElement
403
404 init(e: RopeNode)
405 do
406 if e isa Leaf then
407 left = true
408 right = true
409 end
410 node = e
411 end
412
413 # The node being visited
414 var node: RopeNode
415 # If the node has a left child, was it visited ?
416 var left = false
417 # If the node has a right child, was it visited ?
418 var right = false
419 # Was the current node visited ?
420 var done = false
421 end
422
423 # Simple Postfix iterator on the nodes of a Rope
424 private class Postfix
425 super IndexedIterator[RopeNode]
426
427 # Target Rope to iterate on
428 var target: Rope
429
430 # Current position in Rope
431 var pos: Int
432
433 # Visited nodes
434 var stack = new List[IteratorElement]
435
436 init from(tgt: Rope, pos: Int)
437 do
438 self.target = tgt
439 self.pos = pos
440 if pos < 0 or pos >= tgt.length then return
441 var path = tgt.node_at(pos)
442 self.pos -= path.offset
443 for i in path.stack do
444 var item = new IteratorElement(i.node)
445 item.left = true
446 if i.right then item.right = true
447 stack.push item
448 end
449 var item = new IteratorElement(path.leaf)
450 item.done = true
451 stack.push item
452 end
453
454 redef fun item
455 do
456 assert is_ok
457 return stack.last.node
458 end
459
460 redef fun is_ok do return not stack.is_empty
461
462 redef fun index do return pos
463
464 redef fun next do
465 if stack.is_empty then return
466 if pos > target.length-1 then
467 stack.clear
468 return
469 end
470 var lst = stack.last
471 if lst.done then
472 if lst.node isa Leaf then
473 pos += lst.node.length
474 end
475 stack.pop
476 next
477 return
478 end
479 if not lst.left then
480 lst.left = true
481 var nod = lst.node
482 if nod isa Concat and nod.left != null then
483 stack.push(new IteratorElement(nod.left.as(not null)))
484 next
485 return
486 end
487 end
488 if not lst.right then
489 lst.right = true
490 var nod = lst.node
491 if nod isa Concat and nod.right != null then
492 stack.push(new IteratorElement(nod.right.as(not null)))
493 next
494 return
495 end
496 end
497 lst.done = true
498 end
499 end
500
501 # Iterates on the leaves (substrings) of the Rope
502 class LeavesIterator
503 super IndexedIterator[Leaf]
504
505 private var nodes: Postfix
506
507 init(tgt: Rope, pos: Int)
508 do
509 nodes = tgt.postfix(pos)
510 end
511
512 redef fun is_ok do return nodes.is_ok
513
514 redef fun item
515 do
516 assert is_ok
517 return nodes.item.as(Leaf)
518 end
519
520 redef fun index do return nodes.index
521
522 redef fun next
523 do
524 while nodes.is_ok do
525 nodes.next
526 if nodes.is_ok and nodes.item isa Leaf then break
527 end
528 end
529 end
530
531 # Uses the leaves and calculates a new substring on each iteration
532 class SubstringsIterator
533 super IndexedIterator[Text]
534
535 private var nodes: IndexedIterator[Leaf]
536
537 # Current position in Rope
538 var pos: Int
539
540 # Current substring, computed from the current Leaf and indexes
541 var substring: Text
542
543 init(tgt: Rope, pos: Int)
544 do
545 nodes = tgt.leaves(pos)
546 self.pos = pos
547 if pos < 0 or pos >= tgt.length then return
548 make_substring
549 end
550
551 # Compute the bounds of the current substring and makes the substring
552 private fun make_substring
553 do
554 substring = nodes.item.str
555 var min = 0
556 var length = substring.length
557 if nodes.index < pos then
558 min = pos - nodes.index
559 end
560 substring = substring.substring(min, length)
561 end
562
563 redef fun is_ok do return nodes.is_ok
564
565 redef fun item
566 do
567 assert is_ok
568 return substring
569 end
570
571 redef fun index do return pos
572
573 redef fun next
574 do
575 pos += substring.length
576 nodes.next
577 if nodes.is_ok then make_substring
578 end
579
580 end
581
582 class RopeCharIterator
583 super IndexedIterator[Char]
584
585 var substrings: IndexedIterator[Text]
586
587 var pos: Int
588
589 var max: Int
590
591 var substr_iter: IndexedIterator[Char]
592
593 init(tgt: Rope, from: Int)
594 do
595 substrings = tgt.substrings_from(from)
596 max = tgt.length - 1
597 if not substrings.is_ok then
598 pos = tgt.length
599 return
600 end
601 pos = from
602 substr_iter = substrings.item.chars.iterator
603 end
604
605 redef fun item do return substr_iter.item
606
607 redef fun is_ok do return pos <= max
608
609 redef fun index do return pos
610
611 redef fun next
612 do
613 pos += 1
614 if substr_iter.is_ok then
615 substr_iter.next
616 end
617 if not substr_iter.is_ok then
618 substrings.next
619 if substrings.is_ok then
620 substr_iter = substrings.item.chars.iterator
621 end
622 end
623 end
624 end
625
626 private class ReversePostfix
627 super IndexedIterator[RopeNode]
628
629 var target: Rope
630
631 var pos: Int
632
633 var min = 0
634
635 var stack = new List[IteratorElement]
636
637 init from(tgt: Rope, pos: Int)
638 do
639 self.pos = pos
640 target = tgt
641 if pos < 0 or pos >= tgt.length then return
642 var path = tgt.node_at(pos)
643 self.pos -= path.offset
644 for i in path.stack do
645 var elemt = new IteratorElement(i.node)
646 elemt.right = true
647 if i.left then
648 elemt.left = true
649 end
650 stack.push elemt
651 end
652 stack.push(new IteratorElement(path.leaf))
653 stack.last.done = true
654 end
655
656 redef fun item do
657 assert is_ok
658 return stack.last.node
659 end
660
661 redef fun is_ok do return not stack.is_empty
662
663 redef fun index do return pos
664
665 redef fun next
666 do
667 if stack.is_empty then return
668 if pos < min then
669 stack.clear
670 return
671 end
672 var lst = stack.last
673 if lst.done then
674 stack.pop
675 next
676 return
677 end
678 if not lst.right then
679 var nod = lst.node.as(Concat)
680 var rgt = nod.right
681 lst.right = true
682 if rgt != null then
683 stack.push(new IteratorElement(rgt))
684 next
685 return
686 end
687 end
688 if not lst.left then
689 var nod = lst.node.as(Concat)
690 var lft = nod.left
691 lst.left = true
692 if lft != null then
693 stack.push(new IteratorElement(lft))
694 next
695 return
696 end
697 end
698 if lst.node isa Leaf then pos -= lst.node.length
699 lst.done = true
700 end
701 end
702
703 private class ReverseLeavesIterator
704 super IndexedIterator[Leaf]
705
706 var nodes: ReversePostfix
707
708 init(tgt: Rope, from: Int)
709 do
710 nodes = tgt.reverse_postfix(from)
711 end
712
713 redef fun is_ok do return nodes.is_ok
714
715 redef fun item do
716 assert is_ok
717 return nodes.item.as(Leaf)
718 end
719
720 redef fun next do
721 while nodes.is_ok do
722 nodes.next
723 if nodes.is_ok then if nodes.item isa Leaf then break
724 end
725 end
726
727 redef fun index do return nodes.index
728
729 end
730
731 private class ReverseSubstringsIterator
732 super IndexedIterator[Text]
733
734 var leaves: ReverseLeavesIterator
735
736 var pos: Int
737
738 var str: Text
739
740 init(tgt: Rope, from: Int)
741 do
742 leaves = tgt.reverse_leaves(from)
743 pos = from
744 if not leaves.is_ok then return
745 str = leaves.item.str
746 make_substring
747 end
748
749 fun make_substring
750 do
751 if pos >= (leaves.index + str.length - 1) then return
752 str = str.substring(0, (pos - leaves.index + 1))
753 end
754
755 redef fun is_ok do return leaves.is_ok
756
757 redef fun item do
758 assert is_ok
759 return str
760 end
761
762 redef fun index do return pos
763
764 redef fun next do
765 pos -= str.length
766 leaves.next
767 if not leaves.is_ok then return
768 str = leaves.item.str
769 make_substring
770 end
771 end
772
773 private class ReverseRopeCharIterator
774 super IndexedIterator[Char]
775
776 var substrs: IndexedIterator[Text]
777
778 var pos: Int
779
780 var subiter: IndexedIterator[Char]
781
782 init(tgt: Rope, from: Int)
783 do
784 substrs = tgt.reverse_substrings_from(from)
785 if not substrs.is_ok then
786 pos = -1
787 return
788 end
789 subiter = substrs.item.chars.reverse_iterator
790 pos = from
791 end
792
793 redef fun is_ok do return pos >= 0
794
795 redef fun item do
796 assert is_ok
797 return subiter.item
798 end
799
800 redef fun index do return pos
801
802 redef fun next do
803 pos -= 1
804 if subiter.is_ok then subiter.next
805 if not subiter.is_ok then
806 if substrs.is_ok then substrs.next
807 if substrs.is_ok then subiter = substrs.item.chars.reverse_iterator
808 end
809 end
810 end
811