ropes: Reset everything when clearing.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 # See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
20 # Vol. 25(12), 1315-1330 (December 1995)`.
21 #
22 # The implementation developed here provides an automatic change
23 # of data structure depending on the length of the leaves.
24 #
25 # The length from which a `Rope` is built from a `flat` string
26 # if defined at top-level (see `maxlen`) and can be redefined by clients
27 # depending on their needs (e.g. if you need to bench the speed of
28 # the creation of concat nodes, lower the size of maxlen).
29 #
30 # A Rope as defined in the original paper is a Tree made of concatenation
31 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
32 #
33 # Example :
34 #
35 # ` Concat `
36 # ` / \ `
37 # ` Concat Concat `
38 # ` / \ / \ `
39 # `"My" " Name" " is" " Simon." `
40 #
41 # Note that the above example is not representative of the actual implementation
42 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
43 # height (hence, this rope here might actually be a `FlatString` !).
44 module ropes
45
46 intrude import string
47
48 # Maxlen is the maximum length of a Leaf node
49 #
50 # When concatenating two leaves, if `new_length` > `maxlen`,
51 # A `Concat` node is created instead
52 #
53 # Its purpose is to limit the depth of the `Rope` (this
54 # improves performance when accessing/iterating).
55 fun maxlen: Int do return 64
56
57 # String using a tree-based representation with leaves as `FlatStrings`
58 private abstract class Rope
59 super Text
60 end
61
62 private abstract class RopeString
63 super Rope
64 super String
65
66 redef fun chars is cached do return new RopeChars(self)
67 end
68
69 # Node that represents a concatenation between two `String`
70 private class Concat
71 super RopeString
72
73 redef var length: Int
74
75 redef fun substrings do return new RopeSubstrings(self)
76
77 redef fun empty do return ""
78
79 redef fun to_cstring is cached do
80 var len = length
81 var ns = new NativeString(len + 1)
82 ns[len] = '\0'
83 var off = 0
84 for i in substrings do
85 var ilen = i.length
86 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
87 off += ilen
88 end
89 return ns
90 end
91
92 # Left child of the node
93 var left: String
94 # Right child of the node
95 var right: String
96
97 init(l: String, r: String) is old_style_init do
98 left = l
99 right = r
100 length = l.length + r.length
101 end
102
103 redef fun output do
104 left.output
105 right.output
106 end
107
108 redef fun iterator do return new RopeIter(self)
109
110 redef fun *(i) do
111 var x: String = self
112 for j in [1 .. i[ do x += self
113 return x
114 end
115
116 redef fun [](i) do
117 var llen = left.length
118 if i >= llen then return right[i - llen]
119 return left[i]
120 end
121
122 redef fun substring(from, len) do
123 var llen = left.length
124 if from < llen then
125 if from + len < llen then return left.substring(from,len)
126 var lsublen = llen - from
127 return left.substring_from(from) + right.substring(0, len - lsublen)
128 else
129 return right.substring(from - llen, len)
130 end
131 end
132
133 redef fun reversed do return new Concat(right.reversed, left.reversed)
134
135 redef fun insert_at(s, pos) do
136 if pos > left.length then
137 return left + right.insert_at(s, pos - left.length)
138 end
139 return left.insert_at(s, pos) + right
140 end
141
142 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
143
144 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
145
146 redef fun +(o) do
147 var s = o.to_s
148 var slen = s.length
149 if s isa Concat then
150 return new Concat(self, s)
151 else
152 var r = right
153 var rlen = r.length
154 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
155 return new Concat(left, r + s)
156 end
157 end
158 end
159
160 # Mutable `Rope`, optimized for concatenation operations
161 #
162 # A `RopeBuffer` is an efficient way of building a `String` when
163 # concatenating small strings.
164 #
165 # It does concatenations in an optimized way by having a
166 # mutable part and an immutable part built by efficiently
167 # concatenating strings in chain.
168 #
169 # Every concatenation operation is done by copying a string to
170 # the mutable part and flushing it when full.
171 #
172 # However, when a long string is appended to the `Buffer`,
173 # the concatenation is done at it would be in a `Rope`.
174 class RopeBuffer
175 super Rope
176 super Buffer
177
178 redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self)
179
180 # The final string being built on the fly
181 private var str: String is noinit
182
183 # Current concatenation buffer
184 private var ns: NativeString is noinit
185
186 # Next available (e.g. unset) character in the `Buffer`
187 private var rpos = 0
188
189 # Keeps track of the buffer's currently dumped part
190 #
191 # This might happen if for instance, a String was being
192 # built by concatenating small parts of string and suddenly
193 # a long string (length > maxlen) is appended.
194 private var dumped: Int is noinit
195
196 # Length of the complete rope
197 redef var length = 0
198
199 # Length of the mutable part
200 #
201 # Is also used as base to compute the size of the next
202 # mutable native string (`ns`)
203 private var buf_size: Int is noinit
204
205 redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
206
207 # Builds an empty `RopeBuffer`
208 init do
209 str = ""
210 ns = new NativeString(maxlen)
211 buf_size = maxlen
212 dumped = 0
213 end
214
215 # Builds a new `RopeBuffer` with `str` in it.
216 init from(str: String) do
217 self.str = str
218 ns = new NativeString(maxlen)
219 buf_size = maxlen
220 length = str.length
221 dumped = 0
222 end
223
224 # Resets the informations of the `Buffer`
225 #
226 # This is called when doing in-place modifications
227 # on a previously to_s'd `RopeBuffer`
228 private fun reset do
229 var nns = new NativeString(buf_size)
230 var blen = rpos - dumped
231 ns.copy_to(nns, blen, dumped, 0)
232 dumped = 0
233 rpos = blen
234 written = false
235 end
236
237 redef fun empty do return new RopeBuffer
238
239 redef fun clear do
240 str = ""
241 length = 0
242 rpos = 0
243 dumped = 0
244 end
245
246 redef fun substring(from, count) do
247 var strlen = str.length
248
249 if from < 0 then
250 count += from
251 if count < 0 then count = 0
252 from = 0
253 end
254
255 if count > length then count = length - from
256
257 if count == 0 then return empty
258
259 if from < strlen then
260 var subpos = strlen - from
261 if count <= subpos then
262 return new RopeBuffer.from(str.substring(from, count))
263 else
264 var l = str.substring_from(from)
265 var rem = count - subpos
266 var nns = new NativeString(rem)
267 ns.copy_to(nns, rem, dumped, 0)
268 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
269 end
270 else
271 var nns = new NativeString(count)
272 ns.copy_to(nns, count, dumped, 0)
273 return new RopeBuffer.from(nns.to_s_with_length(count))
274 end
275 end
276
277 redef fun append(s) do
278 var slen = s.length
279 length += slen
280 var rp = rpos
281 if s isa Rope then
282 if rp > 0 and dumped != rp then
283 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
284 dumped = rp
285 end
286 str = str + s
287 return
288 end
289 if slen > maxlen then
290 if rp > 0 and dumped != rp then
291 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
292 dumped = rp
293 end
294 str = str + s
295 return
296 end
297 var remsp = buf_size - rp
298 var sits: NativeString
299 var begin: Int
300 if s isa FlatString then
301 begin = s.index_from
302 sits = s.items
303 else if s isa FlatBuffer then
304 begin = 0
305 sits = s.items
306 else
307 if slen <= remsp then
308 for i in s.chars do
309 ns[rpos] = i
310 rpos += 1
311 end
312 else
313 var spos = 0
314 for i in [0..remsp[ do
315 ns[rpos] = s[spos]
316 rpos += 1
317 spos += 1
318 end
319 dump_buffer
320 while spos < slen do
321 ns[rpos] = s[spos]
322 spos += 1
323 rpos += 1
324 end
325 end
326 return
327 end
328 if slen <= remsp then
329 sits.copy_to(ns, slen, begin, rp)
330 if rp == buf_size then
331 rpos = buf_size
332 dump_buffer
333 rpos = 0
334 else
335 rpos += slen
336 end
337 else
338 sits.copy_to(ns, remsp, begin, rp)
339 rpos = buf_size
340 dump_buffer
341 var nlen = slen - remsp
342 sits.copy_to(ns, nlen, begin + remsp, 0)
343 rpos = nlen
344 end
345 end
346
347 redef fun add(c) do
348 var rp = rpos
349 length += 1
350 ns[rp] = c
351 rp += 1
352 if rp == buf_size then
353 rpos = rp
354 dump_buffer
355 rp = 0
356 end
357 rpos = rp
358 end
359
360 # Converts the Buffer to a FlatString, appends it to
361 # the final String and re-allocates a new larger Buffer.
362 private fun dump_buffer do
363 written = false
364 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
365 str += nstr
366 var bs = buf_size
367 bs = bs * 2
368 ns = new NativeString(bs)
369 buf_size = bs
370 dumped = 0
371 end
372
373 redef fun output do
374 str.output
375 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
376 end
377
378 # Enlarge is useless here since the `Buffer`
379 # part is automatically dumped when necessary.
380 #
381 # Also, since the buffer can not be overused by a
382 # single string, there is no need for manual
383 # resizing.
384 #
385 # "You have no power here !"
386 redef fun enlarge(i) do end
387
388 redef fun to_s do
389 written = true
390 var nnslen = rpos - dumped
391 if nnslen == 0 then return str
392 return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
393 end
394
395 redef fun reverse do
396 str = str.reversed
397 var nns = new NativeString(buf_size)
398 var j = rpos
399 var mits = ns
400 for i in [0 .. rpos[ do
401 nns[i] = mits[j]
402 j -= 1
403 end
404 ns = nns
405 end
406
407 redef fun upper do
408 if written then reset
409 str = str.to_upper
410 var mits = ns
411 for i in [0 .. rpos[ do
412 mits[i] = mits[i].to_upper
413 end
414 end
415
416 redef fun lower do
417 if written then reset
418 str = str.to_lower
419 var mits = ns
420 for i in [0 .. rpos[ do
421 mits[i] = mits[i].to_lower
422 end
423 end
424 end
425
426 redef class FlatString
427
428 redef fun insert_at(s, pos) do
429 var l = substring(0, pos)
430 var r = substring_from(pos)
431 return l + s + r
432 end
433
434 redef fun +(o) do
435 var s = o.to_s
436 var slen = s.length
437 var mlen = length
438 if slen == 0 then return self
439 if mlen == 0 then return s
440 var nlen = slen + mlen
441 if s isa FlatString then
442 if nlen > maxlen then return new Concat(self, s)
443 var mits = items
444 var sifrom = s.index_from
445 var mifrom = index_from
446 var sits = s.items
447 var ns = new NativeString(nlen + 1)
448 mits.copy_to(ns, mlen, mifrom, 0)
449 sits.copy_to(ns, slen, sifrom, mlen)
450 return ns.to_s_with_length(nlen)
451 else if s isa Concat then
452 var sl = s.left
453 var sllen = sl.length
454 if sllen + mlen > maxlen then return new Concat(self, s)
455 return new Concat(self + sl, s.right)
456 else
457 abort
458 end
459 end
460 end
461
462 # A simple linked list for use with iterators
463 private class RopeIterPiece
464 # The encapsulated node of the `Rope`
465 var node: String
466 # Was its left child (if any) visited ?
467 var ldone: Bool
468 # Was its right child (if any) visited ?
469 var rdone: Bool
470 # The previous node in the list.
471 var prev: nullable RopeIterPiece
472 end
473
474 # A reverse iterator capable of working with `Rope` objects
475 private class RopeReviter
476 super IndexedIterator[Char]
477
478 # Current NativeString
479 var ns: String
480 # Current position in NativeString
481 var pns: Int
482 # Position in the Rope (0-indexed)
483 var pos: Int
484 # Iterator on the substrings, does the Postfix part of
485 # the Rope traversal.
486 var subs: IndexedIterator[String]
487
488 init(root: RopeString) is old_style_init do
489 pos = root.length - 1
490 subs = new ReverseRopeSubstrings(root)
491 ns = subs.item
492 pns = ns.length - 1
493 end
494
495 init from(root: RopeString, pos: Int) do
496 self.pos = pos
497 subs = new ReverseRopeSubstrings.from(root, pos)
498 ns = subs.item
499 pns = pos - subs.index
500 end
501
502 redef fun index do return pos
503
504 redef fun is_ok do return pos >= 0
505
506 redef fun item do return ns[pns]
507
508 redef fun next do
509 pns -= 1
510 pos -= 1
511 if pns >= 0 then return
512 if not subs.is_ok then return
513 subs.next
514 if not subs.is_ok then return
515 ns = subs.item
516 pns = ns.length - 1
517 end
518 end
519
520 # Forward iterator on the chars of a `Rope`
521 private class RopeIter
522 super IndexedIterator[Char]
523
524 # Position in current `String`
525 var pns: Int
526 # Current `String` being iterated on
527 var str: String
528 # Substrings of the Rope
529 var subs: IndexedIterator[String]
530 # Maximum position to iterate on (e.g. Rope.length)
531 var max: Int
532 # Position (char) in the Rope (0-indexed)
533 var pos: Int
534
535 init(root: RopeString) is old_style_init do
536 subs = new RopeSubstrings(root)
537 pns = 0
538 str = subs.item
539 max = root.length - 1
540 pos = 0
541 end
542
543 init from(root: RopeString, pos: Int) do
544 subs = new RopeSubstrings.from(root, pos)
545 pns = pos - subs.index
546 self.pos = pos
547 str = subs.item
548 max = root.length - 1
549 end
550
551 redef fun item do return str[pns]
552
553 redef fun is_ok do return pos <= max
554
555 redef fun index do return pos
556
557 redef fun next do
558 pns += 1
559 pos += 1
560 if pns < subs.item.length then return
561 if not subs.is_ok then return
562 subs.next
563 if not subs.is_ok then return
564 str = subs.item
565 pns = 0
566 end
567 end
568
569 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
570 private class ReverseRopeSubstrings
571 super IndexedIterator[String]
572
573 # Visit Stack
574 var iter: RopeIterPiece is noinit
575 # Position in `Rope`
576 var pos: Int is noinit
577
578 # Current leaf
579 var str: String is noinit
580
581 init(root: RopeString) is old_style_init do
582 var r = new RopeIterPiece(root, false, true, null)
583 pos = root.length - 1
584 var lnod: String = root
585 loop
586 if lnod isa Concat then
587 lnod = lnod.right
588 r = new RopeIterPiece(lnod, false, true, r)
589 else
590 str = lnod
591 iter = r
592 break
593 end
594 end
595 end
596
597 init from(root: RopeString, pos: Int) do
598 var r = new RopeIterPiece(root, false, true, null)
599 var rnod: String = root
600 var off = pos
601 loop
602 if rnod isa Concat then
603 if off >= rnod.left.length then
604 off -= rnod.left.length
605 rnod = rnod.right
606 r = new RopeIterPiece(rnod, false, true, r)
607 else
608 r.ldone = true
609 rnod = rnod.left
610 r = new RopeIterPiece(rnod, false, true, r)
611 end
612 else
613 str = rnod
614 r.ldone = true
615 iter = r
616 self.pos = pos - off
617 break
618 end
619 end
620 end
621
622 redef fun item do return str
623
624 redef fun index do return pos
625
626 redef fun is_ok do return pos >= 0
627
628 redef fun next do
629 if pos < 0 then return
630 var curr = iter.prev
631 var currit = curr.node
632 while curr != null do
633 currit = curr.node
634 if not currit isa Concat then
635 str = currit
636 pos -= str.length
637 iter = curr
638 return
639 end
640 if not curr.rdone then
641 curr.rdone = true
642 curr = new RopeIterPiece(currit.right, false, false, curr)
643 continue
644 end
645 if not curr.ldone then
646 curr.ldone = true
647 curr = new RopeIterPiece(currit.left, false, false, curr)
648 continue
649 end
650 curr = curr.prev
651 end
652 pos = -1
653 end
654 end
655
656 private class RopeBufSubstringIterator
657 super Iterator[String]
658
659 # Iterator on the substrings of the building string
660 var iter: Iterator[String]
661 # Makes a String out of the buffered part of the Ropebuffer
662 var nsstr: String
663 # Did we attain the buffered part ?
664 var nsstr_done = false
665
666 init(str: RopeBuffer) is old_style_init do
667 iter = str.str.substrings
668 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
669 if str.length == 0 then nsstr_done = true
670 end
671
672 redef fun is_ok do return iter.is_ok or not nsstr_done
673
674 redef fun item do
675 assert is_ok
676 if iter.is_ok then return iter.item
677 return nsstr
678 end
679
680 redef fun next do
681 if iter.is_ok then
682 iter.next
683 return
684 end
685 nsstr_done = true
686 end
687 end
688
689 # Substrings of a Rope (i.e. Postfix iterator on leaves)
690 private class RopeSubstrings
691 super IndexedIterator[String]
692
693 # Visit Stack
694 var iter: RopeIterPiece is noinit
695 # Position in `Rope`
696 var pos: Int is noinit
697 # Maximum position in `Rope` (i.e. length - 1)
698 var max: Int is noinit
699
700 # Current leaf
701 var str: String is noinit
702
703 init(root: RopeString) is old_style_init do
704 var r = new RopeIterPiece(root, true, false, null)
705 pos = 0
706 max = root.length - 1
707 var rnod: String = root
708 loop
709 if rnod isa Concat then
710 rnod = rnod.left
711 r = new RopeIterPiece(rnod, true, false, r)
712 else
713 str = rnod
714 r.rdone = true
715 iter = r
716 break
717 end
718 end
719 end
720
721 init from(root: RopeString, pos: Int) do
722 var r = new RopeIterPiece(root, true, false, null)
723 max = root.length - 1
724 var rnod: String = root
725 var off = pos
726 loop
727 if rnod isa Concat then
728 if off >= rnod.left.length then
729 r.rdone = true
730 off -= rnod.left.length
731 rnod = rnod.right
732 r = new RopeIterPiece(rnod, true, false, r)
733 else
734 rnod = rnod.left
735 r = new RopeIterPiece(rnod, true, false, r)
736 end
737 else
738 str = rnod
739 r.rdone = true
740 iter = r
741 self.pos = pos - off
742 break
743 end
744 end
745 end
746
747 redef fun item do return str
748
749 redef fun is_ok do return pos <= max
750
751 redef fun index do return pos
752
753 redef fun next do
754 pos += str.length
755 if pos > max then return
756 var it = iter.prev
757 var rnod = it.node
758 loop
759 if not rnod isa Concat then
760 it.ldone = true
761 it.rdone = true
762 str = rnod
763 iter = it.as(not null)
764 break
765 end
766 if not it.ldone then
767 rnod = rnod.left
768 it.ldone = true
769 it = new RopeIterPiece(rnod, false, false, it)
770 else if not it.rdone then
771 it.rdone = true
772 rnod = rnod.right
773 it = new RopeIterPiece(rnod, false, false, it)
774 else
775 it = it.prev
776 rnod = it.node
777 continue
778 end
779 end
780 end
781 end
782
783 # Implementation of a `StringCharView` for `RopeString` objects
784 private class RopeChars
785 super StringCharView
786
787 var tgt: RopeString
788
789 init(s: RopeString) is old_style_init do tgt = s
790
791 redef fun [](i) do
792 return tgt[i]
793 end
794
795 redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
796
797 redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
798
799 end
800
801 # An Iterator over a RopeBuffer.
802 class RopeBufferIter
803 super IndexedIterator[Char]
804
805 # Subiterator.
806 var sit: IndexedIterator[Char]
807
808 # Native string iterated over.
809 var ns: NativeString
810
811 # Current position in `ns`.
812 var pns: Int
813
814 # Maximum position iterable.
815 var maxpos: Int
816
817 redef var index: Int
818
819 # Init the iterator from a RopeBuffer.
820 init(t: RopeBuffer) is old_style_init do
821 ns = t.ns
822 maxpos = t.rpos
823 sit = t.str.chars.iterator
824 pns = t.dumped
825 index = 0
826 end
827
828 # Init the iterator from a RopeBuffer starting from `pos`.
829 init from(t: RopeBuffer, pos: Int) do
830 ns = t.ns
831 maxpos = t.length
832 sit = t.str.chars.iterator_from(pos)
833 pns = pos - t.str.length
834 index = pos
835 end
836
837 redef fun is_ok do return index < maxpos
838
839 redef fun item do
840 if sit.is_ok then return sit.item
841 return ns[pns]
842 end
843
844 redef fun next do
845 index += 1
846 if sit.is_ok then
847 sit.next
848 else
849 pns += 1
850 end
851 end
852 end
853
854 # Reverse iterator over a RopeBuffer.
855 class RopeBufferReviter
856 super IndexedIterator[Char]
857
858 # Subiterator.
859 var sit: IndexedIterator[Char]
860
861 # Native string iterated over.
862 var ns: NativeString
863
864 # Current position in `ns`.
865 var pns: Int
866
867 redef var index: Int
868
869 # Init the iterator from a RopeBuffer.
870 init(tgt: RopeBuffer) is old_style_init do
871 sit = tgt.str.chars.reverse_iterator
872 pns = tgt.rpos - 1
873 index = tgt.length - 1
874 ns = tgt.ns
875 end
876
877 # Init the iterator from a RopeBuffer starting from `pos`.
878 init from(tgt: RopeBuffer, pos: Int) do
879 sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
880 pns = pos - tgt.str.length
881 index = pos
882 ns = tgt.ns
883 end
884
885 redef fun is_ok do return index > 0
886
887 redef fun item do
888 if pns >= 0 then return ns[pns]
889 return sit.item
890 end
891
892 redef fun next do
893 index -= 1
894 if pns >= 0 then
895 pns -= 1
896 else
897 sit.next
898 end
899 end
900 end
901
902 # View on the chars of a `RopeBuffer`
903 class RopeBufferChars
904 super BufferCharView
905
906 redef type SELFTYPE: RopeBuffer
907
908 redef fun [](i) do
909 if i < target.str.length then
910 return target.str[i]
911 else
912 return target.ns[i - target.str.length]
913 end
914 end
915
916 redef fun []=(i,c) do
917 if i == target.length then target.add c
918 if i < target.str.length then
919 var s = target.str
920 var l = s.substring(0, i)
921 var r = s.substring_from(i + 1)
922 target.str = l + c.to_s + r
923 else
924 target.ns[i - target.str.length] = c
925 end
926 end
927
928 redef fun add(c) do target.add c
929
930 redef fun push(c) do target.add c
931
932 redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
933
934 redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
935 end