ropes: Fix the behavior of `RopeBuffer.clear` when used with `to_s`.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 # See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
20 # Vol. 25(12), 1315-1330 (December 1995)`.
21 #
22 # The implementation developed here provides an automatic change
23 # of data structure depending on the length of the leaves.
24 #
25 # The length from which a `Rope` is built from a `flat` string
26 # if defined at top-level (see `maxlen`) and can be redefined by clients
27 # depending on their needs (e.g. if you need to bench the speed of
28 # the creation of concat nodes, lower the size of maxlen).
29 #
30 # A Rope as defined in the original paper is a Tree made of concatenation
31 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
32 #
33 # Example :
34 #
35 # ` Concat `
36 # ` / \ `
37 # ` Concat Concat `
38 # ` / \ / \ `
39 # `"My" " Name" " is" " Simon." `
40 #
41 # Note that the above example is not representative of the actual implementation
42 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
43 # height (hence, this rope here might actually be a `FlatString` !).
44 module ropes
45
46 intrude import string
47
48 # Maxlen is the maximum length of a Leaf node
49 #
50 # When concatenating two leaves, if `new_length` > `maxlen`,
51 # A `Concat` node is created instead
52 #
53 # Its purpose is to limit the depth of the `Rope` (this
54 # improves performance when accessing/iterating).
55 fun maxlen: Int do return 64
56
57 # String using a tree-based representation with leaves as `FlatStrings`
58 private abstract class Rope
59 super Text
60 end
61
62 private abstract class RopeString
63 super Rope
64 super String
65
66 redef fun chars is cached do return new RopeChars(self)
67 end
68
69 # Node that represents a concatenation between two `String`
70 private class Concat
71 super RopeString
72
73 redef var length: Int
74
75 redef fun substrings do return new RopeSubstrings(self)
76
77 redef fun empty do return ""
78
79 redef fun to_cstring is cached do
80 var len = length
81 var ns = new NativeString(len + 1)
82 ns[len] = '\0'
83 var off = 0
84 for i in substrings do
85 var ilen = i.length
86 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
87 off += ilen
88 end
89 return ns
90 end
91
92 # Left child of the node
93 var left: String
94 # Right child of the node
95 var right: String
96
97 init(l: String, r: String) is old_style_init do
98 left = l
99 right = r
100 length = l.length + r.length
101 end
102
103 redef fun output do
104 left.output
105 right.output
106 end
107
108 redef fun iterator do return new RopeIter(self)
109
110 redef fun *(i) do
111 var x: String = self
112 for j in [1 .. i[ do x += self
113 return x
114 end
115
116 redef fun [](i) do
117 var llen = left.length
118 if i >= llen then return right[i - llen]
119 return left[i]
120 end
121
122 redef fun substring(from, len) do
123 var llen = left.length
124 if from < llen then
125 if from + len < llen then return left.substring(from,len)
126 var lsublen = llen - from
127 return left.substring_from(from) + right.substring(0, len - lsublen)
128 else
129 return right.substring(from - llen, len)
130 end
131 end
132
133 redef fun reversed do return new Concat(right.reversed, left.reversed)
134
135 redef fun insert_at(s, pos) do
136 if pos > left.length then
137 return left + right.insert_at(s, pos - left.length)
138 end
139 return left.insert_at(s, pos) + right
140 end
141
142 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
143
144 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
145
146 redef fun +(o) do
147 var s = o.to_s
148 var slen = s.length
149 if s isa Concat then
150 return new Concat(self, s)
151 else
152 var r = right
153 var rlen = r.length
154 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
155 return new Concat(left, r + s)
156 end
157 end
158 end
159
160 # Mutable `Rope`, optimized for concatenation operations
161 #
162 # A `RopeBuffer` is an efficient way of building a `String` when
163 # concatenating small strings.
164 #
165 # It does concatenations in an optimized way by having a
166 # mutable part and an immutable part built by efficiently
167 # concatenating strings in chain.
168 #
169 # Every concatenation operation is done by copying a string to
170 # the mutable part and flushing it when full.
171 #
172 # However, when a long string is appended to the `Buffer`,
173 # the concatenation is done at it would be in a `Rope`.
174 class RopeBuffer
175 super Rope
176 super Buffer
177
178 redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self)
179
180 # The final string being built on the fly
181 private var str: String is noinit
182
183 # Current concatenation buffer
184 private var ns: NativeString is noinit
185
186 # Next available (e.g. unset) character in the `Buffer`
187 private var rpos = 0
188
189 # Keeps track of the buffer's currently dumped part
190 #
191 # This might happen if for instance, a String was being
192 # built by concatenating small parts of string and suddenly
193 # a long string (length > maxlen) is appended.
194 private var dumped: Int is noinit
195
196 # Length of the complete rope
197 redef var length = 0
198
199 # Length of the mutable part
200 #
201 # Is also used as base to compute the size of the next
202 # mutable native string (`ns`)
203 private var buf_size: Int is noinit
204
205 redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
206
207 # Builds an empty `RopeBuffer`
208 init do
209 str = ""
210 ns = new NativeString(maxlen)
211 buf_size = maxlen
212 dumped = 0
213 end
214
215 # Builds a new `RopeBuffer` with `str` in it.
216 init from(str: String) do
217 self.str = str
218 ns = new NativeString(maxlen)
219 buf_size = maxlen
220 length = str.length
221 dumped = 0
222 end
223
224 # Resets the informations of the `Buffer`
225 #
226 # This is called when doing in-place modifications
227 # on a previously to_s'd `RopeBuffer`
228 private fun reset do
229 var nns = new NativeString(buf_size)
230 var blen = rpos - dumped
231 ns.copy_to(nns, blen, dumped, 0)
232 dumped = 0
233 rpos = blen
234 written = false
235 end
236
237 redef fun empty do return new RopeBuffer
238
239 redef fun clear do
240 str = ""
241 length = 0
242 rpos = 0
243 dumped = 0
244 if written then
245 ns = new NativeString(buf_size)
246 written = false
247 end
248 end
249
250 redef fun substring(from, count) do
251 var strlen = str.length
252
253 if from < 0 then
254 count += from
255 if count < 0 then count = 0
256 from = 0
257 end
258
259 if count > length then count = length - from
260
261 if count == 0 then return empty
262
263 if from < strlen then
264 var subpos = strlen - from
265 if count <= subpos then
266 return new RopeBuffer.from(str.substring(from, count))
267 else
268 var l = str.substring_from(from)
269 var rem = count - subpos
270 var nns = new NativeString(rem)
271 ns.copy_to(nns, rem, dumped, 0)
272 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
273 end
274 else
275 var nns = new NativeString(count)
276 ns.copy_to(nns, count, dumped, 0)
277 return new RopeBuffer.from(nns.to_s_with_length(count))
278 end
279 end
280
281 redef fun append(s) do
282 var slen = s.length
283 length += slen
284 var rp = rpos
285 if s isa Rope or slen > maxlen then
286 if rp > 0 and dumped != rp then
287 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
288 dumped = rp
289 end
290 str = str + s
291 return
292 end
293 var remsp = buf_size - rp
294 var sits: NativeString
295 var begin: Int
296 if s isa FlatString then
297 begin = s.index_from
298 sits = s.items
299 else if s isa FlatBuffer then
300 begin = 0
301 sits = s.items
302 else
303 if slen <= remsp then
304 for i in s.chars do
305 ns[rpos] = i
306 rpos += 1
307 end
308 else
309 var spos = 0
310 for i in [0..remsp[ do
311 ns[rpos] = s[spos]
312 rpos += 1
313 spos += 1
314 end
315 dump_buffer
316 while spos < slen do
317 ns[rpos] = s[spos]
318 spos += 1
319 rpos += 1
320 end
321 end
322 return
323 end
324 if slen <= remsp then
325 if remsp <= 0 then
326 dump_buffer
327 rpos = 0
328 else
329 sits.copy_to(ns, slen, begin, rp)
330 rpos += slen
331 end
332 else
333 sits.copy_to(ns, remsp, begin, rp)
334 rpos = buf_size
335 dump_buffer
336 var nlen = slen - remsp
337 sits.copy_to(ns, nlen, begin + remsp, 0)
338 rpos = nlen
339 end
340 end
341
342 redef fun add(c) do
343 var rp = rpos
344 if rp >= buf_size then
345 dump_buffer
346 rp = 0
347 end
348 ns[rp] = c
349 rp += 1
350 length += 1
351 rpos = rp
352 end
353
354 # Converts the Buffer to a FlatString, appends it to
355 # the final String and re-allocates a new larger Buffer.
356 private fun dump_buffer do
357 written = false
358 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
359 str += nstr
360 var bs = buf_size
361 bs = bs * 2
362 ns = new NativeString(bs)
363 buf_size = bs
364 dumped = 0
365 end
366
367 redef fun output do
368 str.output
369 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
370 end
371
372 # Enlarge is useless here since the `Buffer`
373 # part is automatically dumped when necessary.
374 #
375 # Also, since the buffer can not be overused by a
376 # single string, there is no need for manual
377 # resizing.
378 #
379 # "You have no power here !"
380 redef fun enlarge(i) do end
381
382 redef fun to_s do
383 written = true
384 var nnslen = rpos - dumped
385 if nnslen == 0 then return str
386 return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
387 end
388
389 redef fun reverse do
390 # Flush the buffer in order to only have to reverse `str`.
391 if rpos > 0 and dumped != rpos then
392 str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
393 dumped = rpos
394 end
395 str = str.reversed
396 end
397
398 redef fun upper do
399 if written then reset
400 str = str.to_upper
401 var mits = ns
402 for i in [0 .. rpos[ do
403 mits[i] = mits[i].to_upper
404 end
405 end
406
407 redef fun lower do
408 if written then reset
409 str = str.to_lower
410 var mits = ns
411 for i in [0 .. rpos[ do
412 mits[i] = mits[i].to_lower
413 end
414 end
415 end
416
417 redef class FlatString
418
419 redef fun insert_at(s, pos) do
420 var l = substring(0, pos)
421 var r = substring_from(pos)
422 return l + s + r
423 end
424
425 redef fun +(o) do
426 var s = o.to_s
427 var slen = s.length
428 var mlen = length
429 if slen == 0 then return self
430 if mlen == 0 then return s
431 var nlen = slen + mlen
432 if s isa FlatString then
433 if nlen > maxlen then return new Concat(self, s)
434 var mits = items
435 var sifrom = s.index_from
436 var mifrom = index_from
437 var sits = s.items
438 var ns = new NativeString(nlen + 1)
439 mits.copy_to(ns, mlen, mifrom, 0)
440 sits.copy_to(ns, slen, sifrom, mlen)
441 return ns.to_s_with_length(nlen)
442 else if s isa Concat then
443 var sl = s.left
444 var sllen = sl.length
445 if sllen + mlen > maxlen then return new Concat(self, s)
446 return new Concat(self + sl, s.right)
447 else
448 abort
449 end
450 end
451 end
452
453 # A simple linked list for use with iterators
454 private class RopeIterPiece
455 # The encapsulated node of the `Rope`
456 var node: String
457 # Was its left child (if any) visited ?
458 var ldone: Bool
459 # Was its right child (if any) visited ?
460 var rdone: Bool
461 # The previous node in the list.
462 var prev: nullable RopeIterPiece
463 end
464
465 # A reverse iterator capable of working with `Rope` objects
466 private class RopeReviter
467 super IndexedIterator[Char]
468
469 # Current NativeString
470 var ns: String
471 # Current position in NativeString
472 var pns: Int
473 # Position in the Rope (0-indexed)
474 var pos: Int
475 # Iterator on the substrings, does the Postfix part of
476 # the Rope traversal.
477 var subs: IndexedIterator[String]
478
479 init(root: RopeString) is old_style_init do
480 pos = root.length - 1
481 subs = new ReverseRopeSubstrings(root)
482 ns = subs.item
483 pns = ns.length - 1
484 end
485
486 init from(root: RopeString, pos: Int) do
487 self.pos = pos
488 subs = new ReverseRopeSubstrings.from(root, pos)
489 ns = subs.item
490 pns = pos - subs.index
491 end
492
493 redef fun index do return pos
494
495 redef fun is_ok do return pos >= 0
496
497 redef fun item do return ns[pns]
498
499 redef fun next do
500 pns -= 1
501 pos -= 1
502 if pns >= 0 then return
503 if not subs.is_ok then return
504 subs.next
505 if not subs.is_ok then return
506 ns = subs.item
507 pns = ns.length - 1
508 end
509 end
510
511 # Forward iterator on the chars of a `Rope`
512 private class RopeIter
513 super IndexedIterator[Char]
514
515 # Position in current `String`
516 var pns: Int
517 # Current `String` being iterated on
518 var str: String
519 # Substrings of the Rope
520 var subs: IndexedIterator[String]
521 # Maximum position to iterate on (e.g. Rope.length)
522 var max: Int
523 # Position (char) in the Rope (0-indexed)
524 var pos: Int
525
526 init(root: RopeString) is old_style_init do
527 subs = new RopeSubstrings(root)
528 pns = 0
529 str = subs.item
530 max = root.length - 1
531 pos = 0
532 end
533
534 init from(root: RopeString, pos: Int) do
535 subs = new RopeSubstrings.from(root, pos)
536 pns = pos - subs.index
537 self.pos = pos
538 str = subs.item
539 max = root.length - 1
540 end
541
542 redef fun item do return str[pns]
543
544 redef fun is_ok do return pos <= max
545
546 redef fun index do return pos
547
548 redef fun next do
549 pns += 1
550 pos += 1
551 if pns < subs.item.length then return
552 if not subs.is_ok then return
553 subs.next
554 if not subs.is_ok then return
555 str = subs.item
556 pns = 0
557 end
558 end
559
560 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
561 private class ReverseRopeSubstrings
562 super IndexedIterator[String]
563
564 # Visit Stack
565 var iter: RopeIterPiece is noinit
566 # Position in `Rope`
567 var pos: Int is noinit
568
569 # Current leaf
570 var str: String is noinit
571
572 init(root: RopeString) is old_style_init do
573 var r = new RopeIterPiece(root, false, true, null)
574 pos = root.length - 1
575 var lnod: String = root
576 loop
577 if lnod isa Concat then
578 lnod = lnod.right
579 r = new RopeIterPiece(lnod, false, true, r)
580 else
581 str = lnod
582 iter = r
583 break
584 end
585 end
586 end
587
588 init from(root: RopeString, pos: Int) do
589 var r = new RopeIterPiece(root, false, true, null)
590 var rnod: String = root
591 var off = pos
592 loop
593 if rnod isa Concat then
594 if off >= rnod.left.length then
595 off -= rnod.left.length
596 rnod = rnod.right
597 r = new RopeIterPiece(rnod, false, true, r)
598 else
599 r.ldone = true
600 rnod = rnod.left
601 r = new RopeIterPiece(rnod, false, true, r)
602 end
603 else
604 str = rnod
605 r.ldone = true
606 iter = r
607 self.pos = pos - off
608 break
609 end
610 end
611 end
612
613 redef fun item do return str
614
615 redef fun index do return pos
616
617 redef fun is_ok do return pos >= 0
618
619 redef fun next do
620 if pos < 0 then return
621 var curr = iter.prev
622 var currit = curr.node
623 while curr != null do
624 currit = curr.node
625 if not currit isa Concat then
626 str = currit
627 pos -= str.length
628 iter = curr
629 return
630 end
631 if not curr.rdone then
632 curr.rdone = true
633 curr = new RopeIterPiece(currit.right, false, false, curr)
634 continue
635 end
636 if not curr.ldone then
637 curr.ldone = true
638 curr = new RopeIterPiece(currit.left, false, false, curr)
639 continue
640 end
641 curr = curr.prev
642 end
643 pos = -1
644 end
645 end
646
647 private class RopeBufSubstringIterator
648 super Iterator[String]
649
650 # Iterator on the substrings of the building string
651 var iter: Iterator[String]
652 # Makes a String out of the buffered part of the Ropebuffer
653 var nsstr: String
654 # Did we attain the buffered part ?
655 var nsstr_done = false
656
657 init(str: RopeBuffer) is old_style_init do
658 iter = str.str.substrings
659 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
660 if str.length == 0 then nsstr_done = true
661 end
662
663 redef fun is_ok do return iter.is_ok or not nsstr_done
664
665 redef fun item do
666 assert is_ok
667 if iter.is_ok then return iter.item
668 return nsstr
669 end
670
671 redef fun next do
672 if iter.is_ok then
673 iter.next
674 return
675 end
676 nsstr_done = true
677 end
678 end
679
680 # Substrings of a Rope (i.e. Postfix iterator on leaves)
681 private class RopeSubstrings
682 super IndexedIterator[String]
683
684 # Visit Stack
685 var iter: RopeIterPiece is noinit
686 # Position in `Rope`
687 var pos: Int is noinit
688 # Maximum position in `Rope` (i.e. length - 1)
689 var max: Int is noinit
690
691 # Current leaf
692 var str: String is noinit
693
694 init(root: RopeString) is old_style_init do
695 var r = new RopeIterPiece(root, true, false, null)
696 pos = 0
697 max = root.length - 1
698 var rnod: String = root
699 loop
700 if rnod isa Concat then
701 rnod = rnod.left
702 r = new RopeIterPiece(rnod, true, false, r)
703 else
704 str = rnod
705 r.rdone = true
706 iter = r
707 break
708 end
709 end
710 end
711
712 init from(root: RopeString, pos: Int) do
713 var r = new RopeIterPiece(root, true, false, null)
714 max = root.length - 1
715 var rnod: String = root
716 var off = pos
717 loop
718 if rnod isa Concat then
719 if off >= rnod.left.length then
720 r.rdone = true
721 off -= rnod.left.length
722 rnod = rnod.right
723 r = new RopeIterPiece(rnod, true, false, r)
724 else
725 rnod = rnod.left
726 r = new RopeIterPiece(rnod, true, false, r)
727 end
728 else
729 str = rnod
730 r.rdone = true
731 iter = r
732 self.pos = pos - off
733 break
734 end
735 end
736 end
737
738 redef fun item do return str
739
740 redef fun is_ok do return pos <= max
741
742 redef fun index do return pos
743
744 redef fun next do
745 pos += str.length
746 if pos > max then return
747 var it = iter.prev
748 var rnod = it.node
749 loop
750 if not rnod isa Concat then
751 it.ldone = true
752 it.rdone = true
753 str = rnod
754 iter = it.as(not null)
755 break
756 end
757 if not it.ldone then
758 rnod = rnod.left
759 it.ldone = true
760 it = new RopeIterPiece(rnod, false, false, it)
761 else if not it.rdone then
762 it.rdone = true
763 rnod = rnod.right
764 it = new RopeIterPiece(rnod, false, false, it)
765 else
766 it = it.prev
767 rnod = it.node
768 continue
769 end
770 end
771 end
772 end
773
774 # Implementation of a `StringCharView` for `RopeString` objects
775 private class RopeChars
776 super StringCharView
777
778 var tgt: RopeString
779
780 init(s: RopeString) is old_style_init do tgt = s
781
782 redef fun [](i) do
783 return tgt[i]
784 end
785
786 redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
787
788 redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
789
790 end
791
792 # An Iterator over a RopeBuffer.
793 class RopeBufferIter
794 super IndexedIterator[Char]
795
796 # Subiterator.
797 var sit: IndexedIterator[Char]
798
799 # Native string iterated over.
800 var ns: NativeString
801
802 # Current position in `ns`.
803 var pns: Int
804
805 # Maximum position iterable.
806 var maxpos: Int
807
808 redef var index: Int
809
810 # Init the iterator from a RopeBuffer.
811 init(t: RopeBuffer) is old_style_init do
812 ns = t.ns
813 maxpos = t.rpos
814 sit = t.str.chars.iterator
815 pns = t.dumped
816 index = 0
817 end
818
819 # Init the iterator from a RopeBuffer starting from `pos`.
820 init from(t: RopeBuffer, pos: Int) do
821 ns = t.ns
822 maxpos = t.length
823 sit = t.str.chars.iterator_from(pos)
824 pns = pos - t.str.length
825 index = pos
826 end
827
828 redef fun is_ok do return index < maxpos
829
830 redef fun item do
831 if sit.is_ok then return sit.item
832 return ns[pns]
833 end
834
835 redef fun next do
836 index += 1
837 if sit.is_ok then
838 sit.next
839 else
840 pns += 1
841 end
842 end
843 end
844
845 # Reverse iterator over a RopeBuffer.
846 class RopeBufferReviter
847 super IndexedIterator[Char]
848
849 # Subiterator.
850 var sit: IndexedIterator[Char]
851
852 # Native string iterated over.
853 var ns: NativeString
854
855 # Current position in `ns`.
856 var pns: Int
857
858 redef var index: Int
859
860 # Init the iterator from a RopeBuffer.
861 init(tgt: RopeBuffer) is old_style_init do
862 sit = tgt.str.chars.reverse_iterator
863 pns = tgt.rpos - 1
864 index = tgt.length - 1
865 ns = tgt.ns
866 end
867
868 # Init the iterator from a RopeBuffer starting from `pos`.
869 init from(tgt: RopeBuffer, pos: Int) do
870 sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
871 pns = pos - tgt.str.length
872 index = pos
873 ns = tgt.ns
874 end
875
876 redef fun is_ok do return index > 0
877
878 redef fun item do
879 if pns >= 0 then return ns[pns]
880 return sit.item
881 end
882
883 redef fun next do
884 index -= 1
885 if pns >= 0 then
886 pns -= 1
887 else
888 sit.next
889 end
890 end
891 end
892
893 # View on the chars of a `RopeBuffer`
894 class RopeBufferChars
895 super BufferCharView
896
897 redef type SELFTYPE: RopeBuffer
898
899 redef fun [](i) do
900 if i < target.str.length then
901 return target.str[i]
902 else
903 return target.ns[i - target.str.length]
904 end
905 end
906
907 redef fun []=(i,c) do
908 if i == target.length then target.add c
909 if i < target.str.length then
910 var s = target.str
911 var l = s.substring(0, i)
912 var r = s.substring_from(i + 1)
913 target.str = l + c.to_s + r
914 else
915 target.ns[i - target.str.length] = c
916 end
917 end
918
919 redef fun add(c) do target.add c
920
921 redef fun push(c) do target.add c
922
923 redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
924
925 redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
926 end