ropes: Fix `RopeBuffer.reset`.
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 # See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
20 # Vol. 25(12), 1315-1330 (December 1995)`.
21 #
22 # The implementation developed here provides an automatic change
23 # of data structure depending on the length of the leaves.
24 #
25 # The length from which a `Rope` is built from a `flat` string
26 # if defined at top-level (see `maxlen`) and can be redefined by clients
27 # depending on their needs (e.g. if you need to bench the speed of
28 # the creation of concat nodes, lower the size of maxlen).
29 #
30 # A Rope as defined in the original paper is a Tree made of concatenation
31 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
32 #
33 # Example :
34 #
35 # ` Concat `
36 # ` / \ `
37 # ` Concat Concat `
38 # ` / \ / \ `
39 # `"My" " Name" " is" " Simon." `
40 #
41 # Note that the above example is not representative of the actual implementation
42 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
43 # height (hence, this rope here might actually be a `FlatString` !).
44 module ropes
45
46 intrude import string
47
48 # Maxlen is the maximum length of a Leaf node
49 #
50 # When concatenating two leaves, if `new_length` > `maxlen`,
51 # A `Concat` node is created instead
52 #
53 # Its purpose is to limit the depth of the `Rope` (this
54 # improves performance when accessing/iterating).
55 fun maxlen: Int do return 64
56
57 # String using a tree-based representation with leaves as `FlatStrings`
58 private abstract class Rope
59 super Text
60 end
61
62 private abstract class RopeString
63 super Rope
64 super String
65
66 redef fun chars is cached do return new RopeChars(self)
67 end
68
69 # Node that represents a concatenation between two `String`
70 private class Concat
71 super RopeString
72
73 redef var length: Int
74
75 redef fun substrings do return new RopeSubstrings(self)
76
77 redef fun empty do return ""
78
79 redef fun to_cstring is cached do
80 var len = length
81 var ns = new NativeString(len + 1)
82 ns[len] = '\0'
83 var off = 0
84 for i in substrings do
85 var ilen = i.length
86 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
87 off += ilen
88 end
89 return ns
90 end
91
92 # Left child of the node
93 var left: String
94 # Right child of the node
95 var right: String
96
97 init(l: String, r: String) is old_style_init do
98 left = l
99 right = r
100 length = l.length + r.length
101 end
102
103 redef fun output do
104 left.output
105 right.output
106 end
107
108 redef fun iterator do return new RopeIter(self)
109
110 redef fun *(i) do
111 var x: String = self
112 for j in [1 .. i[ do x += self
113 return x
114 end
115
116 redef fun [](i) do
117 var llen = left.length
118 if i >= llen then return right[i - llen]
119 return left[i]
120 end
121
122 redef fun substring(from, len) do
123 var llen = left.length
124 if from < llen then
125 if from + len < llen then return left.substring(from,len)
126 var lsublen = llen - from
127 return left.substring_from(from) + right.substring(0, len - lsublen)
128 else
129 return right.substring(from - llen, len)
130 end
131 end
132
133 redef fun reversed do return new Concat(right.reversed, left.reversed)
134
135 redef fun insert_at(s, pos) do
136 if pos > left.length then
137 return left + right.insert_at(s, pos - left.length)
138 end
139 return left.insert_at(s, pos) + right
140 end
141
142 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
143
144 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
145
146 redef fun +(o) do
147 var s = o.to_s
148 var slen = s.length
149 if s isa Concat then
150 return new Concat(self, s)
151 else
152 var r = right
153 var rlen = r.length
154 if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
155 return new Concat(left, r + s)
156 end
157 end
158 end
159
160 # Mutable `Rope`, optimized for concatenation operations
161 #
162 # A `RopeBuffer` is an efficient way of building a `String` when
163 # concatenating small strings.
164 #
165 # It does concatenations in an optimized way by having a
166 # mutable part and an immutable part built by efficiently
167 # concatenating strings in chain.
168 #
169 # Every concatenation operation is done by copying a string to
170 # the mutable part and flushing it when full.
171 #
172 # However, when a long string is appended to the `Buffer`,
173 # the concatenation is done at it would be in a `Rope`.
174 class RopeBuffer
175 super Rope
176 super Buffer
177
178 redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self)
179
180 # The final string being built on the fly
181 private var str: String is noinit
182
183 # Current concatenation buffer
184 private var ns: NativeString is noinit
185
186 # Next available (e.g. unset) character in the `Buffer`
187 private var rpos = 0
188
189 # Keeps track of the buffer's currently dumped part
190 #
191 # This might happen if for instance, a String was being
192 # built by concatenating small parts of string and suddenly
193 # a long string (length > maxlen) is appended.
194 private var dumped: Int is noinit
195
196 # Length of the complete rope
197 redef var length = 0
198
199 # Length of the mutable part
200 #
201 # Is also used as base to compute the size of the next
202 # mutable native string (`ns`)
203 private var buf_size: Int is noinit
204
205 redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
206
207 # Builds an empty `RopeBuffer`
208 init do
209 str = ""
210 ns = new NativeString(maxlen)
211 buf_size = maxlen
212 dumped = 0
213 end
214
215 # Builds a new `RopeBuffer` with `str` in it.
216 init from(str: String) do
217 self.str = str
218 ns = new NativeString(maxlen)
219 buf_size = maxlen
220 length = str.length
221 dumped = 0
222 end
223
224 # Resets the informations of the `Buffer`
225 #
226 # This is called when doing in-place modifications
227 # on a previously to_s'd `RopeBuffer`
228 private fun reset do
229 var nns = new NativeString(buf_size)
230 var blen = rpos - dumped
231 ns.copy_to(nns, blen, dumped, 0)
232 ns = nns
233 dumped = 0
234 rpos = blen
235 written = false
236 end
237
238 redef fun empty do return new RopeBuffer
239
240 redef fun clear do
241 str = ""
242 length = 0
243 rpos = 0
244 dumped = 0
245 if written then
246 ns = new NativeString(buf_size)
247 written = false
248 end
249 end
250
251 redef fun substring(from, count) do
252 var strlen = str.length
253
254 if from < 0 then
255 count += from
256 if count < 0 then count = 0
257 from = 0
258 end
259
260 if count > length then count = length - from
261
262 if count == 0 then return empty
263
264 if from < strlen then
265 var subpos = strlen - from
266 if count <= subpos then
267 return new RopeBuffer.from(str.substring(from, count))
268 else
269 var l = str.substring_from(from)
270 var rem = count - subpos
271 var nns = new NativeString(rem)
272 ns.copy_to(nns, rem, dumped, 0)
273 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
274 end
275 else
276 var nns = new NativeString(count)
277 ns.copy_to(nns, count, dumped, 0)
278 return new RopeBuffer.from(nns.to_s_with_length(count))
279 end
280 end
281
282 redef fun append(s) do
283 var slen = s.length
284 length += slen
285 var rp = rpos
286 if s isa Rope or slen > maxlen then
287 if rp > 0 and dumped != rp then
288 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
289 dumped = rp
290 end
291 str = str + s
292 return
293 end
294 var remsp = buf_size - rp
295 var sits: NativeString
296 var begin: Int
297 if s isa FlatString then
298 begin = s.index_from
299 sits = s.items
300 else if s isa FlatBuffer then
301 begin = 0
302 sits = s.items
303 else
304 if slen <= remsp then
305 for i in s.chars do
306 ns[rpos] = i
307 rpos += 1
308 end
309 else
310 var spos = 0
311 for i in [0..remsp[ do
312 ns[rpos] = s[spos]
313 rpos += 1
314 spos += 1
315 end
316 dump_buffer
317 while spos < slen do
318 ns[rpos] = s[spos]
319 spos += 1
320 rpos += 1
321 end
322 end
323 return
324 end
325 if slen <= remsp then
326 if remsp <= 0 then
327 dump_buffer
328 rpos = 0
329 else
330 sits.copy_to(ns, slen, begin, rp)
331 rpos += slen
332 end
333 else
334 sits.copy_to(ns, remsp, begin, rp)
335 rpos = buf_size
336 dump_buffer
337 var nlen = slen - remsp
338 sits.copy_to(ns, nlen, begin + remsp, 0)
339 rpos = nlen
340 end
341 end
342
343 redef fun add(c) do
344 var rp = rpos
345 if rp >= buf_size then
346 dump_buffer
347 rp = 0
348 end
349 ns[rp] = c
350 rp += 1
351 length += 1
352 rpos = rp
353 end
354
355 # Converts the Buffer to a FlatString, appends it to
356 # the final String and re-allocates a new larger Buffer.
357 private fun dump_buffer do
358 written = false
359 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
360 str += nstr
361 var bs = buf_size
362 bs = bs * 2
363 ns = new NativeString(bs)
364 buf_size = bs
365 dumped = 0
366 end
367
368 redef fun output do
369 str.output
370 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
371 end
372
373 # Enlarge is useless here since the `Buffer`
374 # part is automatically dumped when necessary.
375 #
376 # Also, since the buffer can not be overused by a
377 # single string, there is no need for manual
378 # resizing.
379 #
380 # "You have no power here !"
381 redef fun enlarge(i) do end
382
383 redef fun to_s do
384 written = true
385 var nnslen = rpos - dumped
386 if nnslen == 0 then return str
387 return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
388 end
389
390 redef fun reverse do
391 # Flush the buffer in order to only have to reverse `str`.
392 if rpos > 0 and dumped != rpos then
393 str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
394 dumped = rpos
395 end
396 str = str.reversed
397 end
398
399 redef fun upper do
400 if written then reset
401 str = str.to_upper
402 var mits = ns
403 for i in [0 .. rpos[ do
404 mits[i] = mits[i].to_upper
405 end
406 end
407
408 redef fun lower do
409 if written then reset
410 str = str.to_lower
411 var mits = ns
412 for i in [0 .. rpos[ do
413 mits[i] = mits[i].to_lower
414 end
415 end
416 end
417
418 redef class FlatString
419
420 redef fun insert_at(s, pos) do
421 var l = substring(0, pos)
422 var r = substring_from(pos)
423 return l + s + r
424 end
425
426 redef fun +(o) do
427 var s = o.to_s
428 var slen = s.length
429 var mlen = length
430 if slen == 0 then return self
431 if mlen == 0 then return s
432 var nlen = slen + mlen
433 if s isa FlatString then
434 if nlen > maxlen then return new Concat(self, s)
435 var mits = items
436 var sifrom = s.index_from
437 var mifrom = index_from
438 var sits = s.items
439 var ns = new NativeString(nlen + 1)
440 mits.copy_to(ns, mlen, mifrom, 0)
441 sits.copy_to(ns, slen, sifrom, mlen)
442 return ns.to_s_with_length(nlen)
443 else if s isa Concat then
444 var sl = s.left
445 var sllen = sl.length
446 if sllen + mlen > maxlen then return new Concat(self, s)
447 return new Concat(self + sl, s.right)
448 else
449 abort
450 end
451 end
452 end
453
454 # A simple linked list for use with iterators
455 private class RopeIterPiece
456 # The encapsulated node of the `Rope`
457 var node: String
458 # Was its left child (if any) visited ?
459 var ldone: Bool
460 # Was its right child (if any) visited ?
461 var rdone: Bool
462 # The previous node in the list.
463 var prev: nullable RopeIterPiece
464 end
465
466 # A reverse iterator capable of working with `Rope` objects
467 private class RopeReviter
468 super IndexedIterator[Char]
469
470 # Current NativeString
471 var ns: String
472 # Current position in NativeString
473 var pns: Int
474 # Position in the Rope (0-indexed)
475 var pos: Int
476 # Iterator on the substrings, does the Postfix part of
477 # the Rope traversal.
478 var subs: IndexedIterator[String]
479
480 init(root: RopeString) is old_style_init do
481 pos = root.length - 1
482 subs = new ReverseRopeSubstrings(root)
483 ns = subs.item
484 pns = ns.length - 1
485 end
486
487 init from(root: RopeString, pos: Int) do
488 self.pos = pos
489 subs = new ReverseRopeSubstrings.from(root, pos)
490 ns = subs.item
491 pns = pos - subs.index
492 end
493
494 redef fun index do return pos
495
496 redef fun is_ok do return pos >= 0
497
498 redef fun item do return ns[pns]
499
500 redef fun next do
501 pns -= 1
502 pos -= 1
503 if pns >= 0 then return
504 if not subs.is_ok then return
505 subs.next
506 if not subs.is_ok then return
507 ns = subs.item
508 pns = ns.length - 1
509 end
510 end
511
512 # Forward iterator on the chars of a `Rope`
513 private class RopeIter
514 super IndexedIterator[Char]
515
516 # Position in current `String`
517 var pns: Int
518 # Current `String` being iterated on
519 var str: String
520 # Substrings of the Rope
521 var subs: IndexedIterator[String]
522 # Maximum position to iterate on (e.g. Rope.length)
523 var max: Int
524 # Position (char) in the Rope (0-indexed)
525 var pos: Int
526
527 init(root: RopeString) is old_style_init do
528 subs = new RopeSubstrings(root)
529 pns = 0
530 str = subs.item
531 max = root.length - 1
532 pos = 0
533 end
534
535 init from(root: RopeString, pos: Int) do
536 subs = new RopeSubstrings.from(root, pos)
537 pns = pos - subs.index
538 self.pos = pos
539 str = subs.item
540 max = root.length - 1
541 end
542
543 redef fun item do return str[pns]
544
545 redef fun is_ok do return pos <= max
546
547 redef fun index do return pos
548
549 redef fun next do
550 pns += 1
551 pos += 1
552 if pns < subs.item.length then return
553 if not subs.is_ok then return
554 subs.next
555 if not subs.is_ok then return
556 str = subs.item
557 pns = 0
558 end
559 end
560
561 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
562 private class ReverseRopeSubstrings
563 super IndexedIterator[String]
564
565 # Visit Stack
566 var iter: RopeIterPiece is noinit
567 # Position in `Rope`
568 var pos: Int is noinit
569
570 # Current leaf
571 var str: String is noinit
572
573 init(root: RopeString) is old_style_init do
574 var r = new RopeIterPiece(root, false, true, null)
575 pos = root.length - 1
576 var lnod: String = root
577 loop
578 if lnod isa Concat then
579 lnod = lnod.right
580 r = new RopeIterPiece(lnod, false, true, r)
581 else
582 str = lnod
583 iter = r
584 break
585 end
586 end
587 end
588
589 init from(root: RopeString, pos: Int) do
590 var r = new RopeIterPiece(root, false, true, null)
591 var rnod: String = root
592 var off = pos
593 loop
594 if rnod isa Concat then
595 if off >= rnod.left.length then
596 off -= rnod.left.length
597 rnod = rnod.right
598 r = new RopeIterPiece(rnod, false, true, r)
599 else
600 r.ldone = true
601 rnod = rnod.left
602 r = new RopeIterPiece(rnod, false, true, r)
603 end
604 else
605 str = rnod
606 r.ldone = true
607 iter = r
608 self.pos = pos - off
609 break
610 end
611 end
612 end
613
614 redef fun item do return str
615
616 redef fun index do return pos
617
618 redef fun is_ok do return pos >= 0
619
620 redef fun next do
621 if pos < 0 then return
622 var curr = iter.prev
623 var currit = curr.node
624 while curr != null do
625 currit = curr.node
626 if not currit isa Concat then
627 str = currit
628 pos -= str.length
629 iter = curr
630 return
631 end
632 if not curr.rdone then
633 curr.rdone = true
634 curr = new RopeIterPiece(currit.right, false, false, curr)
635 continue
636 end
637 if not curr.ldone then
638 curr.ldone = true
639 curr = new RopeIterPiece(currit.left, false, false, curr)
640 continue
641 end
642 curr = curr.prev
643 end
644 pos = -1
645 end
646 end
647
648 private class RopeBufSubstringIterator
649 super Iterator[String]
650
651 # Iterator on the substrings of the building string
652 var iter: Iterator[String]
653 # Makes a String out of the buffered part of the Ropebuffer
654 var nsstr: String
655 # Did we attain the buffered part ?
656 var nsstr_done = false
657
658 init(str: RopeBuffer) is old_style_init do
659 iter = str.str.substrings
660 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
661 if str.length == 0 then nsstr_done = true
662 end
663
664 redef fun is_ok do return iter.is_ok or not nsstr_done
665
666 redef fun item do
667 assert is_ok
668 if iter.is_ok then return iter.item
669 return nsstr
670 end
671
672 redef fun next do
673 if iter.is_ok then
674 iter.next
675 return
676 end
677 nsstr_done = true
678 end
679 end
680
681 # Substrings of a Rope (i.e. Postfix iterator on leaves)
682 private class RopeSubstrings
683 super IndexedIterator[String]
684
685 # Visit Stack
686 var iter: RopeIterPiece is noinit
687 # Position in `Rope`
688 var pos: Int is noinit
689 # Maximum position in `Rope` (i.e. length - 1)
690 var max: Int is noinit
691
692 # Current leaf
693 var str: String is noinit
694
695 init(root: RopeString) is old_style_init do
696 var r = new RopeIterPiece(root, true, false, null)
697 pos = 0
698 max = root.length - 1
699 var rnod: String = root
700 loop
701 if rnod isa Concat then
702 rnod = rnod.left
703 r = new RopeIterPiece(rnod, true, false, r)
704 else
705 str = rnod
706 r.rdone = true
707 iter = r
708 break
709 end
710 end
711 end
712
713 init from(root: RopeString, pos: Int) do
714 var r = new RopeIterPiece(root, true, false, null)
715 max = root.length - 1
716 var rnod: String = root
717 var off = pos
718 loop
719 if rnod isa Concat then
720 if off >= rnod.left.length then
721 r.rdone = true
722 off -= rnod.left.length
723 rnod = rnod.right
724 r = new RopeIterPiece(rnod, true, false, r)
725 else
726 rnod = rnod.left
727 r = new RopeIterPiece(rnod, true, false, r)
728 end
729 else
730 str = rnod
731 r.rdone = true
732 iter = r
733 self.pos = pos - off
734 break
735 end
736 end
737 end
738
739 redef fun item do return str
740
741 redef fun is_ok do return pos <= max
742
743 redef fun index do return pos
744
745 redef fun next do
746 pos += str.length
747 if pos > max then return
748 var it = iter.prev
749 var rnod = it.node
750 loop
751 if not rnod isa Concat then
752 it.ldone = true
753 it.rdone = true
754 str = rnod
755 iter = it.as(not null)
756 break
757 end
758 if not it.ldone then
759 rnod = rnod.left
760 it.ldone = true
761 it = new RopeIterPiece(rnod, false, false, it)
762 else if not it.rdone then
763 it.rdone = true
764 rnod = rnod.right
765 it = new RopeIterPiece(rnod, false, false, it)
766 else
767 it = it.prev
768 rnod = it.node
769 continue
770 end
771 end
772 end
773 end
774
775 # Implementation of a `StringCharView` for `RopeString` objects
776 private class RopeChars
777 super StringCharView
778
779 var tgt: RopeString
780
781 init(s: RopeString) is old_style_init do tgt = s
782
783 redef fun [](i) do
784 return tgt[i]
785 end
786
787 redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
788
789 redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
790
791 end
792
793 # An Iterator over a RopeBuffer.
794 class RopeBufferIter
795 super IndexedIterator[Char]
796
797 # Subiterator.
798 var sit: IndexedIterator[Char]
799
800 # Native string iterated over.
801 var ns: NativeString
802
803 # Current position in `ns`.
804 var pns: Int
805
806 # Maximum position iterable.
807 var maxpos: Int
808
809 redef var index: Int
810
811 # Init the iterator from a RopeBuffer.
812 init(t: RopeBuffer) is old_style_init do
813 ns = t.ns
814 maxpos = t.rpos
815 sit = t.str.chars.iterator
816 pns = t.dumped
817 index = 0
818 end
819
820 # Init the iterator from a RopeBuffer starting from `pos`.
821 init from(t: RopeBuffer, pos: Int) do
822 ns = t.ns
823 maxpos = t.length
824 sit = t.str.chars.iterator_from(pos)
825 pns = pos - t.str.length
826 index = pos
827 end
828
829 redef fun is_ok do return index < maxpos
830
831 redef fun item do
832 if sit.is_ok then return sit.item
833 return ns[pns]
834 end
835
836 redef fun next do
837 index += 1
838 if sit.is_ok then
839 sit.next
840 else
841 pns += 1
842 end
843 end
844 end
845
846 # Reverse iterator over a RopeBuffer.
847 class RopeBufferReviter
848 super IndexedIterator[Char]
849
850 # Subiterator.
851 var sit: IndexedIterator[Char]
852
853 # Native string iterated over.
854 var ns: NativeString
855
856 # Current position in `ns`.
857 var pns: Int
858
859 redef var index: Int
860
861 # Init the iterator from a RopeBuffer.
862 init(tgt: RopeBuffer) is old_style_init do
863 sit = tgt.str.chars.reverse_iterator
864 pns = tgt.rpos - 1
865 index = tgt.length - 1
866 ns = tgt.ns
867 end
868
869 # Init the iterator from a RopeBuffer starting from `pos`.
870 init from(tgt: RopeBuffer, pos: Int) do
871 sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
872 pns = pos - tgt.str.length
873 index = pos
874 ns = tgt.ns
875 end
876
877 redef fun is_ok do return index > 0
878
879 redef fun item do
880 if pns >= 0 then return ns[pns]
881 return sit.item
882 end
883
884 redef fun next do
885 index -= 1
886 if pns >= 0 then
887 pns -= 1
888 else
889 sit.next
890 end
891 end
892 end
893
894 # View on the chars of a `RopeBuffer`
895 class RopeBufferChars
896 super BufferCharView
897
898 redef type SELFTYPE: RopeBuffer
899
900 redef fun [](i) do
901 if i < target.str.length then
902 return target.str[i]
903 else
904 return target.ns[i - target.str.length]
905 end
906 end
907
908 redef fun []=(i,c) do
909 if i == target.length then target.add c
910 if i < target.str.length then
911 var s = target.str
912 var l = s.substring(0, i)
913 var r = s.substring_from(i + 1)
914 target.str = l + c.to_s + r
915 else
916 target.ns[i - target.str.length] = c
917 end
918 end
919
920 redef fun add(c) do target.add c
921
922 redef fun push(c) do target.add c
923
924 redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
925
926 redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
927 end