1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Tree-based representation of a String.
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
43 # "My" " Name" " is" " Simon."
45 # Note that the above example is not representative of the actual implementation
46 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
47 # height (hence, this rope here might actually be a `FlatString` !).
52 # Maxlen is the maximum length of a Leaf node
54 # When concatenating two leaves, if `new_length` > `maxlen`,
55 # A `Concat` node is created instead
57 # Its purpose is to limit the depth of the `Rope` (this
58 # improves performance when accessing/iterating).
59 fun maxlen
: Int do return 64
61 # String using a tree-based representation with leaves as `FlatStrings`
62 private abstract class Rope
66 private abstract class RopeString
70 redef var chars
is lazy
do return new RopeChars(self)
73 # Node that represents a concatenation between two `String`
77 redef var length
: Int is noinit
79 redef fun substrings
do return new RopeSubstrings(self)
81 redef fun empty
do return ""
83 redef var to_cstring
is lazy
do
85 var ns
= new NativeString(len
+ 1)
88 for i
in substrings
do
90 i
.as(FlatString).items
.copy_to
(ns
, ilen
, i
.as(FlatString).index_from
, off
)
96 # Left child of the node
98 # Right child of the node
102 length
= left
.length
+ right
.length
110 redef fun iterator
do return new RopeIter(self)
114 for j
in [1 .. i
[ do x
+= self
119 var llen
= left
.length
120 if i
>= llen
then return right
[i
- llen
]
124 redef fun substring
(from
, len
) do
125 var llen
= left
.length
127 if from
+ len
< llen
then return left
.substring
(from
,len
)
128 var lsublen
= llen
- from
129 return left
.substring_from
(from
) + right
.substring
(0, len
- lsublen
)
131 return right
.substring
(from
- llen
, len
)
135 redef fun reversed
do return new Concat(right
.reversed
, left
.reversed
)
137 redef fun insert_at
(s
, pos
) do
138 if pos
> left
.length
then
139 return left
+ right
.insert_at
(s
, pos
- left
.length
)
141 return left
.insert_at
(s
, pos
) + right
144 redef fun to_upper
do return new Concat(left
.to_upper
, right
.to_upper
)
146 redef fun to_lower
do return new Concat(left
.to_lower
, right
.to_lower
)
152 return new Concat(self, s
)
156 if rlen
+ slen
> maxlen
then return new Concat(self, s
)
157 return new Concat(left
, r
+ s
)
161 redef fun copy_to_native
(dest
, n
, src_offset
, dest_offset
) do
163 var subs
= new RopeSubstrings.from
(self, src_offset
)
164 var st
= src_offset
- subs
.pos
165 var off
= dest_offset
168 if n
> it
.length
then
169 var cplen
= it
.length
- st
170 it
.items
.copy_to
(dest
, cplen
, st
, off
)
174 it
.items
.copy_to
(dest
, n
, st
, off
)
183 # Mutable `Rope`, optimized for concatenation operations
185 # A `RopeBuffer` is an efficient way of building a `String` when
186 # concatenating small strings.
188 # It does concatenations in an optimized way by having a
189 # mutable part and an immutable part built by efficiently
190 # concatenating strings in chain.
192 # Every concatenation operation is done by copying a string to
193 # the mutable part and flushing it when full.
195 # However, when a long string is appended to the `Buffer`,
196 # the concatenation is done at it would be in a `Rope`.
201 redef var chars
: Sequence[Char] is lazy
do return new RopeBufferChars(self)
203 # The final string being built on the fly
204 private var str
: String is noinit
206 # Current concatenation buffer
207 private var ns
: NativeString is noinit
209 # Next available (e.g. unset) character in the `Buffer`
212 # Keeps track of the buffer's currently dumped part
214 # This might happen if for instance, a String was being
215 # built by concatenating small parts of string and suddenly
216 # a long string (length > maxlen) is appended.
217 private var dumped
: Int is noinit
219 # Length of the complete rope
222 # Length of the mutable part
224 # Is also used as base to compute the size of the next
225 # mutable native string (`ns`)
226 private var buf_size
: Int is noinit
228 redef fun substrings
do return new RopeBufSubstringIterator(self)
230 # Builds an empty `RopeBuffer`
233 ns
= new NativeString(maxlen
)
238 # Builds a new `RopeBuffer` with `str` in it.
239 init from
(str
: String) do
241 ns
= new NativeString(maxlen
)
247 # Resets the informations of the `Buffer`
249 # This is called when doing in-place modifications
250 # on a previously to_s'd `RopeBuffer`
252 var nns
= new NativeString(buf_size
)
253 var blen
= rpos
- dumped
254 ns
.copy_to
(nns
, blen
, dumped
, 0)
261 redef fun empty
do return new RopeBuffer
269 ns
= new NativeString(buf_size
)
274 redef fun substring
(from
, count
) do
275 var strlen
= str
.length
279 if count
< 0 then count
= 0
283 if count
> length
then count
= length
- from
285 if count
== 0 then return empty
287 if from
< strlen
then
288 var subpos
= strlen
- from
289 if count
<= subpos
then
290 return new RopeBuffer.from
(str
.substring
(from
, count
))
292 var l
= str
.substring_from
(from
)
293 var rem
= count
- subpos
294 var nns
= new NativeString(rem
)
295 ns
.copy_to
(nns
, rem
, dumped
, 0)
296 return new RopeBuffer.from
(l
+ nns
.to_s_with_length
(rem
))
299 var nns
= new NativeString(count
)
300 ns
.copy_to
(nns
, count
, dumped
, 0)
301 return new RopeBuffer.from
(nns
.to_s_with_length
(count
))
305 redef fun append
(s
) do
309 if s
isa Rope or slen
> maxlen
then
310 if rp
> 0 and dumped
!= rp
then
311 str
+= new FlatString.with_infos
(ns
, rp
- dumped
, dumped
, rp
- 1)
317 var remsp
= buf_size
- rp
318 var sits
: NativeString
320 if s
isa FlatString then
323 else if s
isa FlatBuffer then
327 if slen
<= remsp
then
334 for i
in [0..remsp
[ do
348 if slen
<= remsp
then
353 sits
.copy_to
(ns
, slen
, begin
, rp
)
357 sits
.copy_to
(ns
, remsp
, begin
, rp
)
360 var nlen
= slen
- remsp
361 sits
.copy_to
(ns
, nlen
, begin
+ remsp
, 0)
368 if rp
>= buf_size
then
378 # Converts the Buffer to a FlatString, appends it to
379 # the final String and re-allocates a new larger Buffer.
380 private fun dump_buffer
do
382 var nstr
= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
386 ns
= new NativeString(bs
)
393 new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1).output
396 # Enlarge is useless here since the `Buffer`
397 # part is automatically dumped when necessary.
399 # Also, since the buffer can not be overused by a
400 # single string, there is no need for manual
403 # "You have no power here !"
404 redef fun enlarge
(i
) do end
408 var nnslen
= rpos
- dumped
409 if nnslen
== 0 then return str
410 return str
+ new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
414 # Flush the buffer in order to only have to reverse `str`.
415 if rpos
> 0 and dumped
!= rpos
then
416 str
+= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
423 if written
then reset
426 for i
in [0 .. rpos
[ do
427 mits
[i
] = mits
[i
].to_upper
432 if written
then reset
435 for i
in [0 .. rpos
[ do
436 mits
[i
] = mits
[i
].to_lower
441 redef class FlatString
443 redef fun insert_at
(s
, pos
) do
444 var l
= substring
(0, pos
)
445 var r
= substring_from
(pos
)
453 if slen
== 0 then return self
454 if mlen
== 0 then return s
455 var nlen
= slen
+ mlen
456 if s
isa FlatString then
457 if nlen
> maxlen
then return new Concat(self, s
)
459 var sifrom
= s
.index_from
460 var mifrom
= index_from
462 var ns
= new NativeString(nlen
+ 1)
463 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
464 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
465 return ns
.to_s_with_length
(nlen
)
466 else if s
isa Concat then
468 var sllen
= sl
.length
469 if sllen
+ mlen
> maxlen
then return new Concat(self, s
)
470 return new Concat(self + sl
, s
.right
)
477 # A simple linked list for use with iterators
478 private class RopeIterPiece
479 # The encapsulated node of the `Rope`
481 # Was its left child (if any) visited ?
483 # Was its right child (if any) visited ?
485 # The previous node in the list.
486 var prev
: nullable RopeIterPiece
489 # A reverse iterator capable of working with `Rope` objects
490 private class RopeReviter
491 super IndexedIterator[Char]
493 # Current NativeString
495 # Current position in NativeString
497 # Position in the Rope (0-indexed)
499 # Iterator on the substrings, does the Postfix part of
500 # the Rope traversal.
501 var subs
: IndexedIterator[String]
503 init(root
: RopeString) is old_style_init
do
504 pos
= root
.length
- 1
505 subs
= new ReverseRopeSubstrings(root
)
510 init from
(root
: RopeString, pos
: Int) do
512 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
514 pns
= pos
- subs
.index
517 redef fun index
do return pos
519 redef fun is_ok
do return pos
>= 0
521 redef fun item
do return ns
[pns
]
526 if pns
>= 0 then return
527 if not subs
.is_ok
then return
529 if not subs
.is_ok
then return
535 # Forward iterator on the chars of a `Rope`
536 private class RopeIter
537 super IndexedIterator[Char]
539 # Position in current `String`
541 # Current `String` being iterated on
543 # Substrings of the Rope
544 var subs
: IndexedIterator[String]
545 # Maximum position to iterate on (e.g. Rope.length)
547 # Position (char) in the Rope (0-indexed)
550 init(root
: RopeString) is old_style_init
do
551 subs
= new RopeSubstrings(root
)
554 max
= root
.length
- 1
558 init from
(root
: RopeString, pos
: Int) do
559 subs
= new RopeSubstrings.from
(root
, pos
)
560 pns
= pos
- subs
.index
563 max
= root
.length
- 1
566 redef fun item
do return str
[pns
]
568 redef fun is_ok
do return pos
<= max
570 redef fun index
do return pos
575 if pns
< subs
.item
.length
then return
576 if not subs
.is_ok
then return
578 if not subs
.is_ok
then return
584 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
585 private class ReverseRopeSubstrings
586 super IndexedIterator[String]
589 var iter
: RopeIterPiece is noinit
591 var pos
: Int is noinit
594 var str
: String is noinit
596 init(root
: RopeString) is old_style_init
do
597 var r
= new RopeIterPiece(root
, false, true, null)
598 pos
= root
.length
- 1
599 var lnod
: String = root
601 if lnod
isa Concat then
603 r
= new RopeIterPiece(lnod
, false, true, r
)
612 init from
(root
: RopeString, pos
: Int) do
613 var r
= new RopeIterPiece(root
, false, true, null)
614 var rnod
: String = root
617 if rnod
isa Concat then
618 if off
>= rnod
.left
.length
then
619 off
-= rnod
.left
.length
621 r
= new RopeIterPiece(rnod
, false, true, r
)
625 r
= new RopeIterPiece(rnod
, false, true, r
)
637 redef fun item
do return str
639 redef fun index
do return pos
641 redef fun is_ok
do return pos
>= 0
644 if pos
< 0 then return
646 var currit
= curr
.node
647 while curr
!= null do
649 if not currit
isa Concat then
655 if not curr
.rdone
then
657 curr
= new RopeIterPiece(currit
.right
, false, false, curr
)
660 if not curr
.ldone
then
662 curr
= new RopeIterPiece(currit
.left
, false, false, curr
)
671 private class RopeBufSubstringIterator
672 super Iterator[FlatText]
674 # Iterator on the substrings of the building string
675 var iter
: Iterator[FlatText]
676 # Makes a String out of the buffered part of the Ropebuffer
677 var nsstr
: FlatString
678 # Did we attain the buffered part ?
679 var nsstr_done
= false
681 init(str
: RopeBuffer) is old_style_init
do
682 iter
= str
.str
.substrings
683 nsstr
= new FlatString.with_infos
(str
.ns
, str
.rpos
- str
.dumped
, str
.dumped
, str
.rpos
- 1)
684 if str
.length
== 0 then nsstr_done
= true
687 redef fun is_ok
do return iter
.is_ok
or not nsstr_done
691 if iter
.is_ok
then return iter
.item
704 # Substrings of a Rope (i.e. Postfix iterator on leaves)
705 private class RopeSubstrings
706 super IndexedIterator[FlatString]
709 var iter
: RopeIterPiece is noinit
711 var pos
: Int is noinit
712 # Maximum position in `Rope` (i.e. length - 1)
713 var max
: Int is noinit
716 var str
: FlatString is noinit
718 init(root
: RopeString) is old_style_init
do
719 var r
= new RopeIterPiece(root
, true, false, null)
721 max
= root
.length
- 1
722 var rnod
: String = root
724 if rnod
isa Concat then
726 r
= new RopeIterPiece(rnod
, true, false, r
)
728 str
= rnod
.as(FlatString)
736 init from
(root
: RopeString, pos
: Int) do
737 var r
= new RopeIterPiece(root
, true, false, null)
738 max
= root
.length
- 1
739 var rnod
: String = root
742 if rnod
isa Concat then
743 if off
>= rnod
.left
.length
then
745 off
-= rnod
.left
.length
747 r
= new RopeIterPiece(rnod
, true, false, r
)
750 r
= new RopeIterPiece(rnod
, true, false, r
)
753 str
= rnod
.as(FlatString)
762 redef fun item
do return str
764 redef fun is_ok
do return pos
<= max
766 redef fun index
do return pos
770 if pos
> max
then return
774 if not rnod
isa Concat then
777 str
= rnod
.as(FlatString)
778 iter
= it
.as(not null)
784 it
= new RopeIterPiece(rnod
, false, false, it
)
785 else if not it
.rdone
then
788 it
= new RopeIterPiece(rnod
, false, false, it
)
798 # Implementation of a `StringCharView` for `RopeString` objects
799 private class RopeChars
802 redef type SELFTYPE: RopeString
808 redef fun iterator_from
(i
) do return new RopeIter.from
(target
, i
)
810 redef fun reverse_iterator_from
(i
) do return new RopeReviter.from
(target
, i
)
814 # An Iterator over a RopeBuffer.
816 super IndexedIterator[Char]
819 var sit
: IndexedIterator[Char]
821 # Native string iterated over.
824 # Current position in `ns`.
827 # Maximum position iterable.
832 # Init the iterator from a RopeBuffer.
833 init(t
: RopeBuffer) is old_style_init
do
836 sit
= t
.str
.chars
.iterator
841 # Init the iterator from a RopeBuffer starting from `pos`.
842 init from
(t
: RopeBuffer, pos
: Int) do
845 sit
= t
.str
.chars
.iterator_from
(pos
)
846 pns
= pos
- t
.str
.length
850 redef fun is_ok
do return index
< maxpos
853 if sit
.is_ok
then return sit
.item
867 # Reverse iterator over a RopeBuffer.
868 class RopeBufferReviter
869 super IndexedIterator[Char]
872 var sit
: IndexedIterator[Char]
874 # Native string iterated over.
877 # Current position in `ns`.
882 # Init the iterator from a RopeBuffer.
883 init(tgt
: RopeBuffer) is old_style_init
do
884 sit
= tgt
.str
.chars
.reverse_iterator
886 index
= tgt
.length
- 1
890 # Init the iterator from a RopeBuffer starting from `pos`.
891 init from
(tgt
: RopeBuffer, pos
: Int) do
892 sit
= tgt
.str
.chars
.reverse_iterator_from
(pos
- tgt
.rpos
- tgt
.dumped
)
893 pns
= pos
- tgt
.str
.length
898 redef fun is_ok
do return index
> 0
901 if pns
>= 0 then return ns
[pns
]
915 # View on the chars of a `RopeBuffer`
916 class RopeBufferChars
919 redef type SELFTYPE: RopeBuffer
922 if i
< target
.str
.length
then
925 return target
.ns
[i
- target
.str
.length
]
929 redef fun []=(i
,c
) do
930 if i
== target
.length
then target
.add c
931 if i
< target
.str
.length
then
933 var l
= s
.substring
(0, i
)
934 var r
= s
.substring_from
(i
+ 1)
935 target
.str
= l
+ c
.to_s
+ r
937 target
.ns
[i
- target
.str
.length
] = c
941 redef fun add
(c
) do target
.add c
943 redef fun push
(c
) do target
.add c
945 redef fun iterator_from
(i
) do return new RopeBufferIter.from
(target
, i
)
947 redef fun reverse_iterator_from
(i
) do return new RopeBufferReviter.from
(target
, i
)