1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Tree-based representation of a String.
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
44 # "My" " Name" " is" " Simon."
47 # Note that the above example is not representative of the actual implementation
48 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
49 # height (hence, this rope here might actually be a `FlatString` !).
54 # Maxlen is the maximum length of a Leaf node
56 # When concatenating two leaves, if `new_length` > `maxlen`,
57 # A `Concat` node is created instead
59 # Its purpose is to limit the depth of the `Rope` (this
60 # improves performance when accessing/iterating).
61 fun maxlen
: Int do return 64
63 # String using a tree-based representation with leaves as `FlatStrings`
64 private abstract class Rope
68 # Node that represents a concatenation between two `String`
73 redef var chars
is lazy
do return new RopeChars(self)
75 redef var length
is noinit
77 redef fun substrings
do return new RopeSubstrings(self)
79 redef fun empty
do return ""
81 redef var to_cstring
is lazy
do
83 var ns
= new NativeString(len
+ 1)
86 for i
in substrings
do
88 i
.as(FlatString).items
.copy_to
(ns
, ilen
, i
.as(FlatString).index_from
, off
)
94 # Left child of the node
96 # Right child of the node
100 length
= left
.length
+ right
.length
108 redef fun iterator
do return new RopeIter(self)
112 for j
in [1 .. i
[ do x
+= self
117 var llen
= left
.length
118 if i
>= llen
then return right
[i
- llen
]
122 redef fun substring
(from
, len
) do
123 var llen
= left
.length
125 if from
+ len
< llen
then return left
.substring
(from
,len
)
126 var lsublen
= llen
- from
127 return left
.substring_from
(from
) + right
.substring
(0, len
- lsublen
)
129 return right
.substring
(from
- llen
, len
)
133 redef fun reversed
do return new Concat(right
.reversed
, left
.reversed
)
135 redef fun insert_at
(s
, pos
) do
136 if pos
> left
.length
then
137 return left
+ right
.insert_at
(s
, pos
- left
.length
)
139 return left
.insert_at
(s
, pos
) + right
142 redef fun to_upper
do return new Concat(left
.to_upper
, right
.to_upper
)
144 redef fun to_lower
do return new Concat(left
.to_lower
, right
.to_lower
)
150 return new Concat(self, s
)
154 if rlen
+ slen
> maxlen
then return new Concat(self, s
)
155 return new Concat(left
, r
+ s
)
159 redef fun copy_to_native
(dest
, n
, src_offset
, dest_offset
) do
160 var subs
= new RopeSubstrings.from
(self, src_offset
)
161 var st
= src_offset
- subs
.pos
162 var off
= dest_offset
165 if n
> it
.length
then
166 var cplen
= it
.length
- st
167 it
.items
.copy_to
(dest
, cplen
, st
, off
)
171 it
.items
.copy_to
(dest
, n
, st
, off
)
180 # Mutable `Rope`, optimized for concatenation operations
182 # A `RopeBuffer` is an efficient way of building a `String` when
183 # concatenating small strings.
185 # It does concatenations in an optimized way by having a
186 # mutable part and an immutable part built by efficiently
187 # concatenating strings in chain.
189 # Every concatenation operation is done by copying a string to
190 # the mutable part and flushing it when full.
192 # However, when a long string is appended to the `Buffer`,
193 # the concatenation is done at it would be in a `Rope`.
198 redef var chars
: Sequence[Char] is lazy
do return new RopeBufferChars(self)
200 # The final string being built on the fly
201 private var str
: String is noinit
203 # Current concatenation buffer
204 private var ns
: NativeString is noinit
206 # Next available (e.g. unset) character in the `Buffer`
209 # Keeps track of the buffer's currently dumped part
211 # This might happen if for instance, a String was being
212 # built by concatenating small parts of string and suddenly
213 # a long string (length > maxlen) is appended.
214 private var dumped
: Int is noinit
216 # Length of the complete rope
219 # Length of the mutable part
221 # Is also used as base to compute the size of the next
222 # mutable native string (`ns`)
223 private var buf_size
: Int is noinit
225 redef fun substrings
do return new RopeBufSubstringIterator(self)
227 # Builds an empty `RopeBuffer`
230 ns
= new NativeString(maxlen
)
235 # Builds a new `RopeBuffer` with `str` in it.
236 init from
(str
: String) do
238 ns
= new NativeString(maxlen
)
244 # Resets the informations of the `Buffer`
246 # This is called when doing in-place modifications
247 # on a previously to_s'd `RopeBuffer`
249 var nns
= new NativeString(buf_size
)
250 var blen
= rpos
- dumped
251 ns
.copy_to
(nns
, blen
, dumped
, 0)
258 redef fun empty
do return new RopeBuffer
266 ns
= new NativeString(buf_size
)
271 redef fun substring
(from
, count
) do
272 var strlen
= str
.length
276 if count
< 0 then count
= 0
280 if count
> length
then count
= length
- from
282 if count
== 0 then return empty
284 if from
< strlen
then
285 var subpos
= strlen
- from
286 if count
<= subpos
then
287 return new RopeBuffer.from
(str
.substring
(from
, count
))
289 var l
= str
.substring_from
(from
)
290 var rem
= count
- subpos
291 var nns
= new NativeString(rem
)
292 ns
.copy_to
(nns
, rem
, dumped
, 0)
293 return new RopeBuffer.from
(l
+ nns
.to_s_with_length
(rem
))
296 var nns
= new NativeString(count
)
297 ns
.copy_to
(nns
, count
, dumped
, 0)
298 return new RopeBuffer.from
(nns
.to_s_with_length
(count
))
302 redef fun append
(s
) do
306 if s
isa Rope or slen
> maxlen
then
307 if rp
> 0 and dumped
!= rp
then
308 str
+= new FlatString.with_infos
(ns
, rp
- dumped
, dumped
, rp
- 1)
314 var remsp
= buf_size
- rp
315 var sits
: NativeString
317 if s
isa FlatString then
320 else if s
isa FlatBuffer then
324 if slen
<= remsp
then
331 for i
in [0..remsp
[ do
345 if slen
<= remsp
then
350 sits
.copy_to
(ns
, slen
, begin
, rp
)
354 sits
.copy_to
(ns
, remsp
, begin
, rp
)
357 var nlen
= slen
- remsp
358 sits
.copy_to
(ns
, nlen
, begin
+ remsp
, 0)
365 if rp
>= buf_size
then
375 # Converts the Buffer to a FlatString, appends it to
376 # the final String and re-allocates a new larger Buffer.
377 private fun dump_buffer
do
379 var nstr
= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
383 ns
= new NativeString(bs
)
390 new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1).output
393 # Enlarge is useless here since the `Buffer`
394 # part is automatically dumped when necessary.
396 # Also, since the buffer can not be overused by a
397 # single string, there is no need for manual
400 # "You have no power here !"
401 redef fun enlarge
(i
) do end
405 var nnslen
= rpos
- dumped
406 if nnslen
== 0 then return str
407 return str
+ new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
411 # Flush the buffer in order to only have to reverse `str`.
412 if rpos
> 0 and dumped
!= rpos
then
413 str
+= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
420 if written
then reset
423 for i
in [0 .. rpos
[ do
424 mits
[i
] = mits
[i
].to_upper
429 if written
then reset
432 for i
in [0 .. rpos
[ do
433 mits
[i
] = mits
[i
].to_lower
438 redef class FlatString
440 redef fun insert_at
(s
, pos
) do
441 var l
= substring
(0, pos
)
442 var r
= substring_from
(pos
)
450 if slen
== 0 then return self
451 if mlen
== 0 then return s
452 var nlen
= slen
+ mlen
453 if s
isa FlatString then
454 if nlen
> maxlen
then return new Concat(self, s
)
456 var sifrom
= s
.index_from
457 var mifrom
= index_from
459 var ns
= new NativeString(nlen
+ 1)
460 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
461 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
462 return ns
.to_s_with_length
(nlen
)
463 else if s
isa Concat then
465 var sllen
= sl
.length
466 if sllen
+ mlen
> maxlen
then return new Concat(self, s
)
467 return new Concat(self + sl
, s
.right
)
474 # A simple linked list for use with iterators
475 private class RopeIterPiece
476 # The encapsulated node of the `Rope`
478 # Was its left child (if any) visited ?
480 # Was its right child (if any) visited ?
482 # The previous node in the list.
483 var prev
: nullable RopeIterPiece
486 # A reverse iterator capable of working with `Rope` objects
487 private class RopeReviter
488 super IndexedIterator[Char]
490 # Current NativeString
492 # Current position in NativeString
494 # Position in the Rope (0-indexed)
496 # Iterator on the substrings, does the Postfix part of
497 # the Rope traversal.
498 var subs
: IndexedIterator[String]
500 init(root
: Concat) is old_style_init
do
501 pos
= root
.length
- 1
502 subs
= new ReverseRopeSubstrings(root
)
507 init from
(root
: Concat, pos
: Int) do
509 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
511 pns
= pos
- subs
.index
514 redef fun index
do return pos
516 redef fun is_ok
do return pos
>= 0
518 redef fun item
do return ns
[pns
]
523 if pns
>= 0 then return
524 if not subs
.is_ok
then return
526 if not subs
.is_ok
then return
532 # Forward iterator on the chars of a `Rope`
533 private class RopeIter
534 super IndexedIterator[Char]
536 # Position in current `String`
538 # Current `String` being iterated on
540 # Substrings of the Rope
541 var subs
: IndexedIterator[String]
542 # Maximum position to iterate on (e.g. Rope.length)
544 # Position (char) in the Rope (0-indexed)
547 init(root
: Concat) is old_style_init
do
548 subs
= new RopeSubstrings(root
)
551 max
= root
.length
- 1
555 init from
(root
: Concat, pos
: Int) do
556 subs
= new RopeSubstrings.from
(root
, pos
)
557 pns
= pos
- subs
.index
560 max
= root
.length
- 1
563 redef fun item
do return str
[pns
]
565 redef fun is_ok
do return pos
<= max
567 redef fun index
do return pos
572 if pns
< subs
.item
.length
then return
573 if not subs
.is_ok
then return
575 if not subs
.is_ok
then return
581 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
582 private class ReverseRopeSubstrings
583 super IndexedIterator[String]
586 var iter
: RopeIterPiece is noinit
588 var pos
: Int is noinit
591 var str
: String is noinit
593 init(root
: Concat) is old_style_init
do
594 var r
= new RopeIterPiece(root
, false, true, null)
595 pos
= root
.length
- 1
596 var lnod
: String = root
598 if lnod
isa Concat then
600 r
= new RopeIterPiece(lnod
, false, true, r
)
609 init from
(root
: Concat, pos
: Int) do
610 var r
= new RopeIterPiece(root
, false, true, null)
611 var rnod
: String = root
614 if rnod
isa Concat then
615 if off
>= rnod
.left
.length
then
616 off
-= rnod
.left
.length
618 r
= new RopeIterPiece(rnod
, false, true, r
)
622 r
= new RopeIterPiece(rnod
, false, true, r
)
634 redef fun item
do return str
636 redef fun index
do return pos
638 redef fun is_ok
do return pos
>= 0
641 if pos
< 0 then return
643 var currit
= curr
.node
644 while curr
!= null do
646 if not currit
isa Concat then
652 if not curr
.rdone
then
654 curr
= new RopeIterPiece(currit
.right
, false, false, curr
)
657 if not curr
.ldone
then
659 curr
= new RopeIterPiece(currit
.left
, false, false, curr
)
668 private class RopeBufSubstringIterator
669 super Iterator[FlatText]
671 # Iterator on the substrings of the building string
672 var iter
: Iterator[FlatText]
673 # Makes a String out of the buffered part of the Ropebuffer
674 var nsstr
: FlatString
675 # Did we attain the buffered part ?
676 var nsstr_done
= false
678 init(str
: RopeBuffer) is old_style_init
do
679 iter
= str
.str
.substrings
680 nsstr
= new FlatString.with_infos
(str
.ns
, str
.rpos
- str
.dumped
, str
.dumped
, str
.rpos
- 1)
681 if str
.length
== 0 then nsstr_done
= true
684 redef fun is_ok
do return iter
.is_ok
or not nsstr_done
688 if iter
.is_ok
then return iter
.item
701 # Substrings of a Rope (i.e. Postfix iterator on leaves)
702 private class RopeSubstrings
703 super IndexedIterator[FlatString]
706 var iter
: RopeIterPiece is noinit
708 var pos
: Int is noinit
709 # Maximum position in `Rope` (i.e. length - 1)
710 var max
: Int is noinit
713 var str
: FlatString is noinit
715 init(root
: Concat) is old_style_init
do
716 var r
= new RopeIterPiece(root
, true, false, null)
718 max
= root
.length
- 1
719 var rnod
: String = root
721 if rnod
isa Concat then
723 r
= new RopeIterPiece(rnod
, true, false, r
)
725 str
= rnod
.as(FlatString)
733 init from
(root
: Concat, pos
: Int) do
734 var r
= new RopeIterPiece(root
, true, false, null)
735 max
= root
.length
- 1
736 var rnod
: String = root
739 if rnod
isa Concat then
740 if off
>= rnod
.left
.length
then
742 off
-= rnod
.left
.length
744 r
= new RopeIterPiece(rnod
, true, false, r
)
747 r
= new RopeIterPiece(rnod
, true, false, r
)
750 str
= rnod
.as(FlatString)
759 redef fun item
do return str
761 redef fun is_ok
do return pos
<= max
763 redef fun index
do return pos
767 if pos
> max
then return
771 if not rnod
isa Concat then
774 str
= rnod
.as(FlatString)
775 iter
= it
.as(not null)
781 it
= new RopeIterPiece(rnod
, false, false, it
)
782 else if not it
.rdone
then
785 it
= new RopeIterPiece(rnod
, false, false, it
)
795 # Implementation of a `StringCharView` for `Concat` objects
796 private class RopeChars
799 redef type SELFTYPE: Concat
805 redef fun iterator_from
(i
) do return new RopeIter.from
(target
, i
)
807 redef fun reverse_iterator_from
(i
) do return new RopeReviter.from
(target
, i
)
811 # An Iterator over a RopeBuffer.
813 super IndexedIterator[Char]
816 var sit
: IndexedIterator[Char]
818 # Native string iterated over.
821 # Current position in `ns`.
824 # Maximum position iterable.
829 # Init the iterator from a RopeBuffer.
830 init(t
: RopeBuffer) is old_style_init
do
833 sit
= t
.str
.chars
.iterator
838 # Init the iterator from a RopeBuffer starting from `pos`.
839 init from
(t
: RopeBuffer, pos
: Int) do
842 sit
= t
.str
.chars
.iterator_from
(pos
)
843 pns
= pos
- t
.str
.length
847 redef fun is_ok
do return index
< maxpos
850 if sit
.is_ok
then return sit
.item
864 # Reverse iterator over a RopeBuffer.
865 class RopeBufferReviter
866 super IndexedIterator[Char]
869 var sit
: IndexedIterator[Char]
871 # Native string iterated over.
874 # Current position in `ns`.
879 # Init the iterator from a RopeBuffer.
880 init(tgt
: RopeBuffer) is old_style_init
do
881 sit
= tgt
.str
.chars
.reverse_iterator
883 index
= tgt
.length
- 1
887 # Init the iterator from a RopeBuffer starting from `pos`.
888 init from
(tgt
: RopeBuffer, pos
: Int) do
889 sit
= tgt
.str
.chars
.reverse_iterator_from
(pos
- tgt
.rpos
- tgt
.dumped
)
890 pns
= pos
- tgt
.str
.length
895 redef fun is_ok
do return index
> 0
898 if pns
>= 0 then return ns
[pns
]
912 # View on the chars of a `RopeBuffer`
913 class RopeBufferChars
916 redef type SELFTYPE: RopeBuffer
919 if i
< target
.str
.length
then
922 return target
.ns
[i
- target
.str
.length
]
926 redef fun []=(i
,c
) do
927 if i
== target
.length
then target
.add c
928 if i
< target
.str
.length
then
930 var l
= s
.substring
(0, i
)
931 var r
= s
.substring_from
(i
+ 1)
932 target
.str
= l
+ c
.to_s
+ r
934 target
.ns
[i
- target
.str
.length
] = c
938 redef fun add
(c
) do target
.add c
940 redef fun push
(c
) do target
.add c
942 redef fun iterator_from
(i
) do return new RopeBufferIter.from
(target
, i
)
944 redef fun reverse_iterator_from
(i
) do return new RopeBufferReviter.from
(target
, i
)