db635d9b828ab9af923c5d67415b5165b56898b6
1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Tree-based representation of a String.
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 # See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
20 # Vol. 25(12), 1315-1330 (December 1995)`.
22 # The implementation developed here provides an automatic change
23 # of data structure depending on the length of the leaves.
25 # The length from which a `Rope` is built from a `flat` string
26 # if defined at top-level (see `maxlen`) and can be redefined by clients
27 # depending on their needs (e.g. if you need to bench the speed of
28 # the creation of concat nodes, lower the size of maxlen).
30 # A Rope as defined in the original paper is a Tree made of concatenation
31 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
39 # `"My" " Name" " is" " Simon." `
41 # Note that the above example is not representative of the actual implementation
42 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
43 # height (hence, this rope here might actually be a `FlatString` !).
48 # Maxlen is the maximum length of a Leaf node
50 # When concatenating two leaves, if `new_length` > `maxlen`,
51 # A `Concat` node is created instead
53 # Its purpose is to limit the depth of the `Rope` (this
54 # improves performance when accessing/iterating).
55 fun maxlen
: Int do return 64
57 # String using a tree-based representation with leaves as `FlatStrings`
58 private abstract class Rope
62 private abstract class RopeString
66 redef fun chars
is cached
do return new RopeChars(self)
69 # Node that represents a concatenation between two `String`
75 redef fun substrings
do return new RopeSubstrings(self)
77 redef fun empty
do return ""
79 redef fun to_cstring
is cached
do
81 var ns
= new NativeString(len
+ 1)
84 for i
in substrings
do
86 i
.as(FlatString).items
.copy_to
(ns
, ilen
, i
.as(FlatString).index_from
, off
)
92 # Left child of the node
94 # Right child of the node
97 init(l
: String, r
: String) is old_style_init
do
100 length
= l
.length
+ r
.length
108 redef fun iterator
do return new RopeIter(self)
112 for j
in [1 .. i
[ do x
+= self
117 var llen
= left
.length
118 if i
>= llen
then return right
[i
- llen
]
122 redef fun substring
(from
, len
) do
123 var llen
= left
.length
125 if from
+ len
< llen
then return left
.substring
(from
,len
)
126 var lsublen
= llen
- from
127 return left
.substring_from
(from
) + right
.substring
(0, len
- lsublen
)
129 return right
.substring
(from
- llen
, len
)
133 redef fun reversed
do return new Concat(right
.reversed
, left
.reversed
)
135 redef fun insert_at
(s
, pos
) do
136 if pos
> left
.length
then
137 return left
+ right
.insert_at
(s
, pos
- left
.length
)
139 return left
.insert_at
(s
, pos
) + right
142 redef fun to_upper
do return new Concat(left
.to_upper
, right
.to_upper
)
144 redef fun to_lower
do return new Concat(left
.to_lower
, right
.to_lower
)
150 return new Concat(self, s
)
154 if rlen
+ slen
> maxlen
then return new Concat(left
, new Concat(r
, s
))
155 return new Concat(left
, r
+ s
)
160 # Mutable `Rope`, optimized for concatenation operations
162 # A `RopeBuffer` is an efficient way of building a `String` when
163 # concatenating small strings.
165 # It does concatenations in an optimized way by having a
166 # mutable part and an immutable part built by efficiently
167 # concatenating strings in chain.
169 # Every concatenation operation is done by copying a string to
170 # the mutable part and flushing it when full.
172 # However, when a long string is appended to the `Buffer`,
173 # the concatenation is done at it would be in a `Rope`.
178 redef fun chars
: Sequence[Char] is cached
do return new RopeBufferChars(self)
180 # The final string being built on the fly
181 private var str
: String is noinit
183 # Current concatenation buffer
184 private var ns
: NativeString is noinit
186 # Next available (e.g. unset) character in the `Buffer`
189 # Keeps track of the buffer's currently dumped part
191 # This might happen if for instance, a String was being
192 # built by concatenating small parts of string and suddenly
193 # a long string (length > maxlen) is appended.
194 private var dumped
: Int is noinit
196 # Length of the complete rope
199 # Length of the mutable part
201 # Is also used as base to compute the size of the next
202 # mutable native string (`ns`)
203 private var buf_size
: Int is noinit
205 redef fun substrings
: Iterator[String] do return new RopeBufSubstringIterator(self)
207 # Builds an empty `RopeBuffer`
210 ns
= new NativeString(maxlen
)
215 # Builds a new `RopeBuffer` with `str` in it.
216 init from
(str
: String) do
218 ns
= new NativeString(maxlen
)
224 # Resets the informations of the `Buffer`
226 # This is called when doing in-place modifications
227 # on a previously to_s'd `RopeBuffer`
229 var nns
= new NativeString(buf_size
)
230 var blen
= rpos
- dumped
231 ns
.copy_to
(nns
, blen
, dumped
, 0)
237 redef fun empty
do return new RopeBuffer
246 redef fun substring
(from
, count
) do
247 var strlen
= str
.length
251 if count
< 0 then count
= 0
255 if count
> length
then count
= length
- from
257 if count
== 0 then return empty
259 if from
< strlen
then
260 var subpos
= strlen
- from
261 if count
<= subpos
then
262 return new RopeBuffer.from
(str
.substring
(from
, count
))
264 var l
= str
.substring_from
(from
)
265 var rem
= count
- subpos
266 var nns
= new NativeString(rem
)
267 ns
.copy_to
(nns
, rem
, dumped
, 0)
268 return new RopeBuffer.from
(l
+ nns
.to_s_with_length
(rem
))
271 var nns
= new NativeString(count
)
272 ns
.copy_to
(nns
, count
, dumped
, 0)
273 return new RopeBuffer.from
(nns
.to_s_with_length
(count
))
277 redef fun append
(s
) do
282 if rp
> 0 and dumped
!= rp
then
283 str
+= new FlatString.with_infos
(ns
, rp
- dumped
, dumped
, rp
- 1)
289 if slen
> maxlen
then
290 if rp
> 0 and dumped
!= rp
then
291 str
+= new FlatString.with_infos
(ns
, rp
- dumped
, dumped
, rp
- 1)
297 var remsp
= buf_size
- rp
298 var sits
: NativeString
300 if s
isa FlatString then
303 else if s
isa FlatBuffer then
307 if slen
<= remsp
then
314 for i
in [0..remsp
[ do
328 if slen
<= remsp
then
329 sits
.copy_to
(ns
, slen
, begin
, rp
)
330 if rp
== buf_size
then
338 sits
.copy_to
(ns
, remsp
, begin
, rp
)
341 var nlen
= slen
- remsp
342 sits
.copy_to
(ns
, nlen
, begin
+ remsp
, 0)
352 if rp
== buf_size
then
360 # Converts the Buffer to a FlatString, appends it to
361 # the final String and re-allocates a new larger Buffer.
362 private fun dump_buffer
do
364 var nstr
= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
368 ns
= new NativeString(bs
)
375 new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1).output
378 # Enlarge is useless here since the `Buffer`
379 # part is automatically dumped when necessary.
381 # Also, since the buffer can not be overused by a
382 # single string, there is no need for manual
385 # "You have no power here !"
386 redef fun enlarge
(i
) do end
390 var nnslen
= rpos
- dumped
391 if nnslen
== 0 then return str
392 return str
+ new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
397 var nns
= new NativeString(buf_size
)
400 for i
in [0 .. rpos
[ do
408 if written
then reset
411 for i
in [0 .. rpos
[ do
412 mits
[i
] = mits
[i
].to_upper
417 if written
then reset
420 for i
in [0 .. rpos
[ do
421 mits
[i
] = mits
[i
].to_lower
426 redef class FlatString
428 redef fun insert_at
(s
, pos
) do
429 var l
= substring
(0, pos
)
430 var r
= substring_from
(pos
)
438 if slen
== 0 then return self
439 if mlen
== 0 then return s
440 var nlen
= slen
+ mlen
441 if s
isa FlatString then
442 if nlen
> maxlen
then return new Concat(self, s
)
444 var sifrom
= s
.index_from
445 var mifrom
= index_from
447 var ns
= new NativeString(nlen
+ 1)
448 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
449 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
450 return ns
.to_s_with_length
(nlen
)
451 else if s
isa Concat then
453 var sllen
= sl
.length
454 if sllen
+ mlen
> maxlen
then return new Concat(self, s
)
455 return new Concat(self + sl
, s
.right
)
462 # A simple linked list for use with iterators
463 private class RopeIterPiece
464 # The encapsulated node of the `Rope`
466 # Was its left child (if any) visited ?
468 # Was its right child (if any) visited ?
470 # The previous node in the list.
471 var prev
: nullable RopeIterPiece
474 # A reverse iterator capable of working with `Rope` objects
475 private class RopeReviter
476 super IndexedIterator[Char]
478 # Current NativeString
480 # Current position in NativeString
482 # Position in the Rope (0-indexed)
484 # Iterator on the substrings, does the Postfix part of
485 # the Rope traversal.
486 var subs
: IndexedIterator[String]
488 init(root
: RopeString) is old_style_init
do
489 pos
= root
.length
- 1
490 subs
= new ReverseRopeSubstrings(root
)
495 init from
(root
: RopeString, pos
: Int) do
497 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
499 pns
= pos
- subs
.index
502 redef fun index
do return pos
504 redef fun is_ok
do return pos
>= 0
506 redef fun item
do return ns
[pns
]
511 if pns
>= 0 then return
512 if not subs
.is_ok
then return
514 if not subs
.is_ok
then return
520 # Forward iterator on the chars of a `Rope`
521 private class RopeIter
522 super IndexedIterator[Char]
524 # Position in current `String`
526 # Current `String` being iterated on
528 # Substrings of the Rope
529 var subs
: IndexedIterator[String]
530 # Maximum position to iterate on (e.g. Rope.length)
532 # Position (char) in the Rope (0-indexed)
535 init(root
: RopeString) is old_style_init
do
536 subs
= new RopeSubstrings(root
)
539 max
= root
.length
- 1
543 init from
(root
: RopeString, pos
: Int) do
544 subs
= new RopeSubstrings.from
(root
, pos
)
545 pns
= pos
- subs
.index
548 max
= root
.length
- 1
551 redef fun item
do return str
[pns
]
553 redef fun is_ok
do return pos
<= max
555 redef fun index
do return pos
560 if pns
< subs
.item
.length
then return
561 if not subs
.is_ok
then return
563 if not subs
.is_ok
then return
569 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
570 private class ReverseRopeSubstrings
571 super IndexedIterator[String]
574 var iter
: RopeIterPiece is noinit
576 var pos
: Int is noinit
579 var str
: String is noinit
581 init(root
: RopeString) is old_style_init
do
582 var r
= new RopeIterPiece(root
, false, true, null)
583 pos
= root
.length
- 1
584 var lnod
: String = root
586 if lnod
isa Concat then
588 r
= new RopeIterPiece(lnod
, false, true, r
)
597 init from
(root
: RopeString, pos
: Int) do
598 var r
= new RopeIterPiece(root
, false, true, null)
599 var rnod
: String = root
602 if rnod
isa Concat then
603 if off
>= rnod
.left
.length
then
604 off
-= rnod
.left
.length
606 r
= new RopeIterPiece(rnod
, false, true, r
)
610 r
= new RopeIterPiece(rnod
, false, true, r
)
622 redef fun item
do return str
624 redef fun index
do return pos
626 redef fun is_ok
do return pos
>= 0
629 if pos
< 0 then return
631 var currit
= curr
.node
632 while curr
!= null do
634 if not currit
isa Concat then
640 if not curr
.rdone
then
642 curr
= new RopeIterPiece(currit
.right
, false, false, curr
)
645 if not curr
.ldone
then
647 curr
= new RopeIterPiece(currit
.left
, false, false, curr
)
656 private class RopeBufSubstringIterator
657 super Iterator[String]
659 # Iterator on the substrings of the building string
660 var iter
: Iterator[String]
661 # Makes a String out of the buffered part of the Ropebuffer
663 # Did we attain the buffered part ?
664 var nsstr_done
= false
666 init(str
: RopeBuffer) is old_style_init
do
667 iter
= str
.str
.substrings
668 nsstr
= new FlatString.with_infos
(str
.ns
, str
.rpos
- str
.dumped
, str
.dumped
, str
.rpos
- 1)
669 if str
.length
== 0 then nsstr_done
= true
672 redef fun is_ok
do return iter
.is_ok
or not nsstr_done
676 if iter
.is_ok
then return iter
.item
689 # Substrings of a Rope (i.e. Postfix iterator on leaves)
690 private class RopeSubstrings
691 super IndexedIterator[String]
694 var iter
: RopeIterPiece is noinit
696 var pos
: Int is noinit
697 # Maximum position in `Rope` (i.e. length - 1)
698 var max
: Int is noinit
701 var str
: String is noinit
703 init(root
: RopeString) is old_style_init
do
704 var r
= new RopeIterPiece(root
, true, false, null)
706 max
= root
.length
- 1
707 var rnod
: String = root
709 if rnod
isa Concat then
711 r
= new RopeIterPiece(rnod
, true, false, r
)
721 init from
(root
: RopeString, pos
: Int) do
722 var r
= new RopeIterPiece(root
, true, false, null)
723 max
= root
.length
- 1
724 var rnod
: String = root
727 if rnod
isa Concat then
728 if off
>= rnod
.left
.length
then
730 off
-= rnod
.left
.length
732 r
= new RopeIterPiece(rnod
, true, false, r
)
735 r
= new RopeIterPiece(rnod
, true, false, r
)
747 redef fun item
do return str
749 redef fun is_ok
do return pos
<= max
751 redef fun index
do return pos
755 if pos
> max
then return
759 if not rnod
isa Concat then
763 iter
= it
.as(not null)
769 it
= new RopeIterPiece(rnod
, false, false, it
)
770 else if not it
.rdone
then
773 it
= new RopeIterPiece(rnod
, false, false, it
)
783 # Implementation of a `StringCharView` for `RopeString` objects
784 private class RopeChars
789 init(s
: RopeString) is old_style_init
do tgt
= s
795 redef fun iterator_from
(i
) do return new RopeIter.from
(tgt
, i
)
797 redef fun reverse_iterator_from
(i
) do return new RopeReviter.from
(tgt
, i
)
801 # An Iterator over a RopeBuffer.
803 super IndexedIterator[Char]
806 var sit
: IndexedIterator[Char]
808 # Native string iterated over.
811 # Current position in `ns`.
814 # Maximum position iterable.
819 # Init the iterator from a RopeBuffer.
820 init(t
: RopeBuffer) is old_style_init
do
823 sit
= t
.str
.chars
.iterator
828 # Init the iterator from a RopeBuffer starting from `pos`.
829 init from
(t
: RopeBuffer, pos
: Int) do
832 sit
= t
.str
.chars
.iterator_from
(pos
)
833 pns
= pos
- t
.str
.length
837 redef fun is_ok
do return index
< maxpos
840 if sit
.is_ok
then return sit
.item
854 # Reverse iterator over a RopeBuffer.
855 class RopeBufferReviter
856 super IndexedIterator[Char]
859 var sit
: IndexedIterator[Char]
861 # Native string iterated over.
864 # Current position in `ns`.
869 # Init the iterator from a RopeBuffer.
870 init(tgt
: RopeBuffer) is old_style_init
do
871 sit
= tgt
.str
.chars
.reverse_iterator
873 index
= tgt
.length
- 1
877 # Init the iterator from a RopeBuffer starting from `pos`.
878 init from
(tgt
: RopeBuffer, pos
: Int) do
879 sit
= tgt
.str
.chars
.reverse_iterator_from
(pos
- tgt
.rpos
- tgt
.dumped
)
880 pns
= pos
- tgt
.str
.length
885 redef fun is_ok
do return index
> 0
888 if pns
>= 0 then return ns
[pns
]
902 # View on the chars of a `RopeBuffer`
903 class RopeBufferChars
906 redef type SELFTYPE: RopeBuffer
909 if i
< target
.str
.length
then
912 return target
.ns
[i
- target
.str
.length
]
916 redef fun []=(i
,c
) do
917 if i
== target
.length
then target
.add c
918 if i
< target
.str
.length
then
920 var l
= s
.substring
(0, i
)
921 var r
= s
.substring_from
(i
+ 1)
922 target
.str
= l
+ c
.to_s
+ r
924 target
.ns
[i
- target
.str
.length
] = c
928 redef fun add
(c
) do target
.add c
930 redef fun push
(c
) do target
.add c
932 redef fun iterator_from
(i
) do return new RopeBufferIter.from
(target
, i
)
934 redef fun reverse_iterator_from
(i
) do return new RopeBufferReviter.from
(target
, i
)