c0a2421effc141a1d6c1486f753412cba81418ff
1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Tree-based representation of a String.
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
44 # "My" " Name" " is" " Simon."
47 # Note that the above example is not representative of the actual implementation
48 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
49 # height (hence, this rope here might actually be a `FlatString` !).
54 # Maxlen is the maximum length of a Leaf node
56 # When concatenating two leaves, if `new_length` > `maxlen`,
57 # A `Concat` node is created instead
59 # Its purpose is to limit the depth of the `Rope` (this
60 # improves performance when accessing/iterating).
61 fun maxlen
: Int do return 64
63 # String using a tree-based representation with leaves as `FlatStrings`
64 private abstract class Rope
68 # Node that represents a concatenation between two `String`
73 redef var chars
is lazy
do return new RopeChars(self)
75 redef var bytes
is lazy
do return new RopeBytes(self)
77 redef var length
is noinit
79 redef fun substrings
do return new RopeSubstrings(self)
81 redef fun empty
do return ""
83 redef var to_cstring
is lazy
do
85 var ns
= new NativeString(len
+ 1)
88 for i
in substrings
do
90 i
.as(FlatString).items
.copy_to
(ns
, ilen
, i
.as(FlatString).index_from
, off
)
96 # Left child of the node
98 # Right child of the node
102 length
= left
.length
+ right
.length
110 redef fun iterator
do return new RopeCharIterator(self)
114 for j
in [1 .. i
[ do x
+= self
119 var llen
= left
.length
120 if i
>= llen
then return right
[i
- llen
]
124 redef fun substring
(from
, len
) do
125 var llen
= left
.length
127 if from
+ len
< llen
then return left
.substring
(from
,len
)
128 var lsublen
= llen
- from
129 return left
.substring_from
(from
) + right
.substring
(0, len
- lsublen
)
131 return right
.substring
(from
- llen
, len
)
135 redef fun reversed
do return new Concat(right
.reversed
, left
.reversed
)
137 redef fun insert_at
(s
, pos
) do
138 if pos
> left
.length
then
139 return left
+ right
.insert_at
(s
, pos
- left
.length
)
141 return left
.insert_at
(s
, pos
) + right
144 redef fun to_upper
do return new Concat(left
.to_upper
, right
.to_upper
)
146 redef fun to_lower
do return new Concat(left
.to_lower
, right
.to_lower
)
152 return new Concat(self, s
)
156 if rlen
+ slen
> maxlen
then return new Concat(self, s
)
157 return new Concat(left
, r
+ s
)
161 redef fun copy_to_native
(dest
, n
, src_offset
, dest_offset
) do
162 var subs
= new RopeSubstrings.from
(self, src_offset
)
163 var st
= src_offset
- subs
.pos
164 var off
= dest_offset
167 if n
> it
.length
then
168 var cplen
= it
.length
- st
169 it
.items
.copy_to
(dest
, cplen
, st
, off
)
173 it
.items
.copy_to
(dest
, n
, st
, off
)
182 # Mutable `Rope`, optimized for concatenation operations
184 # A `RopeBuffer` is an efficient way of building a `String` when
185 # concatenating small strings.
187 # It does concatenations in an optimized way by having a
188 # mutable part and an immutable part built by efficiently
189 # concatenating strings in chain.
191 # Every concatenation operation is done by copying a string to
192 # the mutable part and flushing it when full.
194 # However, when a long string is appended to the `Buffer`,
195 # the concatenation is done at it would be in a `Rope`.
200 redef var chars
: Sequence[Char] is lazy
do return new RopeBufferChars(self)
202 redef var bytes
: Sequence[Byte] is lazy
do return new RopeBufferBytes(self)
204 # The final string being built on the fly
205 private var str
: String is noinit
207 # Current concatenation buffer
208 private var ns
: NativeString is noinit
210 # Next available (e.g. unset) character in the `Buffer`
213 # Keeps track of the buffer's currently dumped part
215 # This might happen if for instance, a String was being
216 # built by concatenating small parts of string and suddenly
217 # a long string (length > maxlen) is appended.
218 private var dumped
: Int is noinit
220 # Length of the complete rope
223 # Length of the mutable part
225 # Is also used as base to compute the size of the next
226 # mutable native string (`ns`)
227 private var buf_size
: Int is noinit
229 redef fun substrings
do return new RopeBufSubstringIterator(self)
231 # Builds an empty `RopeBuffer`
234 ns
= new NativeString(maxlen
)
239 # Builds a new `RopeBuffer` with `str` in it.
240 init from
(str
: String) do
242 ns
= new NativeString(maxlen
)
248 # Resets the informations of the `Buffer`
250 # This is called when doing in-place modifications
251 # on a previously to_s'd `RopeBuffer`
253 var nns
= new NativeString(buf_size
)
254 var blen
= rpos
- dumped
255 ns
.copy_to
(nns
, blen
, dumped
, 0)
262 redef fun empty
do return new RopeBuffer
270 ns
= new NativeString(buf_size
)
275 redef fun substring
(from
, count
) do
276 var strlen
= str
.length
280 if count
< 0 then count
= 0
284 if count
> length
then count
= length
- from
286 if count
== 0 then return empty
288 if from
< strlen
then
289 var subpos
= strlen
- from
290 if count
<= subpos
then
291 return new RopeBuffer.from
(str
.substring
(from
, count
))
293 var l
= str
.substring_from
(from
)
294 var rem
= count
- subpos
295 var nns
= new NativeString(rem
)
296 ns
.copy_to
(nns
, rem
, dumped
, 0)
297 return new RopeBuffer.from
(l
+ nns
.to_s_with_length
(rem
))
300 var nns
= new NativeString(count
)
301 ns
.copy_to
(nns
, count
, dumped
, 0)
302 return new RopeBuffer.from
(nns
.to_s_with_length
(count
))
306 redef fun append
(s
) do
310 if s
isa Rope or slen
> maxlen
then
311 if rp
> 0 and dumped
!= rp
then
312 str
+= new FlatString.with_infos
(ns
, rp
- dumped
, dumped
, rp
- 1)
318 var remsp
= buf_size
- rp
319 var sits
: NativeString
321 if s
isa FlatString then
324 else if s
isa FlatBuffer then
328 if slen
<= remsp
then
335 for i
in [0..remsp
[ do
349 if slen
<= remsp
then
354 sits
.copy_to
(ns
, slen
, begin
, rp
)
358 sits
.copy_to
(ns
, remsp
, begin
, rp
)
361 var nlen
= slen
- remsp
362 sits
.copy_to
(ns
, nlen
, begin
+ remsp
, 0)
369 if rp
>= buf_size
then
373 # TODO: Fix when supporting UTF-8
374 ns
[rp
] = c
.ascii
.to_b
380 private fun add_byte
(b
: Byte) do
382 if rp
>= buf_size
then
392 # Converts the Buffer to a FlatString, appends it to
393 # the final String and re-allocates a new larger Buffer.
394 private fun dump_buffer
do
396 var nstr
= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
400 ns
= new NativeString(bs
)
407 new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1).output
410 # Enlarge is useless here since the `Buffer`
411 # part is automatically dumped when necessary.
413 # Also, since the buffer can not be overused by a
414 # single string, there is no need for manual
417 # "You have no power here !"
418 redef fun enlarge
(i
) do end
422 var nnslen
= rpos
- dumped
423 if nnslen
== 0 then return str
424 return str
+ new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
428 # Flush the buffer in order to only have to reverse `str`.
429 if rpos
> 0 and dumped
!= rpos
then
430 str
+= new FlatString.with_infos
(ns
, rpos
- dumped
, dumped
, rpos
- 1)
437 if written
then reset
440 for i
in [0 .. rpos
[ do
441 mits
[i
] = mits
[i
].to_upper
446 if written
then reset
449 for i
in [0 .. rpos
[ do
450 mits
[i
] = mits
[i
].to_lower
455 redef class FlatString
457 redef fun insert_at
(s
, pos
) do
458 var l
= substring
(0, pos
)
459 var r
= substring_from
(pos
)
467 if slen
== 0 then return self
468 if mlen
== 0 then return s
469 var nlen
= slen
+ mlen
470 if s
isa FlatString then
471 if nlen
> maxlen
then return new Concat(self, s
)
473 var sifrom
= s
.index_from
474 var mifrom
= index_from
476 var ns
= new NativeString(nlen
+ 1)
477 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
478 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
479 return ns
.to_s_with_length
(nlen
)
480 else if s
isa Concat then
482 var sllen
= sl
.length
483 if sllen
+ mlen
> maxlen
then return new Concat(self, s
)
484 return new Concat(self + sl
, s
.right
)
491 # A simple linked list for use with iterators
492 private class RopeCharIteratorPiece
493 # The encapsulated node of the `Rope`
495 # Was its left child (if any) visited ?
497 # Was its right child (if any) visited ?
499 # The previous node in the list.
500 var prev
: nullable RopeCharIteratorPiece
503 # A reverse iterator capable of working with `Rope` objects
504 private class RopeByteReverseIterator
505 super IndexedIterator[Byte]
507 # Current NativeString
509 # Current position in NativeString
511 # Position in the Rope (0-indexed)
513 # Iterator on the substrings, does the Postfix part of
514 # the Rope traversal.
515 var subs
: IndexedIterator[FlatString]
517 init(root
: Concat) is old_style_init
do
518 pos
= root
.length
- 1
519 subs
= new ReverseRopeSubstrings(root
)
525 init from
(root
: Concat, pos
: Int) do
527 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
530 pns
= pos
- subs
.index
533 redef fun index
do return pos
535 redef fun is_ok
do return pos
>= 0
537 redef fun item
do return ns
[pns
]
542 if pns
>= 0 then return
543 if not subs
.is_ok
then return
545 if not subs
.is_ok
then return
552 # Forward iterator on the bytes of a `Rope`
553 private class RopeByteIterator
554 super IndexedIterator[Byte]
556 # Position in current `String`
558 # Current `String` being iterated on
560 # Substrings of the Rope
561 var subs
: IndexedIterator[FlatString]
562 # Maximum position to iterate on (e.g. Rope.length)
564 # Position (char) in the Rope (0-indexed)
567 init(root
: Concat) is old_style_init
do
568 subs
= new RopeSubstrings(root
)
571 max
= root
.length
- 1
575 init from
(root
: Concat, pos
: Int) do
576 subs
= new RopeSubstrings.from
(root
, pos
)
577 pns
= pos
- subs
.index
580 max
= root
.length
- 1
583 redef fun item
do return ns
[pns
]
585 redef fun is_ok
do return pos
<= max
587 redef fun index
do return pos
592 if pns
< subs
.item
.length
then return
593 if not subs
.is_ok
then return
595 if not subs
.is_ok
then return
602 # A reverse iterator capable of working with `Rope` objects
603 private class RopeCharReverseIterator
604 super IndexedIterator[Char]
606 # Current NativeString
608 # Current position in NativeString
610 # Position in the Rope (0-indexed)
612 # Iterator on the substrings, does the Postfix part of
613 # the Rope traversal.
614 var subs
: IndexedIterator[String]
616 init(root
: Concat) is old_style_init
do
617 pos
= root
.length
- 1
618 subs
= new ReverseRopeSubstrings(root
)
623 init from
(root
: Concat, pos
: Int) do
625 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
627 pns
= pos
- subs
.index
630 redef fun index
do return pos
632 redef fun is_ok
do return pos
>= 0
634 redef fun item
do return ns
[pns
]
639 if pns
>= 0 then return
640 if not subs
.is_ok
then return
642 if not subs
.is_ok
then return
648 # Forward iterator on the chars of a `Rope`
649 private class RopeCharIterator
650 super IndexedIterator[Char]
652 # Position in current `String`
654 # Current `String` being iterated on
656 # Substrings of the Rope
657 var subs
: IndexedIterator[String]
658 # Maximum position to iterate on (e.g. Rope.length)
660 # Position (char) in the Rope (0-indexed)
663 init(root
: Concat) is old_style_init
do
664 subs
= new RopeSubstrings(root
)
667 max
= root
.length
- 1
671 init from
(root
: Concat, pos
: Int) do
672 subs
= new RopeSubstrings.from
(root
, pos
)
673 pns
= pos
- subs
.index
676 max
= root
.length
- 1
679 redef fun item
do return str
[pns
]
681 redef fun is_ok
do return pos
<= max
683 redef fun index
do return pos
688 if pns
< subs
.item
.length
then return
689 if not subs
.is_ok
then return
691 if not subs
.is_ok
then return
697 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
698 private class ReverseRopeSubstrings
699 super IndexedIterator[String]
702 var iter
: RopeIterPiece is noinit
704 var pos
: Int is noinit
707 var str
: String is noinit
709 init(root
: Concat) is old_style_init
do
710 var r
= new RopeIterPiece(root
, false, true, null)
711 pos
= root
.length
- 1
712 var lnod
: String = root
714 if lnod
isa Concat then
716 r
= new RopeIterPiece(lnod
, false, true, r
)
725 init from
(root
: Concat, pos
: Int) do
726 var r
= new RopeIterPiece(root
, false, true, null)
727 var rnod
: String = root
730 if rnod
isa Concat then
731 if off
>= rnod
.left
.length
then
732 off
-= rnod
.left
.length
734 r
= new RopeIterPiece(rnod
, false, true, r
)
738 r
= new RopeIterPiece(rnod
, false, true, r
)
750 redef fun item
do return str
752 redef fun index
do return pos
754 redef fun is_ok
do return pos
>= 0
757 if pos
< 0 then return
759 var currit
= curr
.node
760 while curr
!= null do
762 if not currit
isa Concat then
768 if not curr
.rdone
then
770 curr
= new RopeIterPiece(currit
.right
, false, false, curr
)
773 if not curr
.ldone
then
775 curr
= new RopeIterPiece(currit
.left
, false, false, curr
)
784 private class RopeBufSubstringIterator
785 super Iterator[FlatText]
787 # Iterator on the substrings of the building string
788 var iter
: Iterator[FlatText]
789 # Makes a String out of the buffered part of the Ropebuffer
790 var nsstr
: FlatString
791 # Did we attain the buffered part ?
792 var nsstr_done
= false
794 init(str
: RopeBuffer) is old_style_init
do
795 iter
= str
.str
.substrings
796 nsstr
= new FlatString.with_infos
(str
.ns
, str
.rpos
- str
.dumped
, str
.dumped
, str
.rpos
- 1)
797 if str
.length
== 0 then nsstr_done
= true
800 redef fun is_ok
do return iter
.is_ok
or not nsstr_done
804 if iter
.is_ok
then return iter
.item
817 # Substrings of a Rope (i.e. Postfix iterator on leaves)
818 private class RopeSubstrings
819 super IndexedIterator[FlatString]
822 var iter
: RopeIterPiece is noinit
824 var pos
: Int is noinit
825 # Maximum position in `Rope` (i.e. length - 1)
826 var max
: Int is noinit
829 var str
: FlatString is noinit
831 init(root
: Concat) is old_style_init
do
832 var r
= new RopeIterPiece(root
, true, false, null)
834 max
= root
.length
- 1
835 var rnod
: String = root
837 if rnod
isa Concat then
839 r
= new RopeIterPiece(rnod
, true, false, r
)
841 str
= rnod
.as(FlatString)
849 init from
(root
: Concat, pos
: Int) do
850 var r
= new RopeIterPiece(root
, true, false, null)
851 max
= root
.length
- 1
852 var rnod
: String = root
855 if rnod
isa Concat then
856 if off
>= rnod
.left
.length
then
858 off
-= rnod
.left
.length
860 r
= new RopeIterPiece(rnod
, true, false, r
)
863 r
= new RopeIterPiece(rnod
, true, false, r
)
866 str
= rnod
.as(FlatString)
875 redef fun item
do return str
877 redef fun is_ok
do return pos
<= max
879 redef fun index
do return pos
883 if pos
> max
then return
887 if not rnod
isa Concat then
890 str
= rnod
.as(FlatString)
891 iter
= it
.as(not null)
897 it
= new RopeIterPiece(rnod
, false, false, it
)
898 else if not it
.rdone
then
901 it
= new RopeIterPiece(rnod
, false, false, it
)
911 # Implementation of a `StringCharView` for `Concat` objects
912 private class RopeChars
915 redef type SELFTYPE: Concat
921 redef fun iterator_from
(i
) do return new RopeCharIterator.from
(target
, i
)
923 redef fun reverse_iterator_from
(i
) do return new RopeCharReverseIterator.from
(target
, i
)
927 # Implementation of a `StringCharView` for `Concat` objects
928 private class RopeBytes
931 redef type SELFTYPE: Concat
935 var nod
: String = target
937 if nod
isa FlatString then return nod
.items
[i
]
938 if not nod
isa Concat then abort
939 if nod
.left
.bytelen
>= i
then
947 redef fun iterator_from
(i
) do return new RopeByteIterator.from
(target
, i
)
949 redef fun reverse_iterator_from
(i
) do return new RopeByteReverseIterator.from
(target
, i
)
953 # An Iterator over a RopeBuffer.
954 class RopeBufferCharIterator
955 super IndexedIterator[Char]
958 var sit
: IndexedIterator[Char]
960 redef fun index
do return sit
.index
962 # Init the iterator from a RopeBuffer.
963 init(t
: RopeBuffer) is old_style_init
do
965 sit
= t
.str
.chars
.iterator
968 # Init the iterator from a RopeBuffer starting from `pos`.
969 init from
(t
: RopeBuffer, pos
: Int) do
971 sit
= t
.str
.chars
.iterator_from
(pos
)
974 redef fun is_ok
do return sit
.is_ok
981 redef fun next
do sit
.next
984 # Reverse iterator over a RopeBuffer.
985 class RopeBufferCharReverseIterator
986 super IndexedIterator[Char]
989 var sit
: IndexedIterator[Char]
991 redef fun index
do return sit
.index
993 # Init the iterator from a RopeBuffer.
994 init(tgt
: RopeBuffer) is old_style_init
do
996 sit
= tgt
.str
.chars
.reverse_iterator
999 # Init the iterator from a RopeBuffer starting from `pos`.
1000 init from
(tgt
: RopeBuffer, pos
: Int) do
1002 sit
= tgt
.str
.chars
.reverse_iterator_from
(pos
)
1005 redef fun is_ok
do return sit
.is_ok
1012 redef fun next
do sit
.next
1015 # View on the chars of a `RopeBuffer`
1016 class RopeBufferChars
1017 super BufferCharView
1019 redef type SELFTYPE: RopeBuffer
1022 if i
< target
.str
.length
then
1023 return target
.str
[i
]
1025 # TODO: Fix when supporting UTF-8
1026 return target
.ns
[i
- target
.str
.length
].to_i
.ascii
1030 redef fun []=(i
,c
) do
1031 if i
== target
.length
then target
.add c
1032 if i
< target
.str
.length
then
1034 var l
= s
.substring
(0, i
)
1035 var r
= s
.substring_from
(i
+ 1)
1036 target
.str
= l
+ c
.to_s
+ r
1038 # TODO: Fix when supporting UTF-8
1039 target
.ns
[i
- target
.str
.length
] = c
.to_i
.to_b
1043 redef fun add
(c
) do target
.add c
1045 redef fun push
(c
) do target
.add c
1047 redef fun iterator_from
(i
) do return new RopeBufferCharIterator.from
(target
, i
)
1049 redef fun reverse_iterator_from
(i
) do return new RopeBufferCharReverseIterator.from
(target
, i
)
1052 # An Iterator over a RopeBuffer.
1053 class RopeBufferByteIterator
1054 super IndexedIterator[Byte]
1057 var sit
: IndexedIterator[Byte]
1059 # Native string iterated over.
1060 var ns
: NativeString
1062 # Current position in `ns`.
1065 # Maximum position iterable.
1070 # Init the iterator from a RopeBuffer.
1071 init(t
: RopeBuffer) is old_style_init
do
1074 sit
= t
.str
.bytes
.iterator
1079 # Init the iterator from a RopeBuffer starting from `pos`.
1080 init from
(t
: RopeBuffer, pos
: Int) do
1083 sit
= t
.str
.bytes
.iterator_from
(pos
)
1084 pns
= pos
- t
.str
.length
1088 redef fun is_ok
do return index
< maxpos
1091 if sit
.is_ok
then return sit
.item
1105 # Reverse iterator over a RopeBuffer.
1106 class RopeBufferByteReverseIterator
1107 super IndexedIterator[Byte]
1110 var sit
: IndexedIterator[Byte]
1112 # Native string iterated over.
1113 var ns
: NativeString
1115 # Current position in `ns`.
1120 # Init the iterator from a RopeBuffer.
1121 init(tgt
: RopeBuffer) is old_style_init
do
1122 sit
= tgt
.str
.bytes
.reverse_iterator
1124 index
= tgt
.length
- 1
1128 # Init the iterator from a RopeBuffer starting from `pos`.
1129 init from
(tgt
: RopeBuffer, pos
: Int) do
1130 sit
= tgt
.str
.bytes
.reverse_iterator_from
(pos
- tgt
.rpos
- tgt
.dumped
)
1131 pns
= pos
- tgt
.str
.length
1136 redef fun is_ok
do return index
> 0
1139 if pns
>= 0 then return ns
[pns
]
1153 # View on the chars of a `RopeBuffer`
1154 class RopeBufferBytes
1155 super BufferByteView
1157 redef type SELFTYPE: RopeBuffer
1160 if i
< target
.str
.bytelen
then
1161 return target
.str
.bytes
[i
]
1163 return target
.ns
[i
- target
.str
.length
]
1167 redef fun []=(i
,c
) do
1168 if i
== target
.length
then target
.add_byte c
1169 if i
< target
.str
.length
then
1170 # FIXME: Will need to be optimized and rewritten with Unicode
1172 var l
= s
.substring
(0, i
)
1173 var r
= s
.substring_from
(i
+ 1)
1174 target
.str
= l
+ c
.to_i
.ascii
.to_s
+ r
1176 target
.ns
[i
- target
.str
.length
] = c
1180 redef fun add
(c
) do target
.add_byte c
1182 redef fun push
(c
) do target
.add_byte c
1184 redef fun iterator_from
(i
) do return new RopeBufferByteIterator.from
(target
, i
)
1186 redef fun reverse_iterator_from
(i
) do return new RopeBufferByteReverseIterator.from
(target
, i
)