1 # This file is part of NIT (http://www.nitlanguage.org).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Tree-based representation of a String.
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
44 # "My" " Name" " is" " Simon."
47 # Note that the above example is not representative of the actual implementation
48 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
49 # height (hence, this rope here might actually be a `FlatString` !).
54 # Maxlen is the maximum length of a Leaf node
56 # When concatenating two leaves, if `new_length` > `maxlen`,
57 # A `Concat` node is created instead
59 # Its purpose is to limit the depth of the `Rope` (this
60 # improves performance when accessing/iterating).
61 fun maxlen
: Int do return 512
63 # String using a tree-based representation with leaves as `FlatStrings`
64 private abstract class Rope
68 # Node that represents a concatenation between two `String`
73 redef fun chars
do return new RopeChars(self)
75 redef fun bytes
do return new RopeBytes(self)
77 redef var length
is noinit
79 redef var byte_length
is noinit
81 redef fun substrings
do return new RopeSubstrings.from
(self, 0)
83 redef fun empty
do return ""
85 # Cache for the latest accessed FlatString in `self`
86 var flat_cache
: FlatString is noinit
88 # Position of the beginning of `flat_cache` in `self`
89 var flat_last_pos_start
: Int is noinit
91 redef fun to_cstring
do
92 var len
= _byte_length
93 var ns
= new CString(len
+ 1)
96 for i
in substrings
do
97 var ilen
= i
._byte_length
98 i
.as(FlatString)._items
.copy_to
(ns
, ilen
, i
.as(FlatString)._first_byte
, off
)
104 # Left child of the node
106 # Right child of the node
112 _length
= l
.length
+ r
.length
113 _byte_length
= l
.byte_length
+ r
.byte_length
114 _flat_last_pos_start
= _length
117 redef fun is_empty
do return _byte_length
== 0
124 redef fun iterator
do return new RopeCharIterator.from
(self, 0)
128 for j
in [1 .. i
[ do x
+= self
133 assert i
>= 0 and i
< _length
134 var flps
= _flat_last_pos_start
137 if i
< flps
+ fc
._length
then return fc
.fetch_char_at
(i
- flps
)
139 var lf
= get_leaf_at
(i
)
140 return lf
.fetch_char_at
(i
- _flat_last_pos_start
)
143 fun get_leaf_at
(pos
: Int): FlatString do
144 var flps
= _flat_last_pos_start
147 if pos
< flps
+ fc
._length
then return fc
152 if s
isa FlatString then break
155 var llen
= lft
.length
163 _flat_last_pos_start
= st
- pos
168 redef fun substring
(from
, count
) do
171 if count
< 0 then return ""
176 if (count
+ from
) > ln
then count
= ln
- from
177 if count
<= 0 then return ""
178 var end_index
= from
+ count
- 1
180 var flps
= _flat_last_pos_start
183 if end_index
< flps
+ fc
._length
then
184 return fc
.substring_impl
(from
- flps
, count
, end_index
- flps
)
189 var llen
= lft
.length
191 if from
+ count
< llen
then return lft
.substring
(from
, count
)
192 var lsublen
= llen
- from
193 return lft
.substring_from
(from
) + _right
.substring
(0, count
- lsublen
)
195 return _right
.substring
(from
- llen
, count
)
199 redef fun reversed
do return new Concat(_right
.reversed
, _left
.reversed
)
201 redef fun insert_at
(s
, pos
) do
203 if pos
> lft
.length
then
204 return lft
+ _right
.insert_at
(s
, pos
- lft
.length
)
206 return lft
.insert_at
(s
, pos
) + _right
209 redef fun to_upper
do return new Concat(_left
.to_upper
, _right
.to_upper
)
211 redef fun to_lower
do return new Concat(_left
.to_lower
, _right
.to_lower
)
215 var slen
= s
.byte_length
217 return new Concat(self, s
)
220 var rlen
= r
.byte_length
221 if rlen
+ slen
> maxlen
then return new Concat(self, s
)
222 return new Concat(_left
, r
+ s
)
226 redef fun copy_to_native
(dest
, n
, src_offset
, dest_offset
) do
228 if src_offset
< l
.byte_length
then
229 var lcopy
= l
.byte_length
- src_offset
230 lcopy
= if lcopy
> n
then n
else lcopy
231 l
.copy_to_native
(dest
, lcopy
, src_offset
, dest_offset
)
236 _right
.copy_to_native
(dest
, n
, src_offset
, dest_offset
)
239 # Returns a balanced version of `self`
240 fun balance
: String do
241 var children
= new Array[String]
243 var iter
: nullable RopeCharIteratorPiece = new RopeCharIteratorPiece(self, false, false, null)
245 if iter
== null then break
247 if not rnod
isa Concat then
252 if not iter
.ldone
then
254 iter
= new RopeCharIteratorPiece(rnod
._left
, false, false, iter
)
255 else if not iter
.rdone
then
257 iter
= new RopeCharIteratorPiece(rnod
._right
, false, false, iter
)
263 return recurse_balance
(children
, children
.length
)
266 fun recurse_balance
(nodes
: Array[String], len
: Int): String do
270 if len
- stpos
> 1 then
271 nodes
[finpos
] = new Concat(nodes
[stpos
], nodes
[stpos
+ 1])
274 nodes
[finpos
] = nodes
[stpos
]
279 if finpos
== 1 then return nodes
[0]
280 return recurse_balance
(nodes
, finpos
)
284 redef class FlatString
286 redef fun insert_at
(s
, pos
) do
287 var l
= substring
(0, pos
)
288 var r
= substring_from
(pos
)
294 var slen
= s
.byte_length
295 var mlen
= _byte_length
296 if slen
== 0 then return self
297 if mlen
== 0 then return s
298 var nlen
= slen
+ mlen
299 if s
isa FlatString then
300 if nlen
> maxlen
then return new Concat(self, s
)
302 var sifrom
= s
._first_byte
303 var mifrom
= _first_byte
305 var ns
= new CString(nlen
+ 1)
306 mits
.copy_to
(ns
, mlen
, mifrom
, 0)
307 sits
.copy_to
(ns
, slen
, sifrom
, mlen
)
308 return new FlatString.full
(ns
, nlen
, 0, length
+ s
.length
)
309 else if s
isa Concat then
311 var sllen
= sl
.byte_length
312 if sllen
+ mlen
> maxlen
then return new Concat(self, s
)
313 return new Concat(self + sl
, s
._right
)
320 # A simple linked list for use with iterators
321 private class RopeCharIteratorPiece
322 # The encapsulated node of the `Rope`
324 # Was its _left child (if any) visited ?
326 # Was its _right child (if any) visited ?
328 # The previous node in the list.
329 var prev
: nullable RopeCharIteratorPiece
332 # A reverse iterator capable of working with `Rope` objects
333 private class RopeByteReverseIterator
334 super IndexedIterator[Int]
337 var ns
: CString is noautoinit
338 # Current position in CString
339 var pns
: Int is noautoinit
340 # Position in the Rope (0-indexed)
341 var pos
: Int is noautoinit
342 # Iterator on the substrings, does the Postfix part of
343 # the Rope traversal.
344 var subs
: IndexedIterator[FlatString] is noautoinit
346 init from
(root
: Concat, pos
: Int) do
348 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
351 pns
= pos
- subs
.index
354 redef fun index
do return pos
356 redef fun is_ok
do return pos
>= 0
358 redef fun item
do return ns
[pns
]
363 if pns
>= 0 then return
364 if not subs
.is_ok
then return
366 if not subs
.is_ok
then return
373 # Forward iterator on the bytes of a `Rope`
374 private class RopeByteIterator
375 super IndexedIterator[Int]
377 # Position in current `String`
378 var pns
: Int is noautoinit
379 # Current `String` being iterated on
380 var ns
: CString is noautoinit
381 # Substrings of the Rope
382 var subs
: IndexedIterator[FlatString] is noautoinit
383 # Maximum position to iterate on (e.g. Rope.byte_length)
384 var max
: Int is noautoinit
385 # Position (char) in the Rope (0-indexed)
386 var pos
: Int is noautoinit
388 init from
(root
: Concat, pos
: Int) do
389 subs
= new RopeSubstrings.from
(root
, pos
)
390 pns
= pos
- subs
.index
392 ns
= subs
.item
._items
393 max
= root
.byte_length
- 1
396 redef fun item
do return ns
[pns
]
398 redef fun is_ok
do return pos
<= max
400 redef fun index
do return pos
405 if pns
< subs
.item
._byte_length
then return
406 if not subs
.is_ok
then return
408 if not subs
.is_ok
then return
409 ns
= subs
.item
._items
415 # A reverse iterator capable of working with `Rope` objects
416 private class RopeCharReverseIterator
417 super IndexedIterator[Char]
420 var ns
: String is noautoinit
421 # Current position in CString
422 var pns
: Int is noautoinit
423 # Position in the Rope (0-indexed)
424 var pos
: Int is noautoinit
425 # Iterator on the substrings, does the Postfix part of
426 # the Rope traversal.
427 var subs
: IndexedIterator[String] is noautoinit
429 init from
(root
: Concat, pos
: Int) do
431 subs
= new ReverseRopeSubstrings.from
(root
, pos
)
433 pns
= pos
- subs
.index
436 redef fun index
do return pos
438 redef fun is_ok
do return pos
>= 0
440 redef fun item
do return ns
[pns
]
445 if pns
>= 0 then return
446 if not subs
.is_ok
then return
448 if not subs
.is_ok
then return
454 # Forward iterator on the chars of a `Rope`
455 private class RopeCharIterator
456 super IndexedIterator[Char]
458 # Position in current `String`
459 var pns
: Int is noautoinit
460 # Current `String` being iterated on
461 var str
: String is noautoinit
462 # Substrings of the Rope
463 var subs
: IndexedIterator[String] is noautoinit
464 # Maximum position to iterate on (e.g. Rope.length)
465 var max
: Int is noautoinit
466 # Position (char) in the Rope (0-indexed)
467 var pos
: Int is noautoinit
469 init from
(root
: Concat, pos
: Int) do
470 subs
= new RopeSubstrings.from
(root
, pos
)
471 pns
= pos
- subs
.index
474 max
= root
.length
- 1
477 redef fun item
do return str
[pns
]
479 redef fun is_ok
do return pos
<= max
481 redef fun index
do return pos
486 if pns
< subs
.item
.length
then return
487 if not subs
.is_ok
then return
489 if not subs
.is_ok
then return
495 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
496 private class ReverseRopeSubstrings
497 super IndexedIterator[FlatString]
500 var iter
: RopeCharIteratorPiece is noinit
502 var pos
: Int is noinit
505 var str
: FlatString is noinit
507 init from
(root
: Concat, pos
: Int) do
508 var r
= new RopeCharIteratorPiece(root
, false, true, null)
509 var rnod
: String = root
512 if rnod
isa Concat then
513 if off
>= rnod
._left
.length
then
514 off
-= rnod
._left
.length
516 r
= new RopeCharIteratorPiece(rnod
, false, true, r
)
520 r
= new RopeCharIteratorPiece(rnod
, false, true, r
)
523 str
= rnod
.as(FlatString)
532 redef fun item
do return str
534 redef fun index
do return pos
536 redef fun is_ok
do return pos
>= 0
539 if pos
< 0 then return
541 var currit
= curr
.as(not null).node
542 while curr
!= null do
544 if not currit
isa Concat then
545 str
= currit
.as(FlatString)
550 if not curr
.rdone
then
552 curr
= new RopeCharIteratorPiece(currit
._right
, false, false, curr
)
555 if not curr
.ldone
then
557 curr
= new RopeCharIteratorPiece(currit
._left
, false, false, curr
)
566 # Substrings of a Rope (i.e. Postfix iterator on leaves)
567 private class RopeSubstrings
568 super IndexedIterator[FlatString]
571 var iter
: RopeCharIteratorPiece is noinit
573 var pos
: Int is noinit
574 # Maximum position in `Rope` (i.e. length - 1)
575 var max
: Int is noinit
578 var str
: FlatString is noinit
580 init from
(root
: Concat, pos
: Int) do
581 var r
= new RopeCharIteratorPiece(root
, true, false, null)
582 max
= root
.length
- 1
583 var rnod
: String = root
586 if rnod
isa Concat then
587 if off
>= rnod
._left
.length
then
589 off
-= rnod
._left
.length
591 r
= new RopeCharIteratorPiece(rnod
, true, false, r
)
594 r
= new RopeCharIteratorPiece(rnod
, true, false, r
)
597 str
= rnod
.as(FlatString)
606 redef fun item
do return str
608 redef fun is_ok
do return pos
<= max
610 redef fun index
do return pos
614 if pos
> max
then return
615 var it
= iter
.prev
.as(not null)
618 if not rnod
isa Concat then
621 str
= rnod
.as(FlatString)
628 it
= new RopeCharIteratorPiece(rnod
, false, false, it
)
629 else if not it
.rdone
then
632 it
= new RopeCharIteratorPiece(rnod
, false, false, it
)
634 it
= it
.prev
.as(not null)
642 # Implementation of a `StringCharView` for `Concat` objects
643 private class RopeChars
646 redef type SELFTYPE: Concat
652 redef fun iterator_from
(i
) do return new RopeCharIterator.from
(target
, i
)
654 redef fun reverse_iterator_from
(i
) do return new RopeCharReverseIterator.from
(target
, i
)
658 # Implementation of a `StringCharView` for `Concat` objects
659 private class RopeBytes
662 redef type SELFTYPE: Concat
664 var cache
: FlatString is noinit
666 var cache_start
: Int = -1
668 var cache_end
: Int = -1
671 assert i
>= 0 and i
< target
._byte_length
672 var flps
= _cache_start
673 if i
>= flps
and i
<= _cache_end
then
674 return _cache
.bytes
[i
- flps
]
676 var lf
= get_leaf_at
(i
)
677 return lf
.bytes
[i
- _cache_start
]
680 fun get_leaf_at
(pos
: Int): FlatString do
681 var flps
= _cache_start
682 if pos
>= flps
and pos
<= _cache_end
then
685 var s
: String = target
688 if s
isa FlatString then break
691 var llen
= lft
.byte_length
699 _cache_start
= st
- pos
700 _cache_end
= st
- pos
+ s
.byte_length
- 1
705 redef fun iterator_from
(i
) do return new RopeByteIterator.from
(target
, i
)
707 redef fun reverse_iterator_from
(i
) do return new RopeByteReverseIterator.from
(target
, i
)