lib/standard/ropes: Added balancing method to Concat
[nit.git] / lib / standard / text / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 #
20 # See:
21 #
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
25 #
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
28 #
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
33 #
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
36 #
37 # Example :
38 #
39 # ~~~raw
40 # Concat
41 # / \
42 # Concat Concat
43 # / \ / \
44 # "My" " Name" " is" " Simon."
45 # ~~~
46 #
47 # Note that the above example is not representative of the actual implementation
48 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
49 # height (hence, this rope here might actually be a `FlatString` !).
50 module ropes
51
52 intrude import flat
53
54 # Maxlen is the maximum length of a Leaf node
55 #
56 # When concatenating two leaves, if `new_length` > `maxlen`,
57 # A `Concat` node is created instead
58 #
59 # Its purpose is to limit the depth of the `Rope` (this
60 # improves performance when accessing/iterating).
61 fun maxlen: Int do return 64
62
63 # String using a tree-based representation with leaves as `FlatStrings`
64 private abstract class Rope
65 super Text
66 end
67
68 # Node that represents a concatenation between two `String`
69 private class Concat
70 super Rope
71 super String
72
73 redef var chars is lazy do return new RopeChars(self)
74
75 redef var bytes is lazy do return new RopeBytes(self)
76
77 redef var length is noinit
78
79 redef var bytelen is noinit
80
81 redef fun substrings do return new RopeSubstrings(self)
82
83 redef fun empty do return ""
84
85 # Cache for the latest accessed FlatString in `self`
86 var flat_cache: String = ""
87
88 # Position of the beginning of `flat_cache` in `self`
89 var flat_last_pos_start: Int = -1
90
91 redef var to_cstring is lazy do
92 var len = bytelen
93 var ns = new NativeString(len + 1)
94 ns[len] = 0u8
95 var off = 0
96 for i in substrings do
97 var ilen = i.bytelen
98 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).first_byte, off)
99 off += ilen
100 end
101 return ns
102 end
103
104 # Left child of the node
105 var left: String
106 # Right child of the node
107 var right: String
108
109 init do
110 length = left.length + right.length
111 bytelen = left.bytelen + right.bytelen
112 end
113
114 redef fun output do
115 left.output
116 right.output
117 end
118
119 redef fun iterator do return new RopeCharIterator(self)
120
121 redef fun *(i) do
122 var x: String = self
123 for j in [1 .. i[ do x += self
124 return x
125 end
126
127 redef fun [](i) do
128 if flat_last_pos_start != -1 then
129 var fsp = i - flat_last_pos_start
130 if fsp >= 0 and fsp < flat_cache.length then return flat_cache[fsp]
131 end
132 var s: String = self
133 var st = i
134 loop
135 if s isa FlatString then break
136 s = s.as(Concat)
137 var lft = s.left
138 var llen = lft.length
139 if i >= llen then
140 s = s.right
141 i -= llen
142 else
143 s = s.left
144 end
145 end
146 flat_last_pos_start = st - i
147 flat_cache = s
148 return s[i]
149 end
150
151 redef fun substring(from, len) do
152 var llen = left.length
153 if from < llen then
154 if from + len < llen then return left.substring(from,len)
155 var lsublen = llen - from
156 return left.substring_from(from) + right.substring(0, len - lsublen)
157 else
158 return right.substring(from - llen, len)
159 end
160 end
161
162 redef fun reversed do return new Concat(right.reversed, left.reversed)
163
164 redef fun insert_at(s, pos) do
165 if pos > left.length then
166 return left + right.insert_at(s, pos - left.length)
167 end
168 return left.insert_at(s, pos) + right
169 end
170
171 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
172
173 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
174
175 redef fun +(o) do
176 var s = o.to_s
177 var slen = s.bytelen
178 if s isa Concat then
179 return new Concat(self, s)
180 else
181 var r = right
182 var rlen = r.bytelen
183 if rlen + slen > maxlen then return new Concat(self, s)
184 return new Concat(left, r + s)
185 end
186 end
187
188 redef fun copy_to_native(dest, n, src_offset, dest_offset) do
189 var subs = new RopeSubstrings.from(self, src_offset)
190 var st = src_offset - subs.pos
191 var off = dest_offset
192 while n > 0 do
193 var it = subs.item
194 if n > it.length then
195 var cplen = it.length - st
196 it.items.copy_to(dest, cplen, st, off)
197 off += cplen
198 n -= cplen
199 else
200 it.items.copy_to(dest, n, st, off)
201 n = 0
202 end
203 subs.next
204 st = 0
205 end
206 end
207
208 # Returns a balanced version of `self`
209 fun balance: String do
210 var children = new Array[String]
211 var rnod: String
212 var iter: nullable RopeCharIteratorPiece = new RopeCharIteratorPiece(self, false, false, null)
213 loop
214 if iter == null then break
215 rnod = iter.node
216 if not rnod isa Concat then
217 children.push rnod
218 iter = iter.prev
219 continue
220 end
221 if not iter.ldone then
222 iter.ldone = true
223 iter = new RopeCharIteratorPiece(rnod.left, false, false, iter)
224 else if not iter.rdone then
225 iter.rdone = true
226 iter = new RopeCharIteratorPiece(rnod.right, false, false, iter)
227 else
228 iter = iter.prev
229 end
230
231 end
232 return recurse_balance(children, children.length)
233 end
234
235 fun recurse_balance(nodes: Array[String], len: Int): String do
236 var finpos = 0
237 var stpos = 0
238 while stpos < len do
239 if len - stpos > 1 then
240 nodes[finpos] = new Concat(nodes[stpos], nodes[stpos + 1])
241 stpos += 2
242 else
243 nodes[finpos] = nodes[stpos]
244 stpos += 1
245 end
246 finpos += 1
247 end
248 if finpos == 1 then return nodes[0]
249 return recurse_balance(nodes, finpos)
250 end
251 end
252
253 # Mutable `Rope`, optimized for concatenation operations
254 #
255 # A `RopeBuffer` is an efficient way of building a `String` when
256 # concatenating small strings.
257 #
258 # It does concatenations in an optimized way by having a
259 # mutable part and an immutable part built by efficiently
260 # concatenating strings in chain.
261 #
262 # Every concatenation operation is done by copying a string to
263 # the mutable part and flushing it when full.
264 #
265 # However, when a long string is appended to the `Buffer`,
266 # the concatenation is done at it would be in a `Rope`.
267 class RopeBuffer
268 super Rope
269 super Buffer
270
271 redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
272
273 redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
274
275 # The final string being built on the fly
276 private var str: String = ""
277
278 # Current concatenation buffer
279 private var ns: NativeString is noinit
280
281 # Next available (e.g. unset) character in the `Buffer`
282 private var rpos = 0
283
284 # Keeps track of the buffer's currently dumped part
285 #
286 # This might happen if for instance, a String was being
287 # built by concatenating small parts of string and suddenly
288 # a long string (length > maxlen) is appended.
289 private var dumped: Int is noinit
290
291 # Length of the complete rope in chars (0)
292 redef fun length do
293 var st = dumped
294 var len = str.length
295 while st < rpos do
296 st += ns[st].u8len
297 len += 1
298 end
299 return len
300 end
301
302 # Length of the complete rope in bytes
303 redef var bytelen = 0
304
305 # Length of the mutable part (in bytes)
306 #
307 # Is also used as base to compute the size of the next
308 # mutable native string (`ns`)
309 private var buf_size: Int is noinit
310
311 redef fun substrings do return new RopeBufSubstringIterator(self)
312
313 # Builds an empty `RopeBuffer`
314 init do
315 ns = new NativeString(maxlen)
316 buf_size = maxlen
317 dumped = 0
318 end
319
320 # Builds a new `RopeBuffer` with `str` in it.
321 init from(str: String) do
322 self.str = str
323 ns = new NativeString(maxlen)
324 buf_size = maxlen
325 bytelen = str.length
326 dumped = 0
327 end
328
329 # Resets the informations of the `Buffer`
330 #
331 # This is called when doing in-place modifications
332 # on a previously to_s'd `RopeBuffer`
333 private fun reset do
334 var nns = new NativeString(buf_size)
335 var blen = rpos - dumped
336 ns.copy_to(nns, blen, dumped, 0)
337 ns = nns
338 dumped = 0
339 rpos = blen
340 written = false
341 end
342
343 redef fun [](i) do
344 if i < str.length then
345 return str[i]
346 else
347 var index = ns.char_to_byte_index_cached(i - str.length, 0, dumped)
348 return ns.char_at(index)
349 end
350 end
351
352 redef fun []=(i, c) do
353 assert i >= 0 and i <= length
354 if i == length then add c
355 if i < str.length then
356 bytelen += c.u8char_len - str[i].u8char_len
357 var s = str
358 var l = s.substring(0, i)
359 var r = s.substring_from(i + 1)
360 str = l + c.to_s + r
361 else
362 var reali = i - str.length
363 var index = ns.char_to_byte_index_cached(reali, 0, dumped)
364 var st_nxt = ns.char_to_byte_index_cached(reali + 1, reali, index)
365 var loc_c = ns.char_at(index)
366 if loc_c.u8char_len != c.u8char_len then
367 var delta = c.u8char_len - loc_c.u8char_len
368 var remsp = buf_size - rpos
369 if remsp < delta then
370 buf_size *= 2
371 var nns = new NativeString(buf_size)
372 ns.copy_to(nns, index - dumped, dumped, 0)
373 ns.copy_to(nns, rpos - index - loc_c.u8char_len, index + loc_c.u8char_len, index - dumped + delta)
374 ns = nns
375 index = index - dumped
376 else
377 ns.copy_to(ns, rpos - st_nxt, st_nxt, st_nxt + delta)
378 end
379 bytelen += delta
380 rpos += delta
381 end
382 ns.set_char_at(index, c)
383 end
384 end
385
386 redef fun empty do return new RopeBuffer
387
388 redef fun clear do
389 str = ""
390 bytelen = 0
391 rpos = 0
392 dumped = 0
393 if written then
394 ns = new NativeString(buf_size)
395 written = false
396 end
397 end
398
399 redef fun substring(from, count) do
400 var strlen = str.length
401
402 if from < 0 then
403 count += from
404 if count < 0 then count = 0
405 from = 0
406 end
407
408 if count > length then count = length - from
409
410 if count == 0 then return empty
411
412 if from < strlen then
413 var subpos = strlen - from
414 if count <= subpos then
415 return new RopeBuffer.from(str.substring(from, count))
416 else
417 var l = str.substring_from(from)
418 var rem = count - subpos
419 var nns = new NativeString(rem)
420 ns.copy_to(nns, rem, dumped, 0)
421 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
422 end
423 else
424 var nns = new NativeString(count)
425 ns.copy_to(nns, count, dumped, 0)
426 return new RopeBuffer.from(nns.to_s_with_length(count))
427 end
428 end
429
430 redef fun append(s) do
431 var slen = s.bytelen
432 if slen >= maxlen then
433 persist_buffer
434 str += s.to_s
435 return
436 end
437 if s isa FlatText then
438 var oits = s.items
439 var from = if s isa FlatString then s.first_byte else 0
440 var remsp = buf_size - rpos
441 if slen <= remsp then
442 oits.copy_to(ns, slen, from, rpos)
443 rpos += slen
444 return
445 end
446 var brk = oits.find_beginning_of_char_at(from + remsp)
447 oits.copy_to(ns, brk, from, rpos)
448 rpos += brk
449 dump_buffer
450 oits.copy_to(ns, slen - remsp, brk, 0)
451 rpos = slen - remsp
452 else
453 for i in s.substrings do append i
454 end
455 end
456
457 redef fun add(c) do
458 var rp = rpos
459 if rp >= buf_size then
460 dump_buffer
461 rp = 0
462 end
463 # TODO: Fix when supporting UTF-8
464 ns[rp] = c.ascii.to_b
465 rp += 1
466 bytelen += 1
467 rpos = rp
468 end
469
470 private fun add_byte(b: Byte) do
471 var rp = rpos
472 if rp >= buf_size then
473 dump_buffer
474 rp = 0
475 end
476 ns[rp] = b
477 rp += 1
478 bytelen += 1
479 rpos = rp
480 end
481
482 # Converts the Buffer to a FlatString, appends it to
483 # the final String and re-allocates a new larger Buffer.
484 private fun dump_buffer do
485 written = false
486 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
487 str += nstr
488 var bs = buf_size
489 bs = bs * 2
490 ns = new NativeString(bs)
491 buf_size = bs
492 dumped = 0
493 rpos = 0
494 end
495
496 # Similar to dump_buffer, but does not reallocate a new NativeString
497 private fun persist_buffer do
498 if rpos == dumped then return
499 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
500 str += nstr
501 dumped = rpos
502 end
503
504 redef fun output do
505 str.output
506 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
507 end
508
509 # Enlarge is useless here since the `Buffer`
510 # part is automatically dumped when necessary.
511 #
512 # Also, since the buffer can not be overused by a
513 # single string, there is no need for manual
514 # resizing.
515 #
516 # "You have no power here !"
517 redef fun enlarge(i) do end
518
519 redef fun to_s do
520 dump_buffer
521 return str
522 end
523
524 redef fun reverse do
525 # Flush the buffer in order to only have to reverse `str`.
526 if rpos > 0 and dumped != rpos then
527 str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
528 dumped = rpos
529 end
530 str = str.reversed
531 end
532
533 redef fun upper do
534 if written then reset
535 persist_buffer
536 str = str.to_upper
537 end
538
539 redef fun lower do
540 if written then reset
541 persist_buffer
542 str = str.to_lower
543 end
544 end
545
546 redef class FlatString
547
548 redef fun insert_at(s, pos) do
549 var l = substring(0, pos)
550 var r = substring_from(pos)
551 return l + s + r
552 end
553
554 redef fun +(o) do
555 var s = o.to_s
556 var slen = s.bytelen
557 var mlen = bytelen
558 if slen == 0 then return self
559 if mlen == 0 then return s
560 var nlen = slen + mlen
561 if s isa FlatString then
562 if nlen > maxlen then return new Concat(self, s)
563 var mits = items
564 var sifrom = s.first_byte
565 var mifrom = first_byte
566 var sits = s.items
567 var ns = new NativeString(nlen + 1)
568 mits.copy_to(ns, mlen, mifrom, 0)
569 sits.copy_to(ns, slen, sifrom, mlen)
570 return ns.to_s_with_length(nlen)
571 else if s isa Concat then
572 var sl = s.left
573 var sllen = sl.bytelen
574 if sllen + mlen > maxlen then return new Concat(self, s)
575 return new Concat(self + sl, s.right)
576 else
577 abort
578 end
579 end
580 end
581
582 # A simple linked list for use with iterators
583 private class RopeCharIteratorPiece
584 # The encapsulated node of the `Rope`
585 var node: String
586 # Was its left child (if any) visited ?
587 var ldone: Bool
588 # Was its right child (if any) visited ?
589 var rdone: Bool
590 # The previous node in the list.
591 var prev: nullable RopeCharIteratorPiece
592 end
593
594 # A reverse iterator capable of working with `Rope` objects
595 private class RopeByteReverseIterator
596 super IndexedIterator[Byte]
597
598 # Current NativeString
599 var ns: NativeString
600 # Current position in NativeString
601 var pns: Int
602 # Position in the Rope (0-indexed)
603 var pos: Int
604 # Iterator on the substrings, does the Postfix part of
605 # the Rope traversal.
606 var subs: IndexedIterator[FlatString]
607
608 init(root: Concat) is old_style_init do
609 pos = root.bytelen - 1
610 subs = new ReverseRopeSubstrings(root)
611 var s = subs.item
612 ns = s.items
613 pns = s.last_byte
614 end
615
616 init from(root: Concat, pos: Int) do
617 self.pos = pos
618 subs = new ReverseRopeSubstrings.from(root, pos)
619 var s = subs.item
620 ns = s.items
621 pns = pos - subs.index
622 end
623
624 redef fun index do return pos
625
626 redef fun is_ok do return pos >= 0
627
628 redef fun item do return ns[pns]
629
630 redef fun next do
631 pns -= 1
632 pos -= 1
633 if pns >= 0 then return
634 if not subs.is_ok then return
635 subs.next
636 if not subs.is_ok then return
637 var s = subs.item
638 ns = s.items
639 pns = s.last_byte
640 end
641 end
642
643 # Forward iterator on the bytes of a `Rope`
644 private class RopeByteIterator
645 super IndexedIterator[Byte]
646
647 # Position in current `String`
648 var pns: Int
649 # Current `String` being iterated on
650 var ns: NativeString
651 # Substrings of the Rope
652 var subs: IndexedIterator[FlatString]
653 # Maximum position to iterate on (e.g. Rope.length)
654 var max: Int
655 # Position (char) in the Rope (0-indexed)
656 var pos: Int
657
658 init(root: Concat) is old_style_init do
659 subs = new RopeSubstrings(root)
660 pns = 0
661 ns = subs.item.items
662 max = root.length - 1
663 pos = 0
664 end
665
666 init from(root: Concat, pos: Int) do
667 subs = new RopeSubstrings.from(root, pos)
668 pns = pos - subs.index
669 self.pos = pos
670 ns = subs.item.items
671 max = root.length - 1
672 end
673
674 redef fun item do return ns[pns]
675
676 redef fun is_ok do return pos <= max
677
678 redef fun index do return pos
679
680 redef fun next do
681 pns += 1
682 pos += 1
683 if pns < subs.item.bytelen then return
684 if not subs.is_ok then return
685 subs.next
686 if not subs.is_ok then return
687 ns = subs.item.items
688 pns = 0
689 end
690 end
691
692
693 # A reverse iterator capable of working with `Rope` objects
694 private class RopeCharReverseIterator
695 super IndexedIterator[Char]
696
697 # Current NativeString
698 var ns: String
699 # Current position in NativeString
700 var pns: Int
701 # Position in the Rope (0-indexed)
702 var pos: Int
703 # Iterator on the substrings, does the Postfix part of
704 # the Rope traversal.
705 var subs: IndexedIterator[String]
706
707 init(root: Concat) is old_style_init do
708 pos = root.length - 1
709 subs = new ReverseRopeSubstrings(root)
710 ns = subs.item
711 pns = ns.length - 1
712 end
713
714 init from(root: Concat, pos: Int) do
715 self.pos = pos
716 subs = new ReverseRopeSubstrings.from(root, pos)
717 ns = subs.item
718 pns = pos - subs.index
719 end
720
721 redef fun index do return pos
722
723 redef fun is_ok do return pos >= 0
724
725 redef fun item do return ns[pns]
726
727 redef fun next do
728 pns -= 1
729 pos -= 1
730 if pns >= 0 then return
731 if not subs.is_ok then return
732 subs.next
733 if not subs.is_ok then return
734 ns = subs.item
735 pns = ns.length - 1
736 end
737 end
738
739 # Forward iterator on the chars of a `Rope`
740 private class RopeCharIterator
741 super IndexedIterator[Char]
742
743 # Position in current `String`
744 var pns: Int
745 # Current `String` being iterated on
746 var str: String
747 # Substrings of the Rope
748 var subs: IndexedIterator[String]
749 # Maximum position to iterate on (e.g. Rope.length)
750 var max: Int
751 # Position (char) in the Rope (0-indexed)
752 var pos: Int
753
754 init(root: Concat) is old_style_init do
755 subs = new RopeSubstrings(root)
756 pns = 0
757 str = subs.item
758 max = root.length - 1
759 pos = 0
760 end
761
762 init from(root: Concat, pos: Int) do
763 subs = new RopeSubstrings.from(root, pos)
764 pns = pos - subs.index
765 self.pos = pos
766 str = subs.item
767 max = root.length - 1
768 end
769
770 redef fun item do return str[pns]
771
772 redef fun is_ok do return pos <= max
773
774 redef fun index do return pos
775
776 redef fun next do
777 pns += 1
778 pos += 1
779 if pns < subs.item.length then return
780 if not subs.is_ok then return
781 subs.next
782 if not subs.is_ok then return
783 str = subs.item
784 pns = 0
785 end
786 end
787
788 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
789 private class ReverseRopeSubstrings
790 super IndexedIterator[FlatString]
791
792 # Visit Stack
793 var iter: RopeCharIteratorPiece is noinit
794 # Position in `Rope`
795 var pos: Int is noinit
796
797 # Current leaf
798 var str: FlatString is noinit
799
800 init(root: Concat) is old_style_init do
801 var r = new RopeCharIteratorPiece(root, false, true, null)
802 pos = root.length - 1
803 var lnod: String = root
804 loop
805 if lnod isa Concat then
806 lnod = lnod.right
807 r = new RopeCharIteratorPiece(lnod, false, true, r)
808 else
809 str = lnod.as(FlatString)
810 iter = r
811 break
812 end
813 end
814 end
815
816 init from(root: Concat, pos: Int) do
817 var r = new RopeCharIteratorPiece(root, false, true, null)
818 var rnod: String = root
819 var off = pos
820 loop
821 if rnod isa Concat then
822 if off >= rnod.left.length then
823 off -= rnod.left.length
824 rnod = rnod.right
825 r = new RopeCharIteratorPiece(rnod, false, true, r)
826 else
827 r.ldone = true
828 rnod = rnod.left
829 r = new RopeCharIteratorPiece(rnod, false, true, r)
830 end
831 else
832 str = rnod.as(FlatString)
833 r.ldone = true
834 iter = r
835 self.pos = pos - off
836 break
837 end
838 end
839 end
840
841 redef fun item do return str
842
843 redef fun index do return pos
844
845 redef fun is_ok do return pos >= 0
846
847 redef fun next do
848 if pos < 0 then return
849 var curr = iter.prev
850 var currit = curr.node
851 while curr != null do
852 currit = curr.node
853 if not currit isa Concat then
854 str = currit.as(FlatString)
855 pos -= str.length
856 iter = curr
857 return
858 end
859 if not curr.rdone then
860 curr.rdone = true
861 curr = new RopeCharIteratorPiece(currit.right, false, false, curr)
862 continue
863 end
864 if not curr.ldone then
865 curr.ldone = true
866 curr = new RopeCharIteratorPiece(currit.left, false, false, curr)
867 continue
868 end
869 curr = curr.prev
870 end
871 pos = -1
872 end
873 end
874
875 private class RopeBufSubstringIterator
876 super Iterator[FlatText]
877
878 # Iterator on the substrings of the building string
879 var iter: Iterator[FlatText]
880 # Makes a String out of the buffered part of the Ropebuffer
881 var nsstr: FlatString
882 # Did we attain the buffered part ?
883 var nsstr_done = false
884
885 init(str: RopeBuffer) is old_style_init do
886 iter = str.str.substrings
887 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
888 if str.length == 0 then nsstr_done = true
889 end
890
891 redef fun is_ok do return iter.is_ok or not nsstr_done
892
893 redef fun item do
894 assert is_ok
895 if iter.is_ok then return iter.item
896 return nsstr
897 end
898
899 redef fun next do
900 if iter.is_ok then
901 iter.next
902 return
903 end
904 nsstr_done = true
905 end
906 end
907
908 # Substrings of a Rope (i.e. Postfix iterator on leaves)
909 private class RopeSubstrings
910 super IndexedIterator[FlatString]
911
912 # Visit Stack
913 var iter: RopeCharIteratorPiece is noinit
914 # Position in `Rope`
915 var pos: Int is noinit
916 # Maximum position in `Rope` (i.e. length - 1)
917 var max: Int is noinit
918
919 # Current leaf
920 var str: FlatString is noinit
921
922 init(root: Concat) is old_style_init do
923 var r = new RopeCharIteratorPiece(root, true, false, null)
924 pos = 0
925 max = root.length - 1
926 var rnod: String = root
927 loop
928 if rnod isa Concat then
929 rnod = rnod.left
930 r = new RopeCharIteratorPiece(rnod, true, false, r)
931 else
932 str = rnod.as(FlatString)
933 r.rdone = true
934 iter = r
935 break
936 end
937 end
938 end
939
940 init from(root: Concat, pos: Int) do
941 var r = new RopeCharIteratorPiece(root, true, false, null)
942 max = root.length - 1
943 var rnod: String = root
944 var off = pos
945 loop
946 if rnod isa Concat then
947 if off >= rnod.left.length then
948 r.rdone = true
949 off -= rnod.left.length
950 rnod = rnod.right
951 r = new RopeCharIteratorPiece(rnod, true, false, r)
952 else
953 rnod = rnod.left
954 r = new RopeCharIteratorPiece(rnod, true, false, r)
955 end
956 else
957 str = rnod.as(FlatString)
958 r.rdone = true
959 iter = r
960 self.pos = pos - off
961 break
962 end
963 end
964 end
965
966 redef fun item do return str
967
968 redef fun is_ok do return pos <= max
969
970 redef fun index do return pos
971
972 redef fun next do
973 pos += str.length
974 if pos > max then return
975 var it = iter.prev
976 var rnod = it.node
977 loop
978 if not rnod isa Concat then
979 it.ldone = true
980 it.rdone = true
981 str = rnod.as(FlatString)
982 iter = it.as(not null)
983 break
984 end
985 if not it.ldone then
986 rnod = rnod.left
987 it.ldone = true
988 it = new RopeCharIteratorPiece(rnod, false, false, it)
989 else if not it.rdone then
990 it.rdone = true
991 rnod = rnod.right
992 it = new RopeCharIteratorPiece(rnod, false, false, it)
993 else
994 it = it.prev
995 rnod = it.node
996 continue
997 end
998 end
999 end
1000 end
1001
1002 # Implementation of a `StringCharView` for `Concat` objects
1003 private class RopeChars
1004 super StringCharView
1005
1006 redef type SELFTYPE: Concat
1007
1008 redef fun [](i) do
1009 return target[i]
1010 end
1011
1012 redef fun iterator_from(i) do return new RopeCharIterator.from(target, i)
1013
1014 redef fun reverse_iterator_from(i) do return new RopeCharReverseIterator.from(target, i)
1015
1016 end
1017
1018 # Implementation of a `StringCharView` for `Concat` objects
1019 private class RopeBytes
1020 super StringByteView
1021
1022 redef type SELFTYPE: Concat
1023
1024 redef fun [](i) do
1025 var nod: String = target
1026 loop
1027 if nod isa FlatString then return nod.items[i]
1028 if not nod isa Concat then abort
1029 if nod.left.bytelen >= i then
1030 nod = nod.right
1031 else
1032 nod = nod.left
1033 end
1034 end
1035 end
1036
1037 redef fun iterator_from(i) do return new RopeByteIterator.from(target, i)
1038
1039 redef fun reverse_iterator_from(i) do return new RopeByteReverseIterator.from(target, i)
1040
1041 end
1042
1043 # An Iterator over a RopeBuffer.
1044 class RopeBufferCharIterator
1045 super IndexedIterator[Char]
1046
1047 # Subiterator.
1048 var sit: IndexedIterator[Char]
1049
1050 redef fun index do return sit.index
1051
1052 # Init the iterator from a RopeBuffer.
1053 init(t: RopeBuffer) is old_style_init do
1054 t.persist_buffer
1055 sit = t.str.chars.iterator
1056 end
1057
1058 # Init the iterator from a RopeBuffer starting from `pos`.
1059 init from(t: RopeBuffer, pos: Int) do
1060 t.persist_buffer
1061 sit = t.str.chars.iterator_from(pos)
1062 end
1063
1064 redef fun is_ok do return sit.is_ok
1065
1066 redef fun item do
1067 assert is_ok
1068 return sit.item
1069 end
1070
1071 redef fun next do sit.next
1072 end
1073
1074 # Reverse iterator over a RopeBuffer.
1075 class RopeBufferCharReverseIterator
1076 super IndexedIterator[Char]
1077
1078 # Subiterator.
1079 var sit: IndexedIterator[Char]
1080
1081 redef fun index do return sit.index
1082
1083 # Init the iterator from a RopeBuffer.
1084 init(tgt: RopeBuffer) is old_style_init do
1085 tgt.persist_buffer
1086 sit = tgt.str.chars.reverse_iterator
1087 end
1088
1089 # Init the iterator from a RopeBuffer starting from `pos`.
1090 init from(tgt: RopeBuffer, pos: Int) do
1091 tgt.persist_buffer
1092 sit = tgt.str.chars.reverse_iterator_from(pos)
1093 end
1094
1095 redef fun is_ok do return sit.is_ok
1096
1097 redef fun item do
1098 assert is_ok
1099 return sit.item
1100 end
1101
1102 redef fun next do sit.next
1103 end
1104
1105 # View on the chars of a `RopeBuffer`
1106 class RopeBufferChars
1107 super BufferCharView
1108
1109 redef type SELFTYPE: RopeBuffer
1110
1111 redef fun [](i) do return target[i]
1112
1113 redef fun []=(i,c) do target[i] = c
1114
1115 redef fun add(c) do target.add c
1116
1117 redef fun push(c) do target.add c
1118
1119 redef fun iterator_from(i) do return new RopeBufferCharIterator.from(target, i)
1120
1121 redef fun reverse_iterator_from(i) do return new RopeBufferCharReverseIterator.from(target, i)
1122 end
1123
1124 # An Iterator over a RopeBuffer.
1125 class RopeBufferByteIterator
1126 super IndexedIterator[Byte]
1127
1128 # Subiterator.
1129 var sit: IndexedIterator[Byte]
1130
1131 # Native string iterated over.
1132 var ns: NativeString
1133
1134 # Current position in `ns`.
1135 var pns: Int
1136
1137 # Maximum position iterable.
1138 var maxpos: Int
1139
1140 redef var index
1141
1142 # Init the iterator from a RopeBuffer.
1143 init(t: RopeBuffer) is old_style_init do
1144 ns = t.ns
1145 maxpos = t.bytelen
1146 sit = t.str.bytes.iterator
1147 pns = t.dumped
1148 index = 0
1149 end
1150
1151 # Init the iterator from a RopeBuffer starting from `pos`.
1152 init from(t: RopeBuffer, pos: Int) do
1153 ns = t.ns
1154 maxpos = t.bytelen
1155 sit = t.str.bytes.iterator_from(pos)
1156 pns = pos - t.str.length
1157 index = pos
1158 end
1159
1160 redef fun is_ok do return index < maxpos
1161
1162 redef fun item do
1163 if sit.is_ok then return sit.item
1164 return ns[pns]
1165 end
1166
1167 redef fun next do
1168 index += 1
1169 if sit.is_ok then
1170 sit.next
1171 else
1172 pns += 1
1173 end
1174 end
1175 end
1176
1177 # Reverse iterator over a RopeBuffer.
1178 class RopeBufferByteReverseIterator
1179 super IndexedIterator[Byte]
1180
1181 # Subiterator.
1182 var sit: IndexedIterator[Byte]
1183
1184 # Native string iterated over.
1185 var ns: NativeString
1186
1187 # Current position in `ns`.
1188 var pns: Int
1189
1190 redef var index
1191
1192 # Init the iterator from a RopeBuffer.
1193 init(tgt: RopeBuffer) is old_style_init do
1194 sit = tgt.str.bytes.reverse_iterator
1195 pns = tgt.rpos - 1
1196 index = tgt.bytelen - 1
1197 ns = tgt.ns
1198 end
1199
1200 # Init the iterator from a RopeBuffer starting from `pos`.
1201 init from(tgt: RopeBuffer, pos: Int) do
1202 sit = tgt.str.bytes.reverse_iterator_from(pos - (tgt.rpos - tgt.dumped))
1203 pns = pos - tgt.str.bytelen + tgt.rpos
1204 index = pos
1205 ns = tgt.ns
1206 end
1207
1208 redef fun is_ok do return index >= 0
1209
1210 redef fun item do
1211 if pns >= 0 then return ns[pns]
1212 return sit.item
1213 end
1214
1215 redef fun next do
1216 index -= 1
1217 if pns > 0 then
1218 pns -= 1
1219 else
1220 sit.next
1221 end
1222 end
1223 end
1224
1225 # View on the chars of a `RopeBuffer`
1226 class RopeBufferBytes
1227 super BufferByteView
1228
1229 redef type SELFTYPE: RopeBuffer
1230
1231 redef fun [](i) do
1232 if i < target.str.bytelen then
1233 return target.str.bytes[i]
1234 else
1235 return target.ns[i - target.str.bytelen]
1236 end
1237 end
1238
1239 redef fun []=(i,c) do
1240 if i == target.length then target.add_byte c
1241 if i < target.str.length then
1242 # FIXME: Will need to be optimized and rewritten with Unicode
1243 var s = target.str
1244 var l = s.substring(0, i)
1245 var r = s.substring_from(i + 1)
1246 target.str = l + c.to_i.ascii.to_s + r
1247 else
1248 target.ns[i - target.str.length] = c
1249 end
1250 end
1251
1252 redef fun add(c) do target.add_byte c
1253
1254 redef fun push(c) do target.add_byte c
1255
1256 redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i)
1257
1258 redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i)
1259 end