lib/standard/ropes: Removed a few old-style-inits
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 #
20 # See:
21 #
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
25 #
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
28 #
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
33 #
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
36 #
37 # Example :
38 #
39 # Concat
40 # / \
41 # Concat Concat
42 # / \ / \
43 # "My" " Name" " is" " Simon."
44 #
45 # Note that the above example is not representative of the actual implementation
46 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
47 # height (hence, this rope here might actually be a `FlatString` !).
48 module ropes
49
50 intrude import string
51
52 # Maxlen is the maximum length of a Leaf node
53 #
54 # When concatenating two leaves, if `new_length` > `maxlen`,
55 # A `Concat` node is created instead
56 #
57 # Its purpose is to limit the depth of the `Rope` (this
58 # improves performance when accessing/iterating).
59 fun maxlen: Int do return 64
60
61 # String using a tree-based representation with leaves as `FlatStrings`
62 private abstract class Rope
63 super Text
64 end
65
66 private abstract class RopeString
67 super Rope
68 super String
69
70 redef fun chars is cached do return new RopeChars(self)
71 end
72
73 # Node that represents a concatenation between two `String`
74 private class Concat
75 super RopeString
76
77 redef var length: Int is noinit
78
79 redef fun substrings do return new RopeSubstrings(self)
80
81 redef fun empty do return ""
82
83 redef fun to_cstring is cached do
84 var len = length
85 var ns = new NativeString(len + 1)
86 ns[len] = '\0'
87 var off = 0
88 for i in substrings do
89 var ilen = i.length
90 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
91 off += ilen
92 end
93 return ns
94 end
95
96 # Left child of the node
97 var left: String
98 # Right child of the node
99 var right: String
100
101 init do
102 length = left.length + right.length
103 end
104
105 redef fun output do
106 left.output
107 right.output
108 end
109
110 redef fun iterator do return new RopeIter(self)
111
112 redef fun *(i) do
113 var x: String = self
114 for j in [1 .. i[ do x += self
115 return x
116 end
117
118 redef fun [](i) do
119 var llen = left.length
120 if i >= llen then return right[i - llen]
121 return left[i]
122 end
123
124 redef fun substring(from, len) do
125 var llen = left.length
126 if from < llen then
127 if from + len < llen then return left.substring(from,len)
128 var lsublen = llen - from
129 return left.substring_from(from) + right.substring(0, len - lsublen)
130 else
131 return right.substring(from - llen, len)
132 end
133 end
134
135 redef fun reversed do return new Concat(right.reversed, left.reversed)
136
137 redef fun insert_at(s, pos) do
138 if pos > left.length then
139 return left + right.insert_at(s, pos - left.length)
140 end
141 return left.insert_at(s, pos) + right
142 end
143
144 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
145
146 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
147
148 redef fun +(o) do
149 var s = o.to_s
150 var slen = s.length
151 if s isa Concat then
152 return new Concat(self, s)
153 else
154 var r = right
155 var rlen = r.length
156 if rlen + slen > maxlen then return new Concat(self, s)
157 return new Concat(left, r + s)
158 end
159 end
160 end
161
162 # Mutable `Rope`, optimized for concatenation operations
163 #
164 # A `RopeBuffer` is an efficient way of building a `String` when
165 # concatenating small strings.
166 #
167 # It does concatenations in an optimized way by having a
168 # mutable part and an immutable part built by efficiently
169 # concatenating strings in chain.
170 #
171 # Every concatenation operation is done by copying a string to
172 # the mutable part and flushing it when full.
173 #
174 # However, when a long string is appended to the `Buffer`,
175 # the concatenation is done at it would be in a `Rope`.
176 class RopeBuffer
177 super Rope
178 super Buffer
179
180 redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self)
181
182 # The final string being built on the fly
183 private var str: String is noinit
184
185 # Current concatenation buffer
186 private var ns: NativeString is noinit
187
188 # Next available (e.g. unset) character in the `Buffer`
189 private var rpos = 0
190
191 # Keeps track of the buffer's currently dumped part
192 #
193 # This might happen if for instance, a String was being
194 # built by concatenating small parts of string and suddenly
195 # a long string (length > maxlen) is appended.
196 private var dumped: Int is noinit
197
198 # Length of the complete rope
199 redef var length = 0
200
201 # Length of the mutable part
202 #
203 # Is also used as base to compute the size of the next
204 # mutable native string (`ns`)
205 private var buf_size: Int is noinit
206
207 redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
208
209 # Builds an empty `RopeBuffer`
210 init do
211 str = ""
212 ns = new NativeString(maxlen)
213 buf_size = maxlen
214 dumped = 0
215 end
216
217 # Builds a new `RopeBuffer` with `str` in it.
218 init from(str: String) do
219 self.str = str
220 ns = new NativeString(maxlen)
221 buf_size = maxlen
222 length = str.length
223 dumped = 0
224 end
225
226 # Resets the informations of the `Buffer`
227 #
228 # This is called when doing in-place modifications
229 # on a previously to_s'd `RopeBuffer`
230 private fun reset do
231 var nns = new NativeString(buf_size)
232 var blen = rpos - dumped
233 ns.copy_to(nns, blen, dumped, 0)
234 ns = nns
235 dumped = 0
236 rpos = blen
237 written = false
238 end
239
240 redef fun empty do return new RopeBuffer
241
242 redef fun clear do
243 str = ""
244 length = 0
245 rpos = 0
246 dumped = 0
247 if written then
248 ns = new NativeString(buf_size)
249 written = false
250 end
251 end
252
253 redef fun substring(from, count) do
254 var strlen = str.length
255
256 if from < 0 then
257 count += from
258 if count < 0 then count = 0
259 from = 0
260 end
261
262 if count > length then count = length - from
263
264 if count == 0 then return empty
265
266 if from < strlen then
267 var subpos = strlen - from
268 if count <= subpos then
269 return new RopeBuffer.from(str.substring(from, count))
270 else
271 var l = str.substring_from(from)
272 var rem = count - subpos
273 var nns = new NativeString(rem)
274 ns.copy_to(nns, rem, dumped, 0)
275 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
276 end
277 else
278 var nns = new NativeString(count)
279 ns.copy_to(nns, count, dumped, 0)
280 return new RopeBuffer.from(nns.to_s_with_length(count))
281 end
282 end
283
284 redef fun append(s) do
285 var slen = s.length
286 length += slen
287 var rp = rpos
288 if s isa Rope or slen > maxlen then
289 if rp > 0 and dumped != rp then
290 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
291 dumped = rp
292 end
293 str = str + s
294 return
295 end
296 var remsp = buf_size - rp
297 var sits: NativeString
298 var begin: Int
299 if s isa FlatString then
300 begin = s.index_from
301 sits = s.items
302 else if s isa FlatBuffer then
303 begin = 0
304 sits = s.items
305 else
306 if slen <= remsp then
307 for i in s.chars do
308 ns[rpos] = i
309 rpos += 1
310 end
311 else
312 var spos = 0
313 for i in [0..remsp[ do
314 ns[rpos] = s[spos]
315 rpos += 1
316 spos += 1
317 end
318 dump_buffer
319 while spos < slen do
320 ns[rpos] = s[spos]
321 spos += 1
322 rpos += 1
323 end
324 end
325 return
326 end
327 if slen <= remsp then
328 if remsp <= 0 then
329 dump_buffer
330 rpos = 0
331 else
332 sits.copy_to(ns, slen, begin, rp)
333 rpos += slen
334 end
335 else
336 sits.copy_to(ns, remsp, begin, rp)
337 rpos = buf_size
338 dump_buffer
339 var nlen = slen - remsp
340 sits.copy_to(ns, nlen, begin + remsp, 0)
341 rpos = nlen
342 end
343 end
344
345 redef fun add(c) do
346 var rp = rpos
347 if rp >= buf_size then
348 dump_buffer
349 rp = 0
350 end
351 ns[rp] = c
352 rp += 1
353 length += 1
354 rpos = rp
355 end
356
357 # Converts the Buffer to a FlatString, appends it to
358 # the final String and re-allocates a new larger Buffer.
359 private fun dump_buffer do
360 written = false
361 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
362 str += nstr
363 var bs = buf_size
364 bs = bs * 2
365 ns = new NativeString(bs)
366 buf_size = bs
367 dumped = 0
368 end
369
370 redef fun output do
371 str.output
372 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
373 end
374
375 # Enlarge is useless here since the `Buffer`
376 # part is automatically dumped when necessary.
377 #
378 # Also, since the buffer can not be overused by a
379 # single string, there is no need for manual
380 # resizing.
381 #
382 # "You have no power here !"
383 redef fun enlarge(i) do end
384
385 redef fun to_s do
386 written = true
387 var nnslen = rpos - dumped
388 if nnslen == 0 then return str
389 return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
390 end
391
392 redef fun reverse do
393 # Flush the buffer in order to only have to reverse `str`.
394 if rpos > 0 and dumped != rpos then
395 str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
396 dumped = rpos
397 end
398 str = str.reversed
399 end
400
401 redef fun upper do
402 if written then reset
403 str = str.to_upper
404 var mits = ns
405 for i in [0 .. rpos[ do
406 mits[i] = mits[i].to_upper
407 end
408 end
409
410 redef fun lower do
411 if written then reset
412 str = str.to_lower
413 var mits = ns
414 for i in [0 .. rpos[ do
415 mits[i] = mits[i].to_lower
416 end
417 end
418 end
419
420 redef class FlatString
421
422 redef fun insert_at(s, pos) do
423 var l = substring(0, pos)
424 var r = substring_from(pos)
425 return l + s + r
426 end
427
428 redef fun +(o) do
429 var s = o.to_s
430 var slen = s.length
431 var mlen = length
432 if slen == 0 then return self
433 if mlen == 0 then return s
434 var nlen = slen + mlen
435 if s isa FlatString then
436 if nlen > maxlen then return new Concat(self, s)
437 var mits = items
438 var sifrom = s.index_from
439 var mifrom = index_from
440 var sits = s.items
441 var ns = new NativeString(nlen + 1)
442 mits.copy_to(ns, mlen, mifrom, 0)
443 sits.copy_to(ns, slen, sifrom, mlen)
444 return ns.to_s_with_length(nlen)
445 else if s isa Concat then
446 var sl = s.left
447 var sllen = sl.length
448 if sllen + mlen > maxlen then return new Concat(self, s)
449 return new Concat(self + sl, s.right)
450 else
451 abort
452 end
453 end
454 end
455
456 # A simple linked list for use with iterators
457 private class RopeIterPiece
458 # The encapsulated node of the `Rope`
459 var node: String
460 # Was its left child (if any) visited ?
461 var ldone: Bool
462 # Was its right child (if any) visited ?
463 var rdone: Bool
464 # The previous node in the list.
465 var prev: nullable RopeIterPiece
466 end
467
468 # A reverse iterator capable of working with `Rope` objects
469 private class RopeReviter
470 super IndexedIterator[Char]
471
472 # Current NativeString
473 var ns: String
474 # Current position in NativeString
475 var pns: Int
476 # Position in the Rope (0-indexed)
477 var pos: Int
478 # Iterator on the substrings, does the Postfix part of
479 # the Rope traversal.
480 var subs: IndexedIterator[String]
481
482 init(root: RopeString) is old_style_init do
483 pos = root.length - 1
484 subs = new ReverseRopeSubstrings(root)
485 ns = subs.item
486 pns = ns.length - 1
487 end
488
489 init from(root: RopeString, pos: Int) do
490 self.pos = pos
491 subs = new ReverseRopeSubstrings.from(root, pos)
492 ns = subs.item
493 pns = pos - subs.index
494 end
495
496 redef fun index do return pos
497
498 redef fun is_ok do return pos >= 0
499
500 redef fun item do return ns[pns]
501
502 redef fun next do
503 pns -= 1
504 pos -= 1
505 if pns >= 0 then return
506 if not subs.is_ok then return
507 subs.next
508 if not subs.is_ok then return
509 ns = subs.item
510 pns = ns.length - 1
511 end
512 end
513
514 # Forward iterator on the chars of a `Rope`
515 private class RopeIter
516 super IndexedIterator[Char]
517
518 # Position in current `String`
519 var pns: Int
520 # Current `String` being iterated on
521 var str: String
522 # Substrings of the Rope
523 var subs: IndexedIterator[String]
524 # Maximum position to iterate on (e.g. Rope.length)
525 var max: Int
526 # Position (char) in the Rope (0-indexed)
527 var pos: Int
528
529 init(root: RopeString) is old_style_init do
530 subs = new RopeSubstrings(root)
531 pns = 0
532 str = subs.item
533 max = root.length - 1
534 pos = 0
535 end
536
537 init from(root: RopeString, pos: Int) do
538 subs = new RopeSubstrings.from(root, pos)
539 pns = pos - subs.index
540 self.pos = pos
541 str = subs.item
542 max = root.length - 1
543 end
544
545 redef fun item do return str[pns]
546
547 redef fun is_ok do return pos <= max
548
549 redef fun index do return pos
550
551 redef fun next do
552 pns += 1
553 pos += 1
554 if pns < subs.item.length then return
555 if not subs.is_ok then return
556 subs.next
557 if not subs.is_ok then return
558 str = subs.item
559 pns = 0
560 end
561 end
562
563 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
564 private class ReverseRopeSubstrings
565 super IndexedIterator[String]
566
567 # Visit Stack
568 var iter: RopeIterPiece is noinit
569 # Position in `Rope`
570 var pos: Int is noinit
571
572 # Current leaf
573 var str: String is noinit
574
575 init(root: RopeString) is old_style_init do
576 var r = new RopeIterPiece(root, false, true, null)
577 pos = root.length - 1
578 var lnod: String = root
579 loop
580 if lnod isa Concat then
581 lnod = lnod.right
582 r = new RopeIterPiece(lnod, false, true, r)
583 else
584 str = lnod
585 iter = r
586 break
587 end
588 end
589 end
590
591 init from(root: RopeString, pos: Int) do
592 var r = new RopeIterPiece(root, false, true, null)
593 var rnod: String = root
594 var off = pos
595 loop
596 if rnod isa Concat then
597 if off >= rnod.left.length then
598 off -= rnod.left.length
599 rnod = rnod.right
600 r = new RopeIterPiece(rnod, false, true, r)
601 else
602 r.ldone = true
603 rnod = rnod.left
604 r = new RopeIterPiece(rnod, false, true, r)
605 end
606 else
607 str = rnod
608 r.ldone = true
609 iter = r
610 self.pos = pos - off
611 break
612 end
613 end
614 end
615
616 redef fun item do return str
617
618 redef fun index do return pos
619
620 redef fun is_ok do return pos >= 0
621
622 redef fun next do
623 if pos < 0 then return
624 var curr = iter.prev
625 var currit = curr.node
626 while curr != null do
627 currit = curr.node
628 if not currit isa Concat then
629 str = currit
630 pos -= str.length
631 iter = curr
632 return
633 end
634 if not curr.rdone then
635 curr.rdone = true
636 curr = new RopeIterPiece(currit.right, false, false, curr)
637 continue
638 end
639 if not curr.ldone then
640 curr.ldone = true
641 curr = new RopeIterPiece(currit.left, false, false, curr)
642 continue
643 end
644 curr = curr.prev
645 end
646 pos = -1
647 end
648 end
649
650 private class RopeBufSubstringIterator
651 super Iterator[String]
652
653 # Iterator on the substrings of the building string
654 var iter: Iterator[String]
655 # Makes a String out of the buffered part of the Ropebuffer
656 var nsstr: String
657 # Did we attain the buffered part ?
658 var nsstr_done = false
659
660 init(str: RopeBuffer) is old_style_init do
661 iter = str.str.substrings
662 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
663 if str.length == 0 then nsstr_done = true
664 end
665
666 redef fun is_ok do return iter.is_ok or not nsstr_done
667
668 redef fun item do
669 assert is_ok
670 if iter.is_ok then return iter.item
671 return nsstr
672 end
673
674 redef fun next do
675 if iter.is_ok then
676 iter.next
677 return
678 end
679 nsstr_done = true
680 end
681 end
682
683 # Substrings of a Rope (i.e. Postfix iterator on leaves)
684 private class RopeSubstrings
685 super IndexedIterator[String]
686
687 # Visit Stack
688 var iter: RopeIterPiece is noinit
689 # Position in `Rope`
690 var pos: Int is noinit
691 # Maximum position in `Rope` (i.e. length - 1)
692 var max: Int is noinit
693
694 # Current leaf
695 var str: String is noinit
696
697 init(root: RopeString) is old_style_init do
698 var r = new RopeIterPiece(root, true, false, null)
699 pos = 0
700 max = root.length - 1
701 var rnod: String = root
702 loop
703 if rnod isa Concat then
704 rnod = rnod.left
705 r = new RopeIterPiece(rnod, true, false, r)
706 else
707 str = rnod
708 r.rdone = true
709 iter = r
710 break
711 end
712 end
713 end
714
715 init from(root: RopeString, pos: Int) do
716 var r = new RopeIterPiece(root, true, false, null)
717 max = root.length - 1
718 var rnod: String = root
719 var off = pos
720 loop
721 if rnod isa Concat then
722 if off >= rnod.left.length then
723 r.rdone = true
724 off -= rnod.left.length
725 rnod = rnod.right
726 r = new RopeIterPiece(rnod, true, false, r)
727 else
728 rnod = rnod.left
729 r = new RopeIterPiece(rnod, true, false, r)
730 end
731 else
732 str = rnod
733 r.rdone = true
734 iter = r
735 self.pos = pos - off
736 break
737 end
738 end
739 end
740
741 redef fun item do return str
742
743 redef fun is_ok do return pos <= max
744
745 redef fun index do return pos
746
747 redef fun next do
748 pos += str.length
749 if pos > max then return
750 var it = iter.prev
751 var rnod = it.node
752 loop
753 if not rnod isa Concat then
754 it.ldone = true
755 it.rdone = true
756 str = rnod
757 iter = it.as(not null)
758 break
759 end
760 if not it.ldone then
761 rnod = rnod.left
762 it.ldone = true
763 it = new RopeIterPiece(rnod, false, false, it)
764 else if not it.rdone then
765 it.rdone = true
766 rnod = rnod.right
767 it = new RopeIterPiece(rnod, false, false, it)
768 else
769 it = it.prev
770 rnod = it.node
771 continue
772 end
773 end
774 end
775 end
776
777 # Implementation of a `StringCharView` for `RopeString` objects
778 private class RopeChars
779 super StringCharView
780
781 redef type SELFTYPE: RopeString
782
783 redef fun [](i) do
784 return target[i]
785 end
786
787 redef fun iterator_from(i) do return new RopeIter.from(target, i)
788
789 redef fun reverse_iterator_from(i) do return new RopeReviter.from(target, i)
790
791 end
792
793 # An Iterator over a RopeBuffer.
794 class RopeBufferIter
795 super IndexedIterator[Char]
796
797 # Subiterator.
798 var sit: IndexedIterator[Char]
799
800 # Native string iterated over.
801 var ns: NativeString
802
803 # Current position in `ns`.
804 var pns: Int
805
806 # Maximum position iterable.
807 var maxpos: Int
808
809 redef var index: Int
810
811 # Init the iterator from a RopeBuffer.
812 init(t: RopeBuffer) is old_style_init do
813 ns = t.ns
814 maxpos = t.rpos
815 sit = t.str.chars.iterator
816 pns = t.dumped
817 index = 0
818 end
819
820 # Init the iterator from a RopeBuffer starting from `pos`.
821 init from(t: RopeBuffer, pos: Int) do
822 ns = t.ns
823 maxpos = t.length
824 sit = t.str.chars.iterator_from(pos)
825 pns = pos - t.str.length
826 index = pos
827 end
828
829 redef fun is_ok do return index < maxpos
830
831 redef fun item do
832 if sit.is_ok then return sit.item
833 return ns[pns]
834 end
835
836 redef fun next do
837 index += 1
838 if sit.is_ok then
839 sit.next
840 else
841 pns += 1
842 end
843 end
844 end
845
846 # Reverse iterator over a RopeBuffer.
847 class RopeBufferReviter
848 super IndexedIterator[Char]
849
850 # Subiterator.
851 var sit: IndexedIterator[Char]
852
853 # Native string iterated over.
854 var ns: NativeString
855
856 # Current position in `ns`.
857 var pns: Int
858
859 redef var index: Int
860
861 # Init the iterator from a RopeBuffer.
862 init(tgt: RopeBuffer) is old_style_init do
863 sit = tgt.str.chars.reverse_iterator
864 pns = tgt.rpos - 1
865 index = tgt.length - 1
866 ns = tgt.ns
867 end
868
869 # Init the iterator from a RopeBuffer starting from `pos`.
870 init from(tgt: RopeBuffer, pos: Int) do
871 sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
872 pns = pos - tgt.str.length
873 index = pos
874 ns = tgt.ns
875 end
876
877 redef fun is_ok do return index > 0
878
879 redef fun item do
880 if pns >= 0 then return ns[pns]
881 return sit.item
882 end
883
884 redef fun next do
885 index -= 1
886 if pns >= 0 then
887 pns -= 1
888 else
889 sit.next
890 end
891 end
892 end
893
894 # View on the chars of a `RopeBuffer`
895 class RopeBufferChars
896 super BufferCharView
897
898 redef type SELFTYPE: RopeBuffer
899
900 redef fun [](i) do
901 if i < target.str.length then
902 return target.str[i]
903 else
904 return target.ns[i - target.str.length]
905 end
906 end
907
908 redef fun []=(i,c) do
909 if i == target.length then target.add c
910 if i < target.str.length then
911 var s = target.str
912 var l = s.substring(0, i)
913 var r = s.substring_from(i + 1)
914 target.str = l + c.to_s + r
915 else
916 target.ns[i - target.str.length] = c
917 end
918 end
919
920 redef fun add(c) do target.add c
921
922 redef fun push(c) do target.add c
923
924 redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
925
926 redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
927 end