68593fd82ba16e50813c8062209c84e3111ffcfb
[nit.git] / lib / standard / ropes.nit
1 # This file is part of NIT (http://www.nitlanguage.org).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Tree-based representation of a String.
16 #
17 # Ropes are a data structure introduced in a 1995 paper from
18 # Hans J. Boehm, Russ Atkinson and Michael Plass.
19 #
20 # See:
21 #
22 # > Ropes: an Alternative to Strings,
23 # > *Software - Practice and Experience*,
24 # > Vol. 25(12), 1315-1330 (December 1995).
25 #
26 # The implementation developed here provides an automatic change
27 # of data structure depending on the length of the leaves.
28 #
29 # The length from which a `Rope` is built from a `flat` string
30 # if defined at top-level (see `maxlen`) and can be redefined by clients
31 # depending on their needs (e.g. if you need to bench the speed of
32 # the creation of concat nodes, lower the size of maxlen).
33 #
34 # A Rope as defined in the original paper is a Tree made of concatenation
35 # nodes and containing `Flat` (that is Array-based) strings as Leaves.
36 #
37 # Example :
38 #
39 # Concat
40 # / \
41 # Concat Concat
42 # / \ / \
43 # "My" " Name" " is" " Simon."
44 #
45 # Note that the above example is not representative of the actual implementation
46 # of `Ropes`, since short leaves are merged to keep the rope at an acceptable
47 # height (hence, this rope here might actually be a `FlatString` !).
48 module ropes
49
50 intrude import string
51
52 # Maxlen is the maximum length of a Leaf node
53 #
54 # When concatenating two leaves, if `new_length` > `maxlen`,
55 # A `Concat` node is created instead
56 #
57 # Its purpose is to limit the depth of the `Rope` (this
58 # improves performance when accessing/iterating).
59 fun maxlen: Int do return 64
60
61 # String using a tree-based representation with leaves as `FlatStrings`
62 private abstract class Rope
63 super Text
64 end
65
66 private abstract class RopeString
67 super Rope
68 super String
69
70 redef fun chars is cached do return new RopeChars(self)
71 end
72
73 # Node that represents a concatenation between two `String`
74 private class Concat
75 super RopeString
76
77 redef var length: Int
78
79 redef fun substrings do return new RopeSubstrings(self)
80
81 redef fun empty do return ""
82
83 redef fun to_cstring is cached do
84 var len = length
85 var ns = new NativeString(len + 1)
86 ns[len] = '\0'
87 var off = 0
88 for i in substrings do
89 var ilen = i.length
90 i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
91 off += ilen
92 end
93 return ns
94 end
95
96 # Left child of the node
97 var left: String
98 # Right child of the node
99 var right: String
100
101 init(l: String, r: String) is old_style_init do
102 left = l
103 right = r
104 length = l.length + r.length
105 end
106
107 redef fun output do
108 left.output
109 right.output
110 end
111
112 redef fun iterator do return new RopeIter(self)
113
114 redef fun *(i) do
115 var x: String = self
116 for j in [1 .. i[ do x += self
117 return x
118 end
119
120 redef fun [](i) do
121 var llen = left.length
122 if i >= llen then return right[i - llen]
123 return left[i]
124 end
125
126 redef fun substring(from, len) do
127 var llen = left.length
128 if from < llen then
129 if from + len < llen then return left.substring(from,len)
130 var lsublen = llen - from
131 return left.substring_from(from) + right.substring(0, len - lsublen)
132 else
133 return right.substring(from - llen, len)
134 end
135 end
136
137 redef fun reversed do return new Concat(right.reversed, left.reversed)
138
139 redef fun insert_at(s, pos) do
140 if pos > left.length then
141 return left + right.insert_at(s, pos - left.length)
142 end
143 return left.insert_at(s, pos) + right
144 end
145
146 redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
147
148 redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
149
150 redef fun +(o) do
151 var s = o.to_s
152 var slen = s.length
153 if s isa Concat then
154 return new Concat(self, s)
155 else
156 var r = right
157 var rlen = r.length
158 if rlen + slen > maxlen then return new Concat(self, s)
159 return new Concat(left, r + s)
160 end
161 end
162 end
163
164 # Mutable `Rope`, optimized for concatenation operations
165 #
166 # A `RopeBuffer` is an efficient way of building a `String` when
167 # concatenating small strings.
168 #
169 # It does concatenations in an optimized way by having a
170 # mutable part and an immutable part built by efficiently
171 # concatenating strings in chain.
172 #
173 # Every concatenation operation is done by copying a string to
174 # the mutable part and flushing it when full.
175 #
176 # However, when a long string is appended to the `Buffer`,
177 # the concatenation is done at it would be in a `Rope`.
178 class RopeBuffer
179 super Rope
180 super Buffer
181
182 redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self)
183
184 # The final string being built on the fly
185 private var str: String is noinit
186
187 # Current concatenation buffer
188 private var ns: NativeString is noinit
189
190 # Next available (e.g. unset) character in the `Buffer`
191 private var rpos = 0
192
193 # Keeps track of the buffer's currently dumped part
194 #
195 # This might happen if for instance, a String was being
196 # built by concatenating small parts of string and suddenly
197 # a long string (length > maxlen) is appended.
198 private var dumped: Int is noinit
199
200 # Length of the complete rope
201 redef var length = 0
202
203 # Length of the mutable part
204 #
205 # Is also used as base to compute the size of the next
206 # mutable native string (`ns`)
207 private var buf_size: Int is noinit
208
209 redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
210
211 # Builds an empty `RopeBuffer`
212 init do
213 str = ""
214 ns = new NativeString(maxlen)
215 buf_size = maxlen
216 dumped = 0
217 end
218
219 # Builds a new `RopeBuffer` with `str` in it.
220 init from(str: String) do
221 self.str = str
222 ns = new NativeString(maxlen)
223 buf_size = maxlen
224 length = str.length
225 dumped = 0
226 end
227
228 # Resets the informations of the `Buffer`
229 #
230 # This is called when doing in-place modifications
231 # on a previously to_s'd `RopeBuffer`
232 private fun reset do
233 var nns = new NativeString(buf_size)
234 var blen = rpos - dumped
235 ns.copy_to(nns, blen, dumped, 0)
236 ns = nns
237 dumped = 0
238 rpos = blen
239 written = false
240 end
241
242 redef fun empty do return new RopeBuffer
243
244 redef fun clear do
245 str = ""
246 length = 0
247 rpos = 0
248 dumped = 0
249 if written then
250 ns = new NativeString(buf_size)
251 written = false
252 end
253 end
254
255 redef fun substring(from, count) do
256 var strlen = str.length
257
258 if from < 0 then
259 count += from
260 if count < 0 then count = 0
261 from = 0
262 end
263
264 if count > length then count = length - from
265
266 if count == 0 then return empty
267
268 if from < strlen then
269 var subpos = strlen - from
270 if count <= subpos then
271 return new RopeBuffer.from(str.substring(from, count))
272 else
273 var l = str.substring_from(from)
274 var rem = count - subpos
275 var nns = new NativeString(rem)
276 ns.copy_to(nns, rem, dumped, 0)
277 return new RopeBuffer.from(l + nns.to_s_with_length(rem))
278 end
279 else
280 var nns = new NativeString(count)
281 ns.copy_to(nns, count, dumped, 0)
282 return new RopeBuffer.from(nns.to_s_with_length(count))
283 end
284 end
285
286 redef fun append(s) do
287 var slen = s.length
288 length += slen
289 var rp = rpos
290 if s isa Rope or slen > maxlen then
291 if rp > 0 and dumped != rp then
292 str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
293 dumped = rp
294 end
295 str = str + s
296 return
297 end
298 var remsp = buf_size - rp
299 var sits: NativeString
300 var begin: Int
301 if s isa FlatString then
302 begin = s.index_from
303 sits = s.items
304 else if s isa FlatBuffer then
305 begin = 0
306 sits = s.items
307 else
308 if slen <= remsp then
309 for i in s.chars do
310 ns[rpos] = i
311 rpos += 1
312 end
313 else
314 var spos = 0
315 for i in [0..remsp[ do
316 ns[rpos] = s[spos]
317 rpos += 1
318 spos += 1
319 end
320 dump_buffer
321 while spos < slen do
322 ns[rpos] = s[spos]
323 spos += 1
324 rpos += 1
325 end
326 end
327 return
328 end
329 if slen <= remsp then
330 if remsp <= 0 then
331 dump_buffer
332 rpos = 0
333 else
334 sits.copy_to(ns, slen, begin, rp)
335 rpos += slen
336 end
337 else
338 sits.copy_to(ns, remsp, begin, rp)
339 rpos = buf_size
340 dump_buffer
341 var nlen = slen - remsp
342 sits.copy_to(ns, nlen, begin + remsp, 0)
343 rpos = nlen
344 end
345 end
346
347 redef fun add(c) do
348 var rp = rpos
349 if rp >= buf_size then
350 dump_buffer
351 rp = 0
352 end
353 ns[rp] = c
354 rp += 1
355 length += 1
356 rpos = rp
357 end
358
359 # Converts the Buffer to a FlatString, appends it to
360 # the final String and re-allocates a new larger Buffer.
361 private fun dump_buffer do
362 written = false
363 var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
364 str += nstr
365 var bs = buf_size
366 bs = bs * 2
367 ns = new NativeString(bs)
368 buf_size = bs
369 dumped = 0
370 end
371
372 redef fun output do
373 str.output
374 new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
375 end
376
377 # Enlarge is useless here since the `Buffer`
378 # part is automatically dumped when necessary.
379 #
380 # Also, since the buffer can not be overused by a
381 # single string, there is no need for manual
382 # resizing.
383 #
384 # "You have no power here !"
385 redef fun enlarge(i) do end
386
387 redef fun to_s do
388 written = true
389 var nnslen = rpos - dumped
390 if nnslen == 0 then return str
391 return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
392 end
393
394 redef fun reverse do
395 # Flush the buffer in order to only have to reverse `str`.
396 if rpos > 0 and dumped != rpos then
397 str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
398 dumped = rpos
399 end
400 str = str.reversed
401 end
402
403 redef fun upper do
404 if written then reset
405 str = str.to_upper
406 var mits = ns
407 for i in [0 .. rpos[ do
408 mits[i] = mits[i].to_upper
409 end
410 end
411
412 redef fun lower do
413 if written then reset
414 str = str.to_lower
415 var mits = ns
416 for i in [0 .. rpos[ do
417 mits[i] = mits[i].to_lower
418 end
419 end
420 end
421
422 redef class FlatString
423
424 redef fun insert_at(s, pos) do
425 var l = substring(0, pos)
426 var r = substring_from(pos)
427 return l + s + r
428 end
429
430 redef fun +(o) do
431 var s = o.to_s
432 var slen = s.length
433 var mlen = length
434 if slen == 0 then return self
435 if mlen == 0 then return s
436 var nlen = slen + mlen
437 if s isa FlatString then
438 if nlen > maxlen then return new Concat(self, s)
439 var mits = items
440 var sifrom = s.index_from
441 var mifrom = index_from
442 var sits = s.items
443 var ns = new NativeString(nlen + 1)
444 mits.copy_to(ns, mlen, mifrom, 0)
445 sits.copy_to(ns, slen, sifrom, mlen)
446 return ns.to_s_with_length(nlen)
447 else if s isa Concat then
448 var sl = s.left
449 var sllen = sl.length
450 if sllen + mlen > maxlen then return new Concat(self, s)
451 return new Concat(self + sl, s.right)
452 else
453 abort
454 end
455 end
456 end
457
458 # A simple linked list for use with iterators
459 private class RopeIterPiece
460 # The encapsulated node of the `Rope`
461 var node: String
462 # Was its left child (if any) visited ?
463 var ldone: Bool
464 # Was its right child (if any) visited ?
465 var rdone: Bool
466 # The previous node in the list.
467 var prev: nullable RopeIterPiece
468 end
469
470 # A reverse iterator capable of working with `Rope` objects
471 private class RopeReviter
472 super IndexedIterator[Char]
473
474 # Current NativeString
475 var ns: String
476 # Current position in NativeString
477 var pns: Int
478 # Position in the Rope (0-indexed)
479 var pos: Int
480 # Iterator on the substrings, does the Postfix part of
481 # the Rope traversal.
482 var subs: IndexedIterator[String]
483
484 init(root: RopeString) is old_style_init do
485 pos = root.length - 1
486 subs = new ReverseRopeSubstrings(root)
487 ns = subs.item
488 pns = ns.length - 1
489 end
490
491 init from(root: RopeString, pos: Int) do
492 self.pos = pos
493 subs = new ReverseRopeSubstrings.from(root, pos)
494 ns = subs.item
495 pns = pos - subs.index
496 end
497
498 redef fun index do return pos
499
500 redef fun is_ok do return pos >= 0
501
502 redef fun item do return ns[pns]
503
504 redef fun next do
505 pns -= 1
506 pos -= 1
507 if pns >= 0 then return
508 if not subs.is_ok then return
509 subs.next
510 if not subs.is_ok then return
511 ns = subs.item
512 pns = ns.length - 1
513 end
514 end
515
516 # Forward iterator on the chars of a `Rope`
517 private class RopeIter
518 super IndexedIterator[Char]
519
520 # Position in current `String`
521 var pns: Int
522 # Current `String` being iterated on
523 var str: String
524 # Substrings of the Rope
525 var subs: IndexedIterator[String]
526 # Maximum position to iterate on (e.g. Rope.length)
527 var max: Int
528 # Position (char) in the Rope (0-indexed)
529 var pos: Int
530
531 init(root: RopeString) is old_style_init do
532 subs = new RopeSubstrings(root)
533 pns = 0
534 str = subs.item
535 max = root.length - 1
536 pos = 0
537 end
538
539 init from(root: RopeString, pos: Int) do
540 subs = new RopeSubstrings.from(root, pos)
541 pns = pos - subs.index
542 self.pos = pos
543 str = subs.item
544 max = root.length - 1
545 end
546
547 redef fun item do return str[pns]
548
549 redef fun is_ok do return pos <= max
550
551 redef fun index do return pos
552
553 redef fun next do
554 pns += 1
555 pos += 1
556 if pns < subs.item.length then return
557 if not subs.is_ok then return
558 subs.next
559 if not subs.is_ok then return
560 str = subs.item
561 pns = 0
562 end
563 end
564
565 # Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
566 private class ReverseRopeSubstrings
567 super IndexedIterator[String]
568
569 # Visit Stack
570 var iter: RopeIterPiece is noinit
571 # Position in `Rope`
572 var pos: Int is noinit
573
574 # Current leaf
575 var str: String is noinit
576
577 init(root: RopeString) is old_style_init do
578 var r = new RopeIterPiece(root, false, true, null)
579 pos = root.length - 1
580 var lnod: String = root
581 loop
582 if lnod isa Concat then
583 lnod = lnod.right
584 r = new RopeIterPiece(lnod, false, true, r)
585 else
586 str = lnod
587 iter = r
588 break
589 end
590 end
591 end
592
593 init from(root: RopeString, pos: Int) do
594 var r = new RopeIterPiece(root, false, true, null)
595 var rnod: String = root
596 var off = pos
597 loop
598 if rnod isa Concat then
599 if off >= rnod.left.length then
600 off -= rnod.left.length
601 rnod = rnod.right
602 r = new RopeIterPiece(rnod, false, true, r)
603 else
604 r.ldone = true
605 rnod = rnod.left
606 r = new RopeIterPiece(rnod, false, true, r)
607 end
608 else
609 str = rnod
610 r.ldone = true
611 iter = r
612 self.pos = pos - off
613 break
614 end
615 end
616 end
617
618 redef fun item do return str
619
620 redef fun index do return pos
621
622 redef fun is_ok do return pos >= 0
623
624 redef fun next do
625 if pos < 0 then return
626 var curr = iter.prev
627 var currit = curr.node
628 while curr != null do
629 currit = curr.node
630 if not currit isa Concat then
631 str = currit
632 pos -= str.length
633 iter = curr
634 return
635 end
636 if not curr.rdone then
637 curr.rdone = true
638 curr = new RopeIterPiece(currit.right, false, false, curr)
639 continue
640 end
641 if not curr.ldone then
642 curr.ldone = true
643 curr = new RopeIterPiece(currit.left, false, false, curr)
644 continue
645 end
646 curr = curr.prev
647 end
648 pos = -1
649 end
650 end
651
652 private class RopeBufSubstringIterator
653 super Iterator[String]
654
655 # Iterator on the substrings of the building string
656 var iter: Iterator[String]
657 # Makes a String out of the buffered part of the Ropebuffer
658 var nsstr: String
659 # Did we attain the buffered part ?
660 var nsstr_done = false
661
662 init(str: RopeBuffer) is old_style_init do
663 iter = str.str.substrings
664 nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
665 if str.length == 0 then nsstr_done = true
666 end
667
668 redef fun is_ok do return iter.is_ok or not nsstr_done
669
670 redef fun item do
671 assert is_ok
672 if iter.is_ok then return iter.item
673 return nsstr
674 end
675
676 redef fun next do
677 if iter.is_ok then
678 iter.next
679 return
680 end
681 nsstr_done = true
682 end
683 end
684
685 # Substrings of a Rope (i.e. Postfix iterator on leaves)
686 private class RopeSubstrings
687 super IndexedIterator[String]
688
689 # Visit Stack
690 var iter: RopeIterPiece is noinit
691 # Position in `Rope`
692 var pos: Int is noinit
693 # Maximum position in `Rope` (i.e. length - 1)
694 var max: Int is noinit
695
696 # Current leaf
697 var str: String is noinit
698
699 init(root: RopeString) is old_style_init do
700 var r = new RopeIterPiece(root, true, false, null)
701 pos = 0
702 max = root.length - 1
703 var rnod: String = root
704 loop
705 if rnod isa Concat then
706 rnod = rnod.left
707 r = new RopeIterPiece(rnod, true, false, r)
708 else
709 str = rnod
710 r.rdone = true
711 iter = r
712 break
713 end
714 end
715 end
716
717 init from(root: RopeString, pos: Int) do
718 var r = new RopeIterPiece(root, true, false, null)
719 max = root.length - 1
720 var rnod: String = root
721 var off = pos
722 loop
723 if rnod isa Concat then
724 if off >= rnod.left.length then
725 r.rdone = true
726 off -= rnod.left.length
727 rnod = rnod.right
728 r = new RopeIterPiece(rnod, true, false, r)
729 else
730 rnod = rnod.left
731 r = new RopeIterPiece(rnod, true, false, r)
732 end
733 else
734 str = rnod
735 r.rdone = true
736 iter = r
737 self.pos = pos - off
738 break
739 end
740 end
741 end
742
743 redef fun item do return str
744
745 redef fun is_ok do return pos <= max
746
747 redef fun index do return pos
748
749 redef fun next do
750 pos += str.length
751 if pos > max then return
752 var it = iter.prev
753 var rnod = it.node
754 loop
755 if not rnod isa Concat then
756 it.ldone = true
757 it.rdone = true
758 str = rnod
759 iter = it.as(not null)
760 break
761 end
762 if not it.ldone then
763 rnod = rnod.left
764 it.ldone = true
765 it = new RopeIterPiece(rnod, false, false, it)
766 else if not it.rdone then
767 it.rdone = true
768 rnod = rnod.right
769 it = new RopeIterPiece(rnod, false, false, it)
770 else
771 it = it.prev
772 rnod = it.node
773 continue
774 end
775 end
776 end
777 end
778
779 # Implementation of a `StringCharView` for `RopeString` objects
780 private class RopeChars
781 super StringCharView
782
783 var tgt: RopeString
784
785 init(s: RopeString) is old_style_init do tgt = s
786
787 redef fun [](i) do
788 return tgt[i]
789 end
790
791 redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
792
793 redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
794
795 end
796
797 # An Iterator over a RopeBuffer.
798 class RopeBufferIter
799 super IndexedIterator[Char]
800
801 # Subiterator.
802 var sit: IndexedIterator[Char]
803
804 # Native string iterated over.
805 var ns: NativeString
806
807 # Current position in `ns`.
808 var pns: Int
809
810 # Maximum position iterable.
811 var maxpos: Int
812
813 redef var index: Int
814
815 # Init the iterator from a RopeBuffer.
816 init(t: RopeBuffer) is old_style_init do
817 ns = t.ns
818 maxpos = t.rpos
819 sit = t.str.chars.iterator
820 pns = t.dumped
821 index = 0
822 end
823
824 # Init the iterator from a RopeBuffer starting from `pos`.
825 init from(t: RopeBuffer, pos: Int) do
826 ns = t.ns
827 maxpos = t.length
828 sit = t.str.chars.iterator_from(pos)
829 pns = pos - t.str.length
830 index = pos
831 end
832
833 redef fun is_ok do return index < maxpos
834
835 redef fun item do
836 if sit.is_ok then return sit.item
837 return ns[pns]
838 end
839
840 redef fun next do
841 index += 1
842 if sit.is_ok then
843 sit.next
844 else
845 pns += 1
846 end
847 end
848 end
849
850 # Reverse iterator over a RopeBuffer.
851 class RopeBufferReviter
852 super IndexedIterator[Char]
853
854 # Subiterator.
855 var sit: IndexedIterator[Char]
856
857 # Native string iterated over.
858 var ns: NativeString
859
860 # Current position in `ns`.
861 var pns: Int
862
863 redef var index: Int
864
865 # Init the iterator from a RopeBuffer.
866 init(tgt: RopeBuffer) is old_style_init do
867 sit = tgt.str.chars.reverse_iterator
868 pns = tgt.rpos - 1
869 index = tgt.length - 1
870 ns = tgt.ns
871 end
872
873 # Init the iterator from a RopeBuffer starting from `pos`.
874 init from(tgt: RopeBuffer, pos: Int) do
875 sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
876 pns = pos - tgt.str.length
877 index = pos
878 ns = tgt.ns
879 end
880
881 redef fun is_ok do return index > 0
882
883 redef fun item do
884 if pns >= 0 then return ns[pns]
885 return sit.item
886 end
887
888 redef fun next do
889 index -= 1
890 if pns >= 0 then
891 pns -= 1
892 else
893 sit.next
894 end
895 end
896 end
897
898 # View on the chars of a `RopeBuffer`
899 class RopeBufferChars
900 super BufferCharView
901
902 redef type SELFTYPE: RopeBuffer
903
904 redef fun [](i) do
905 if i < target.str.length then
906 return target.str[i]
907 else
908 return target.ns[i - target.str.length]
909 end
910 end
911
912 redef fun []=(i,c) do
913 if i == target.length then target.add c
914 if i < target.str.length then
915 var s = target.str
916 var l = s.substring(0, i)
917 var r = s.substring_from(i + 1)
918 target.str = l + c.to_s + r
919 else
920 target.ns[i - target.str.length] = c
921 end
922 end
923
924 redef fun add(c) do target.add c
925
926 redef fun push(c) do target.add c
927
928 redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
929
930 redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
931 end