X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/ropes.nit b/lib/standard/ropes.nit index 3aa9771..8d2d057 100644 --- a/lib/standard/ropes.nit +++ b/lib/standard/ropes.nit @@ -1,1096 +1,926 @@ -# This file is part of NIT ( http://www.nitlanguage.org ). +# This file is part of NIT (http://www.nitlanguage.org). # -# This file is free software, which comes along with NIT. This software is -# distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. You can modify it if you want, provided this header -# is kept unaltered, and a notification of the changes is added. -# You are allowed to redistribute it and sell it, alone or as a part of -# another product. - -# Nit implementation of the Ropes (see Ropes : An Alternative to Strings, -# SOFTWARE - PRACTICE AND EXPERIENCE, VOL. 25(12), 1315 - 1330 (DECEMBER 1995) -# Hans. J Boehm, Russ Atkinson and Michael Plass) +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# A rope is a kind of string but instead of being flat, it relies on a binary tree structure to store data. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Tree-based representation of a String. +# +# Ropes are a data structure introduced in a 1995 paper from +# Hans J. Boehm, Russ Atkinson and Michael Plass. +# See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience, +# Vol. 25(12), 1315-1330 (December 1995)`. +# +# The implementation developed here provides an automatic change +# of data structure depending on the length of the leaves. +# +# The length from which a `Rope` is built from a `flat` string +# if defined at top-level (see `maxlen`) and can be redefined by clients +# depending on their needs (e.g. if you need to bench the speed of +# the creation of concat nodes, lower the size of maxlen). +# +# A Rope as defined in the original paper is a Tree made of concatenation +# nodes and containing `Flat` (that is Array-based) strings as Leaves. +# +# Example : +# +# ` Concat ` +# ` / \ ` +# ` Concat Concat ` +# ` / \ / \ ` +# `"My" " Name" " is" " Simon." ` +# +# Note that the above example is not representative of the actual implementation +# of `Ropes`, since short leaves are merged to keep the rope at an acceptable +# height (hence, this rope here might actually be a `FlatString` !). module ropes -import file intrude import string -# Structure, tuple containing a LeafNode and an Int -# Used for methods like [] or has/has_only -private class TupleLeafNodePos - private var curr_node: LeafNode - private var corrected_pos: Int - private var visit_stack: List[TupleVisitNode] -end - -# Abstract class, represents all the services offered by both mutable and immutable ropes -abstract class Rope - super Comparable - super StringCapable - - # Cached version of self as a flat String - private var str_representation: nullable String = null - - redef type OTHER: Rope - - # The first node of the hierarchy - private var parent_node: RopeNode - - # Needed by the compiler to avoid producing an error with constructors in subclasses - init do - self.parent_node = new ConcatNode - end - - # Initializes a new Rope with a text embedded in directly - init with_string(str: AbstractString) do - self.parent_node = new ConcatNode - parent_node.as(ConcatNode).right_child = new LeafNode(str) - parent_node.as(ConcatNode).update_data - end +# Maxlen is the maximum length of a Leaf node +# +# When concatenating two leaves, if `new_length` > `maxlen`, +# A `Concat` node is created instead +# +# Its purpose is to limit the depth of the `Rope` (this +# improves performance when accessing/iterating). +fun maxlen: Int do return 64 - # Returns a view on the rope - fun chars: SequenceRead[Char] - do - return new CharRopeView(self) - end +# String using a tree-based representation with leaves as `FlatStrings` +private abstract class Rope + super Text +end - # Gets the total length of the Rope - fun length: Int - do - return parent_node.length - end +private abstract class RopeString + super Rope + super String - # Returns a flat version of self - redef fun to_s - do - if self.str_representation == null then flatten - return str_representation.as(not null) - end + redef fun chars is cached do return new RopeChars(self) +end - # Stores a flat version of self in cache - private fun flatten: String - do - var native_final_str = calloc_string(length + 1) +# Node that represents a concatenation between two `String` +private class Concat + super RopeString - native_final_str[length] = '\0' + redef var length: Int - var offset = 0 + redef fun substrings do return new RopeSubstrings(self) - var iter = new DFSRopeLeafIterator(self) + redef fun empty do return "" - while iter.is_ok do - iter.item.value._items.copy_to(native_final_str, iter.item.value.length, 0, offset) - offset += iter.item.value.length - iter.next + redef fun to_cstring is cached do + var len = length + var ns = new NativeString(len + 1) + ns[len] = '\0' + var off = 0 + for i in substrings do + var ilen = i.length + i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off) + off += ilen end - - return new String.with_native(native_final_str, length) + return ns end - # Gets a node containing the substring to seek the char at the require position - private fun get_node_for_pos(position: Int): TupleLeafNodePos - do - assert position >= 0 and position < self.length + # Left child of the node + var left: String + # Right child of the node + var right: String - var curr_node: nullable RopeNode = parent_node - - var visit_stack = new List[TupleVisitNode] - - var curr_visit_tuple: TupleVisitNode - - loop - if curr_node isa ConcatNode then - curr_visit_tuple = new TupleVisitNode(curr_node) - if curr_node.left_child != null and position < curr_node.left_child.length then - curr_visit_tuple.left_visited = true - curr_node = curr_node.left_child - else if curr_node.right_child != null then - curr_visit_tuple.left_visited = true - curr_visit_tuple.right_visited = true - if curr_node.left_child != null then position -= curr_node.left_child.length - curr_node = curr_node.right_child - else - print "Fatal Error" - abort - end - visit_stack.push(curr_visit_tuple) - else if curr_node isa LeafNode then - return new TupleLeafNodePos(curr_node, position, visit_stack) - end - end + init(l: String, r: String) is old_style_init do + left = l + right = r + length = l.length + r.length end - # Concats two ropes and returns a new one - fun +(other: Rope): Rope do - var new_rope = new BufferRope - - var first_iter = new DFSRopeLeafIterator(self) - - while first_iter.is_ok do - new_rope.append(first_iter.item.value) - first_iter.next - end - - var second_iter = new DFSRopeLeafIterator(other) - - while second_iter.is_ok do - new_rope.append(second_iter.item.value) - second_iter.next - end - - return new_rope + redef fun output do + left.output + right.output end - # Returns a copy of several ropes concatenated - # - # Is equivalent to a chain of + operations - # Except this one is optimized for performance - fun multi_concat(ropes: Rope...): Rope - do - var new_rope = new BufferRope - - var self_iter = self.iterator - while self_iter.is_ok do - new_rope.append(self_iter.item.value) - self_iter.next - end + redef fun iterator do return new RopeIter(self) - for i in ropes do - var iter = i.iterator - while iter.is_ok do - new_rope.append(iter.item.value) - iter.next - end - end - - return new_rope + redef fun *(i) do + var x: String = self + for j in [1 .. i[ do x += self + return x end - # Appends the content of self multiple times in a new Rope object - fun *(repeats: Int): Rope do - - var new_rope = new BufferRope - - var str = self.to_s - - for i in [1 .. repeats] do new_rope.append(str) - - return new_rope + redef fun [](i) do + var llen = left.length + if i >= llen then return right[i - llen] + return left[i] end - # Returns an iterator on self - # - # Unsafe modifications on a MutableRope - # - private fun iterator: Iterator[LeafNode] do return new DFSRopeLeafIterator(self) - - # Creates a subrope. - # - # var rope = (new BufferRope).append("abcd") - # - # assert rope.subrope(1, 2) == "bc" - # assert rope.subrope(-1, ) == "a" - # assert rope.subrope(1, 0) == "" - # assert rope.subrope(2, 5) == "cd" - # - # A `index_from` index < 0 will be replaced by 0. - # Unless a `count` value is > 0 at the same time. - # In this case, `index_from += count` and `count -= index_from`. - # - fun subrope(index_from: Int, count: Int): Rope - do - assert count >= 0 - - if index_from < 0 then - count += index_from - if count < 0 then count = 0 - index_from = 0 - end - - if count - index_from >= self.length then count = length - index_from - - var buffer = new BufferRope - - var iter = new DFSRopeLeafIterator.with_index(self, index_from) - - var curr_subrope_index = index_from - iter.pos - - while iter.is_ok do - if count == 0 then break - if curr_subrope_index > 0 then - if count >= iter.item.value.length then - buffer.append(iter.item.value.substring(curr_subrope_index, count)) - count -= iter.item.value.length - curr_subrope_index - curr_subrope_index = 0 - else - buffer.append(iter.item.value.substring(curr_subrope_index, count)) - break - end - else - if count >= iter.item.value.length then - buffer.append(iter.item.value) - count -= iter.item.value.length - else - buffer.append(iter.item.value.substring(0, count)) - break - end - end - - iter.next + redef fun substring(from, len) do + var llen = left.length + if from < llen then + if from + len < llen then return left.substring(from,len) + var lsublen = llen - from + return left.substring_from(from) + right.substring(0, len - lsublen) + else + return right.substring(from - llen, len) end - - return buffer end - # Returns an upper (capitalized) version of self - fun to_upper: Rope - do - var new_rope = new BufferRope - var iter = new DFSRopeLeafIterator(self) - while iter.is_ok do - new_rope.append(iter.item.value.to_upper) - iter.next - end - return new_rope - end + redef fun reversed do return new Concat(right.reversed, left.reversed) - # Returns a lower (minuscule) version of self - fun to_lower: Rope - do - var new_rope = new BufferRope - var iter = new DFSRopeLeafIterator(self) - while iter.is_ok do - new_rope.append(iter.item.value.to_lower) - iter.next + redef fun insert_at(s, pos) do + if pos > left.length then + return left + right.insert_at(s, pos - left.length) end - return new_rope + return left.insert_at(s, pos) + right end - ############################################################################ - # Comparable Refined Methods # - ############################################################################ - - # Compares the current Rope to another object (either another rope or a String) - redef fun == (other) - do - if other == null or not (other isa Rope or other isa AbstractString) then return false - var self_iter = new RopeCharIterator(self) - if other isa Rope then - if self.length != other.length then return false - var other_iterator = new RopeCharIterator(other) - while self_iter.is_ok do - if self_iter.item != other_iterator.item then return false - self_iter.next - other_iterator.next - end - else if other isa AbstractString then - var pos = 0 - if self.length != other.length then return false - while self_iter.is_ok do - if self_iter.item != other[pos] then return false - pos += 1 - self_iter.next - end - end - return true - end + redef fun to_upper do return new Concat(left.to_upper, right.to_upper) - # Checks if self is lesser than other - # - # Comparison done in lexicographical order - # i.e. 'aa' < 'b' - # - redef fun <(other) - do - var other_iter = other.chars.iterator - for i in self.chars do - if not other_iter.is_ok then return false - if i < other_iter.item then return true - if i > other_iter.item then return false - other_iter.next + redef fun to_lower do return new Concat(left.to_lower, right.to_lower) + + redef fun +(o) do + var s = o.to_s + var slen = s.length + if s isa Concat then + return new Concat(self, s) + else + var r = right + var rlen = r.length + if rlen + slen > maxlen then return new Concat(left, new Concat(r, s)) + return new Concat(left, r + s) end - if other_iter.is_ok then return true - return false end - end -# Rope that can be modified +# Mutable `Rope`, optimized for concatenation operations +# +# A `RopeBuffer` is an efficient way of building a `String` when +# concatenating small strings. # -# /!\ Non Thread-safe /!\ +# It does concatenations in an optimized way by having a +# mutable part and an immutable part built by efficiently +# concatenating strings in chain. # -class BufferRope +# Every concatenation operation is done by copying a string to +# the mutable part and flushing it when full. +# +# However, when a long string is appended to the `Buffer`, +# the concatenation is done at it would be in a `Rope`. +class RopeBuffer super Rope + super Buffer - var is_dirty: Bool = false + redef fun chars: Sequence[Char] is cached do return new RopeBufferChars(self) - init - do - super - end + # The final string being built on the fly + private var str: String is noinit - init with_string(str) - do - super - end + # Current concatenation buffer + private var ns: NativeString is noinit - ############################################################################ - # Tree Balancing Methods # - ############################################################################ + # Next available (e.g. unset) character in the `Buffer` + private var rpos = 0 - # Performs a right rotation on a node of the Rope + # Keeps track of the buffer's currently dumped part # - # Root Pivot - # / \ / \ - # Pivot Leaf3 => Leaf1 Root - # / \ / \ - # Leaf1 Leaf2 Leaf2 Leaf3 - private fun rotate_right(root: ConcatNode) - do - assert root.left_child != null - var pivot = root.left_child.as(ConcatNode) - var root_new_left = pivot.right_child - var root_parent = root.parent - - root.left_child = root_new_left - pivot.right_child = root - if root_parent == null then - self.parent_node = pivot - pivot.parent = null - return - end + # This might happen if for instance, a String was being + # built by concatenating small parts of string and suddenly + # a long string (length > maxlen) is appended. + private var dumped: Int is noinit - if root_parent.left_child == root then - root_parent.left_child = pivot - else - root_parent.right_child = pivot - end + # Length of the complete rope + redef var length = 0 - root.update_data - pivot.update_data - root_parent.update_data - end - - # Performs a left rotation on a node of the Rope + # Length of the mutable part # - # Root Pivot - # / \ / \ - # Leaf1 Pivot => Root Leaf3 - # / \ / \ - # Leaf2 Leaf3 Leaf1 Leaf2 - private fun rotate_left(root: ConcatNode) - do - assert root.right_child != null - var pivot = root.right_child.as(ConcatNode) - var root_new_right = pivot.left_child - var root_parent = root.parent - - root.right_child = root_new_right - pivot.left_child = root - if root_parent == null then - self.parent_node = pivot - pivot.parent = null - return - end + # Is also used as base to compute the size of the next + # mutable native string (`ns`) + private var buf_size: Int is noinit - if root_parent.left_child == root then - root_parent.left_child = pivot - else - root_parent.right_child = pivot - end + redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self) - root.update_data - pivot.update_data - root_parent.update_data + # Builds an empty `RopeBuffer` + init do + str = "" + ns = new NativeString(maxlen) + buf_size = maxlen + dumped = 0 end - # Shortcut method to balance a node and its parents - # based on the rules of the AVL Tree - private fun balance_from_node(parent_node: nullable ConcatNode) - do - while parent_node != null do - parent_node.update_data - var node_balance = parent_node.balance_factor - if node_balance < -1 or node_balance > 1 then - balance_node(parent_node) - end - parent_node = parent_node.parent - end + # Builds a new `RopeBuffer` with `str` in it. + init from(str: String) do + self.str = str + ns = new NativeString(maxlen) + buf_size = maxlen + length = str.length + dumped = 0 end - # Performs rotations to balance a node according to AVL Tree rules - private fun balance_node(node: ConcatNode) - do - var balance_factor = node.balance_factor - if balance_factor < -1 then - var right_balance = node.right_child.balance_factor - if right_balance < 0 then - rotate_left(node) - else - rotate_right(node.right_child.as(ConcatNode)) - rotate_left(node) - end - else - var left_balance = node.left_child.balance_factor - if left_balance > 0 then - rotate_right(node) - else - rotate_left(node.left_child.as(ConcatNode)) - rotate_right(node) - end + # Resets the informations of the `Buffer` + # + # This is called when doing in-place modifications + # on a previously to_s'd `RopeBuffer` + private fun reset do + var nns = new NativeString(buf_size) + var blen = rpos - dumped + ns.copy_to(nns, blen, dumped, 0) + dumped = 0 + rpos = blen + written = false + end + + redef fun empty do return new RopeBuffer + + redef fun clear do + str = "" + length = 0 + rpos = 0 + dumped = 0 + if written then + ns = new NativeString(buf_size) + written = false end end - ############################################################################ - # BufferRope exclusive Methods # - ############################################################################ + redef fun substring(from, count) do + var strlen = str.length - # Appends a new Collection[Char] at the end of the current rope - fun append(str: Collection[Char]): BufferRope - do - if str isa AbstractString then - var last_node = parent_node + if from < 0 then + count += from + if count < 0 then count = 0 + from = 0 + end - while last_node isa ConcatNode and last_node.right_child != null do - last_node = last_node.right_child.as(not null) - end + if count > length then count = length - from - if last_node isa ConcatNode then - last_node.right_child = new LeafNode(str.to_s) - else if last_node isa LeafNode then - var last_node_parent = last_node.parent - var new_concat = new ConcatNode - last_node_parent.right_child = new_concat - new_concat.left_child = last_node - new_concat.right_child = new LeafNode(str.to_s) - last_node = new_concat + if count == 0 then return empty + + if from < strlen then + var subpos = strlen - from + if count <= subpos then + return new RopeBuffer.from(str.substring(from, count)) else - print "Fatal Error, please report to the developers for more insight." - abort + var l = str.substring_from(from) + var rem = count - subpos + var nns = new NativeString(rem) + ns.copy_to(nns, rem, dumped, 0) + return new RopeBuffer.from(l + nns.to_s_with_length(rem)) end - - balance_from_node(last_node) else - var buf = new Buffer.with_capacity(str.length) - for i in str do - buf.add(i) - end - append(buf) + var nns = new NativeString(count) + ns.copy_to(nns, count, dumped, 0) + return new RopeBuffer.from(nns.to_s_with_length(count)) end - - if not is_dirty then is_dirty = true - - return self end - # Variatic function to append several collections of Chars - fun append_multi(strs: Collection[Char]...): BufferRope - do - for i in strs do - append(i) + redef fun append(s) do + var slen = s.length + length += slen + var rp = rpos + if s isa Rope or slen > maxlen then + if rp > 0 and dumped != rp then + str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1) + dumped = rp + end + str = str + s + return end - return self - end - - # Adds a new Collection[Char] at the beginning of the rope - fun prepend(str: Collection[Char]): BufferRope - do - if str isa AbstractString then - var curr_node = parent_node - - while curr_node isa ConcatNode and curr_node.left_child != null do - curr_node = curr_node.left_child.as(not null) + var remsp = buf_size - rp + var sits: NativeString + var begin: Int + if s isa FlatString then + begin = s.index_from + sits = s.items + else if s isa FlatBuffer then + begin = 0 + sits = s.items + else + if slen <= remsp then + for i in s.chars do + ns[rpos] = i + rpos += 1 + end + else + var spos = 0 + for i in [0..remsp[ do + ns[rpos] = s[spos] + rpos += 1 + spos += 1 + end + dump_buffer + while spos < slen do + ns[rpos] = s[spos] + spos += 1 + rpos += 1 + end end - - if curr_node isa ConcatNode then - curr_node.left_child = new LeafNode(str.to_s) - else if curr_node isa LeafNode then - var parent = curr_node.parent - var new_concat = new ConcatNode - var new_leaf = new LeafNode(str.to_s) - new_concat.left_child = new_leaf - new_concat.right_child = curr_node - parent.left_child = new_concat - curr_node = new_concat + return + end + if slen <= remsp then + if remsp <= 0 then + dump_buffer + rpos = 0 else - print "Fatal Error" - abort + sits.copy_to(ns, slen, begin, rp) + rpos += slen end - - balance_from_node(curr_node) else - var buf = new Buffer.with_capacity(str.length) - for i in str do - buf.add(i) - end - prepend(buf.to_s) + sits.copy_to(ns, remsp, begin, rp) + rpos = buf_size + dump_buffer + var nlen = slen - remsp + sits.copy_to(ns, nlen, begin + remsp, 0) + rpos = nlen end - - if not is_dirty then is_dirty = true - - return self end - # Variatic function to prepend several collections of Chars - fun prepend_multi(strs: Collection[Char]...): BufferRope - do - for i in strs do - prepend(i) + redef fun add(c) do + var rp = rpos + if rp >= buf_size then + dump_buffer + rp = 0 end - return self + ns[rp] = c + rp += 1 + length += 1 + rpos = rp end - # Adds the content of `str` after self, does not create a new rope object - fun concat(str: Rope): Rope - do - var other_iter = new DFSRopeLeafIterator(str) - - var modif_list = new List[String] - - while other_iter.is_ok do - modif_list.push(other_iter.item.value) - other_iter.next - end - - while modif_list.length > 0 do - self.append(modif_list.shift) - end - - if not is_dirty then is_dirty = true - - return self + # Converts the Buffer to a FlatString, appends it to + # the final String and re-allocates a new larger Buffer. + private fun dump_buffer do + written = false + var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + str += nstr + var bs = buf_size + bs = bs * 2 + ns = new NativeString(bs) + buf_size = bs + dumped = 0 end - # Returns the content of the current BufferRope object as an ImmutableRope - fun freeze: ImmutableRope - do - var buffer_rope = new BufferRope - var new_rope = new ImmutableRope - - var iter = new DFSRopeLeafIterator(self) - - while iter.is_ok do - buffer_rope.append(iter.item.value) - iter.next - end - - new_rope.parent_node = buffer_rope.parent_node - - if not is_dirty then new_rope.str_representation = self.str_representation - - return new_rope + redef fun output do + str.output + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output end - # Unsafe method to convert self as an ImmutableRope + # Enlarge is useless here since the `Buffer` + # part is automatically dumped when necessary. # - # To be used internally only - private fun to_immutable: ImmutableRope - do - var immutable_self = new ImmutableRope - immutable_self.parent_node = self.parent_node - return immutable_self - end - - ############################################################################ - # Rope refined Methods # - ############################################################################ - - redef fun subrope(index_from: Int, count: Int): BufferRope - do - return super.as(BufferRope) - end + # Also, since the buffer can not be overused by a + # single string, there is no need for manual + # resizing. + # + # "You have no power here !" + redef fun enlarge(i) do end - redef fun *(repeats: Int): BufferRope - do - return super.as(BufferRope) + redef fun to_s do + written = true + var nnslen = rpos - dumped + if nnslen == 0 then return str + return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) end - redef fun +(other: Rope): BufferRope - do - return super.as(BufferRope) + redef fun reverse do + # Flush the buffer in order to only have to reverse `str`. + if rpos > 0 and dumped != rpos then + str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + dumped = rpos + end + str = str.reversed end - redef fun multi_concat(ropes: Rope...): BufferRope - do - return super.as(BufferRope) + redef fun upper do + if written then reset + str = str.to_upper + var mits = ns + for i in [0 .. rpos[ do + mits[i] = mits[i].to_upper + end end - # Refines to add a cache method, calculates only once for every modification - # the string representation for self - redef fun to_s - do - if self.str_representation == null or is_dirty then - self.str_representation = flatten - is_dirty = false + redef fun lower do + if written then reset + str = str.to_lower + var mits = ns + for i in [0 .. rpos[ do + mits[i] = mits[i].to_lower end - return self.str_representation.as(not null) end - end -# Rope that cannot be modified -class ImmutableRope - super Rope - - init - do - super - end - - init with_string(str) - do - super +redef class FlatString + + redef fun insert_at(s, pos) do + var l = substring(0, pos) + var r = substring_from(pos) + return l + s + r + end + + redef fun +(o) do + var s = o.to_s + var slen = s.length + var mlen = length + if slen == 0 then return self + if mlen == 0 then return s + var nlen = slen + mlen + if s isa FlatString then + if nlen > maxlen then return new Concat(self, s) + var mits = items + var sifrom = s.index_from + var mifrom = index_from + var sits = s.items + var ns = new NativeString(nlen + 1) + mits.copy_to(ns, mlen, mifrom, 0) + sits.copy_to(ns, slen, sifrom, mlen) + return ns.to_s_with_length(nlen) + else if s isa Concat then + var sl = s.left + var sllen = sl.length + if sllen + mlen > maxlen then return new Concat(self, s) + return new Concat(self + sl, s.right) + else + abort + end end +end - ############################################################################ - # Rope refined Methods # - ############################################################################ +# A simple linked list for use with iterators +private class RopeIterPiece + # The encapsulated node of the `Rope` + var node: String + # Was its left child (if any) visited ? + var ldone: Bool + # Was its right child (if any) visited ? + var rdone: Bool + # The previous node in the list. + var prev: nullable RopeIterPiece +end - redef fun subrope(index_from: Int, count: Int): ImmutableRope - do - return (super.as(BufferRope)).to_immutable - end +# A reverse iterator capable of working with `Rope` objects +private class RopeReviter + super IndexedIterator[Char] - redef fun *(repeats: Int): ImmutableRope - do - return (super.as(BufferRope)).to_immutable - end + # Current NativeString + var ns: String + # Current position in NativeString + var pns: Int + # Position in the Rope (0-indexed) + var pos: Int + # Iterator on the substrings, does the Postfix part of + # the Rope traversal. + var subs: IndexedIterator[String] - redef fun +(other: Rope): ImmutableRope - do - return (super.as(BufferRope)).to_immutable + init(root: RopeString) is old_style_init do + pos = root.length - 1 + subs = new ReverseRopeSubstrings(root) + ns = subs.item + pns = ns.length - 1 end - redef fun multi_concat(ropes: Rope...): ImmutableRope - do - return (super.as(BufferRope)).to_immutable + init from(root: RopeString, pos: Int) do + self.pos = pos + subs = new ReverseRopeSubstrings.from(root, pos) + ns = subs.item + pns = pos - subs.index end -end - -############################################ -# Rope view classes # -############################################ - -class CharRopeView - super SequenceRead[Char] + redef fun index do return pos - # Targeted Rope for the view - private var target: Rope + redef fun is_ok do return pos >= 0 - init(tgt: Rope) - do - self.target = tgt - end + redef fun item do return ns[pns] - redef fun [](position) - do - var tuple = target.get_node_for_pos(position) - return tuple.curr_node.value[tuple.corrected_pos] + redef fun next do + pns -= 1 + pos -= 1 + if pns >= 0 then return + if not subs.is_ok then return + subs.next + if not subs.is_ok then return + ns = subs.item + pns = ns.length - 1 end +end - redef fun first do return self[0] - - redef fun index_of(char) - do - var intern_iter = new RopeCharIterator(target) - while intern_iter.is_ok do - if intern_iter.item == char then return intern_iter.index - intern_iter.next - end - return -1 - end +# Forward iterator on the chars of a `Rope` +private class RopeIter + super IndexedIterator[Char] - redef fun iterator do - return new RopeCharIterator(target) + # Position in current `String` + var pns: Int + # Current `String` being iterated on + var str: String + # Substrings of the Rope + var subs: IndexedIterator[String] + # Maximum position to iterate on (e.g. Rope.length) + var max: Int + # Position (char) in the Rope (0-indexed) + var pos: Int + + init(root: RopeString) is old_style_init do + subs = new RopeSubstrings(root) + pns = 0 + str = subs.item + max = root.length - 1 + pos = 0 + end + + init from(root: RopeString, pos: Int) do + subs = new RopeSubstrings.from(root, pos) + pns = pos - subs.index + self.pos = pos + str = subs.item + max = root.length - 1 + end + + redef fun item do return str[pns] + + redef fun is_ok do return pos <= max + + redef fun index do return pos + + redef fun next do + pns += 1 + pos += 1 + if pns < subs.item.length then return + if not subs.is_ok then return + subs.next + if not subs.is_ok then return + str = subs.item + pns = 0 end +end - redef fun last do return self[self.length-1] +# Substrings of a Rope (i.e. Reverse postfix iterator on leaves) +private class ReverseRopeSubstrings + super IndexedIterator[String] - redef fun length do return target.length + # Visit Stack + var iter: RopeIterPiece is noinit + # Position in `Rope` + var pos: Int is noinit - redef fun count(item) - do - var count = 0 - var iter = self.iterator + # Current leaf + var str: String is noinit - for i in self do - if i == item then count += 1 + init(root: RopeString) is old_style_init do + var r = new RopeIterPiece(root, false, true, null) + pos = root.length - 1 + var lnod: String = root + loop + if lnod isa Concat then + lnod = lnod.right + r = new RopeIterPiece(lnod, false, true, r) + else + str = lnod + iter = r + break + end end - - return count end - redef fun has_only(item) - do - for i in self do - if i != item then return false + init from(root: RopeString, pos: Int) do + var r = new RopeIterPiece(root, false, true, null) + var rnod: String = root + var off = pos + loop + if rnod isa Concat then + if off >= rnod.left.length then + off -= rnod.left.length + rnod = rnod.right + r = new RopeIterPiece(rnod, false, true, r) + else + r.ldone = true + rnod = rnod.left + r = new RopeIterPiece(rnod, false, true, r) + end + else + str = rnod + r.ldone = true + iter = r + self.pos = pos - off + break + end end - return true end - redef fun is_empty do return length == 0 + redef fun item do return str - redef fun to_a do - return to_s.to_a - end + redef fun index do return pos - redef fun to_s do - return target.to_s - end - - redef fun ==(other) - do - if not other isa SequenceRead[Char] then return false - - if self.length != other then return false - - var iter = other.iterator + redef fun is_ok do return pos >= 0 - for i in self do - if i != iter.item then return false - iter.next + redef fun next do + if pos < 0 then return + var curr = iter.prev + var currit = curr.node + while curr != null do + currit = curr.node + if not currit isa Concat then + str = currit + pos -= str.length + iter = curr + return + end + if not curr.rdone then + curr.rdone = true + curr = new RopeIterPiece(currit.right, false, false, curr) + continue + end + if not curr.ldone then + curr.ldone = true + curr = new RopeIterPiece(currit.left, false, false, curr) + continue + end + curr = curr.prev end - - return true + pos = -1 end - end -########################################### -# Iterator classes # -########################################### +private class RopeBufSubstringIterator + super Iterator[String] -# A tuple representing the state of a node for a tree parsing algorithm -private class TupleVisitNode + # Iterator on the substrings of the building string + var iter: Iterator[String] + # Makes a String out of the buffered part of the Ropebuffer + var nsstr: String + # Did we attain the buffered part ? + var nsstr_done = false - init(tgt: ConcatNode) - do - self.node = tgt + init(str: RopeBuffer) is old_style_init do + iter = str.str.substrings + nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1) + if str.length == 0 then nsstr_done = true end - private var node: ConcatNode - - private var left_visited = false - private var right_visited = false - -end - -# Any kind of iterator parsing a Rope for LeafNodes -private abstract class RopeIterator - super IndexedIterator[LeafNode] - - # Rope meant to be visited - private var _target: Rope - - private fun target: Rope do return self._target - - # Position in target - private var pos = 0 + redef fun is_ok do return iter.is_ok or not nsstr_done - init(tgt: Rope) - do - self._target = tgt + redef fun item do + assert is_ok + if iter.is_ok then return iter.item + return nsstr end - init with_index(tgt: Rope, index: Int) - do - self._target = tgt + redef fun next do + if iter.is_ok then + iter.next + return + end + nsstr_done = true end - end -# Iterator returning the content of a rope one char at a time -class RopeCharIterator - super IndexedIterator[Char] - - # The iterator used to visit the rope - private var sub_str_iter: DFSRopeLeafIterator - - # The current position in the rope - private var abs_pos = 0 - - # The position in the current substring - private var sub_pos: Int = 0 - - # The substring contained in the current node visited by `sub_str_iter` - private var curr_substring: nullable String - - init(tgt: Rope) - do - sub_str_iter = new DFSRopeLeafIterator(tgt) - if sub_str_iter.is_ok then curr_substring = sub_str_iter.item.value - end - - redef fun item do return curr_substring[sub_pos] - - redef fun is_ok - do - if sub_str_iter.is_ok then return true - if not sub_str_iter.is_ok and curr_substring != null and sub_pos < curr_substring.length then return true - return false +# Substrings of a Rope (i.e. Postfix iterator on leaves) +private class RopeSubstrings + super IndexedIterator[String] + + # Visit Stack + var iter: RopeIterPiece is noinit + # Position in `Rope` + var pos: Int is noinit + # Maximum position in `Rope` (i.e. length - 1) + var max: Int is noinit + + # Current leaf + var str: String is noinit + + init(root: RopeString) is old_style_init do + var r = new RopeIterPiece(root, true, false, null) + pos = 0 + max = root.length - 1 + var rnod: String = root + loop + if rnod isa Concat then + rnod = rnod.left + r = new RopeIterPiece(rnod, true, false, r) + else + str = rnod + r.rdone = true + iter = r + break + end + end end - redef fun next - do - assert is_ok - if sub_pos < curr_substring.length - 1 then - sub_pos += 1 - else - sub_str_iter.next - if sub_str_iter.is_ok then - curr_substring = sub_str_iter.item.value - sub_pos = 0 + init from(root: RopeString, pos: Int) do + var r = new RopeIterPiece(root, true, false, null) + max = root.length - 1 + var rnod: String = root + var off = pos + loop + if rnod isa Concat then + if off >= rnod.left.length then + r.rdone = true + off -= rnod.left.length + rnod = rnod.right + r = new RopeIterPiece(rnod, true, false, r) + else + rnod = rnod.left + r = new RopeIterPiece(rnod, true, false, r) + end else - sub_pos = curr_substring.length + str = rnod + r.rdone = true + iter = r + self.pos = pos - off + break end end - abs_pos += 1 end - redef fun index do return abs_pos - -end - -# Special kind of iterator -# -# Performs a Depth-First Search on RopeLeaf items -# -private class DFSRopeLeafIterator - super RopeIterator + redef fun item do return str - # Stack of the visited nodes in the rope - private var visit_stack = new List[TupleVisitNode] + redef fun is_ok do return pos <= max - # The leaf node being visited by the iterator - private var curr_leaf: nullable LeafNode + redef fun index do return pos - init(tgt: Rope) - do - super - - var first_node = target.parent_node - - if first_node isa ConcatNode then - visit_stack.push(new TupleVisitNode(first_node)) - else if first_node isa LeafNode then - curr_leaf = first_node - return + redef fun next do + pos += str.length + if pos > max then return + var it = iter.prev + var rnod = it.node + loop + if not rnod isa Concat then + it.ldone = true + it.rdone = true + str = rnod + iter = it.as(not null) + break + end + if not it.ldone then + rnod = rnod.left + it.ldone = true + it = new RopeIterPiece(rnod, false, false, it) + else if not it.rdone then + it.rdone = true + rnod = rnod.right + it = new RopeIterPiece(rnod, false, false, it) + else + it = it.prev + rnod = it.node + continue + end end - - next_body end +end - # Creates a new iterator on `tgt` starting at `index` - init with_index(tgt: Rope, index: Int) - do - super +# Implementation of a `StringCharView` for `RopeString` objects +private class RopeChars + super StringCharView - var returned_tuple = target.get_node_for_pos(index) - curr_leaf = returned_tuple.curr_node - visit_stack = returned_tuple.visit_stack - pos = index - returned_tuple.corrected_pos - end + var tgt: RopeString - redef fun is_ok do return curr_leaf != null + init(s: RopeString) is old_style_init do tgt = s - redef fun next - do - assert is_ok - pos += curr_leaf.value.length - next_body + redef fun [](i) do + return tgt[i] end - private fun next_body - do - var next_node: nullable RopeNode - while not visit_stack.is_empty do - var curr_concat_tuple = visit_stack.last - if not curr_concat_tuple.left_visited then - - curr_concat_tuple.left_visited = true + redef fun iterator_from(i) do return new RopeIter.from(tgt, i) - next_node = curr_concat_tuple.node.left_child + redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i) - if next_node == null then continue +end - if next_node isa ConcatNode then - visit_stack.push(new TupleVisitNode(next_node)) - else if next_node isa LeafNode then - curr_leaf = next_node - return - end +# An Iterator over a RopeBuffer. +class RopeBufferIter + super IndexedIterator[Char] - else if not curr_concat_tuple.right_visited then + # Subiterator. + var sit: IndexedIterator[Char] - curr_concat_tuple.right_visited = true + # Native string iterated over. + var ns: NativeString - next_node = curr_concat_tuple.node.right_child + # Current position in `ns`. + var pns: Int - if next_node == null then continue + # Maximum position iterable. + var maxpos: Int - if next_node isa ConcatNode then - visit_stack.push(new TupleVisitNode(next_node)) - else if next_node isa LeafNode then - curr_leaf = next_node - return - end + redef var index: Int - else - visit_stack.pop - end - end - self.curr_leaf = null + # Init the iterator from a RopeBuffer. + init(t: RopeBuffer) is old_style_init do + ns = t.ns + maxpos = t.rpos + sit = t.str.chars.iterator + pns = t.dumped + index = 0 end - redef fun item - do - assert is_ok - return curr_leaf.as(not null) + # Init the iterator from a RopeBuffer starting from `pos`. + init from(t: RopeBuffer, pos: Int) do + ns = t.ns + maxpos = t.length + sit = t.str.chars.iterator_from(pos) + pns = pos - t.str.length + index = pos end -end - -########################################### -# Node classes # -########################################### - -# A node for a Rope -private abstract class RopeNode - - private var _length = 0 - - private var parent: nullable ConcatNode = null - - private var height = 0 - - # The balance factor of a node, if it is a Leaf, it equals its length - # Else, it will be equal to the difference between the height on the left and on the right - private fun balance_factor: Int do return height end - - fun length: Int do return _length + redef fun is_ok do return index < maxpos - private fun length=(len: Int) - do - _length = len + redef fun item do + if sit.is_ok then return sit.item + return ns[pns] end -end - -# Node that represents a concatenation between two nodes (of any RopeNode type) -private class ConcatNode - super RopeNode - - private var _left_child: nullable RopeNode - private var _right_child: nullable RopeNode - private fun left_child: nullable RopeNode - do - if _left_child != null then - return _left_child + redef fun next do + index += 1 + if sit.is_ok then + sit.next else - return null + pns += 1 end end +end - private fun right_child: nullable RopeNode - do - if _right_child != null then - return _right_child - else - return null - end - end +# Reverse iterator over a RopeBuffer. +class RopeBufferReviter + super IndexedIterator[Char] - private fun left_child=(new_node: nullable RopeNode) - do - self._left_child = new_node - new_node.parent = self - update_data - end + # Subiterator. + var sit: IndexedIterator[Char] - private fun right_child=(new_node: nullable RopeNode) - do - self._right_child = new_node - new_node.parent = self - update_data - end + # Native string iterated over. + var ns: NativeString - # Updates the internal data of the current node - # - # Concretely, updates the length and the height of the node - private fun update_data - do - self.length = 0 - self.height = 1 - if left_child != null then - self.length += left_child.length - if left_child.height + 1 > self.height then self.height = left_child.height + 1 - end - if right_child != null then - self.length += right_child.length - if right_child.height + 1 > self.height then self.height = right_child.height + 1 - end - end + # Current position in `ns`. + var pns: Int - # Computes and returns the balance factor (used for AVL trees) - # - # Formula : left.height - right.height - redef private fun balance_factor - do - var left_height = 0 - var right_height = 0 - if left_child != null then left_height = left_child.height - if right_child != null then right_height = right_child.height - return left_height - right_height + redef var index: Int + + # Init the iterator from a RopeBuffer. + init(tgt: RopeBuffer) is old_style_init do + sit = tgt.str.chars.reverse_iterator + pns = tgt.rpos - 1 + index = tgt.length - 1 + ns = tgt.ns end -end -# A leaf node contains a substring of some sort -private class LeafNode - super RopeNode + # Init the iterator from a RopeBuffer starting from `pos`. + init from(tgt: RopeBuffer, pos: Int) do + sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped) + pns = pos - tgt.str.length + index = pos + ns = tgt.ns + end - # Encapsulated string in the leaf node - private var _value: String + redef fun is_ok do return index > 0 - init(val: AbstractString) - do - self._value = val.to_s - self.length = val.length + redef fun item do + if pns >= 0 then return ns[pns] + return sit.item end - private fun value: String do return self._value - - private fun value= (val: String) - do - _value = val + redef fun next do + index -= 1 + if pns >= 0 then + pns -= 1 + else + sit.next + end end end -##################################################### -# Foreign classes refinement # -##################################################### +# View on the chars of a `RopeBuffer` +class RopeBufferChars + super BufferCharView -redef class String - redef fun ==(other) - do - if other isa Rope then - return other == self + redef type SELFTYPE: RopeBuffer + + redef fun [](i) do + if i < target.str.length then + return target.str[i] else - return super + return target.ns[i - target.str.length] end end -end -redef class Buffer - redef fun ==(other) - do - if other isa Rope then - return other == self + redef fun []=(i,c) do + if i == target.length then target.add c + if i < target.str.length then + var s = target.str + var l = s.substring(0, i) + var r = s.substring_from(i + 1) + target.str = l + c.to_s + r else - return super + target.ns[i - target.str.length] = c end end + + redef fun add(c) do target.add c + + redef fun push(c) do target.add c + + redef fun iterator_from(i) do return new RopeBufferIter.from(target, i) + + redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i) end