# This file is part of NIT (http://www.nitlanguage.org). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Tree-based representation of a String. # # Ropes are a data structure introduced in a 1995 paper from # Hans J. Boehm, Russ Atkinson and Michael Plass. # # See: # # > Ropes: an Alternative to Strings, # > *Software - Practice and Experience*, # > Vol. 25(12), 1315-1330 (December 1995). # # The implementation developed here provides an automatic change # of data structure depending on the length of the leaves. # # The length from which a `Rope` is built from a `flat` string # if defined at top-level (see `maxlen`) and can be redefined by clients # depending on their needs (e.g. if you need to bench the speed of # the creation of concat nodes, lower the size of maxlen). # # A Rope as defined in the original paper is a Tree made of concatenation # nodes and containing `Flat` (that is Array-based) strings as Leaves. # # Example : # # ~~~raw # Concat # / \ # Concat Concat # / \ / \ # "My" " Name" " is" " Simon." # ~~~ # # Note that the above example is not representative of the actual implementation # of `Ropes`, since short leaves are merged to keep the rope at an acceptable # height (hence, this rope here might actually be a `FlatString` !). module ropes intrude import flat # Maxlen is the maximum length of a Leaf node # # When concatenating two leaves, if `new_length` > `maxlen`, # A `Concat` node is created instead # # Its purpose is to limit the depth of the `Rope` (this # improves performance when accessing/iterating). fun maxlen: Int do return 64 # String using a tree-based representation with leaves as `FlatStrings` private abstract class Rope super Text end # Node that represents a concatenation between two `String` private class Concat super Rope super String redef var chars is lazy do return new RopeChars(self) redef var length is noinit redef fun substrings do return new RopeSubstrings(self) redef fun empty do return "" redef var to_cstring is lazy do var len = length var ns = new NativeString(len + 1) ns[len] = '\0' var off = 0 for i in substrings do var ilen = i.length i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off) off += ilen end return ns end # Left child of the node var left: String # Right child of the node var right: String init do length = left.length + right.length end redef fun output do left.output right.output end redef fun iterator do return new RopeIter(self) redef fun *(i) do var x: String = self for j in [1 .. i[ do x += self return x end redef fun [](i) do var llen = left.length if i >= llen then return right[i - llen] return left[i] end redef fun substring(from, len) do var llen = left.length if from < llen then if from + len < llen then return left.substring(from,len) var lsublen = llen - from return left.substring_from(from) + right.substring(0, len - lsublen) else return right.substring(from - llen, len) end end redef fun reversed do return new Concat(right.reversed, left.reversed) redef fun insert_at(s, pos) do if pos > left.length then return left + right.insert_at(s, pos - left.length) end return left.insert_at(s, pos) + right end redef fun to_upper do return new Concat(left.to_upper, right.to_upper) redef fun to_lower do return new Concat(left.to_lower, right.to_lower) redef fun +(o) do var s = o.to_s var slen = s.length if s isa Concat then return new Concat(self, s) else var r = right var rlen = r.length if rlen + slen > maxlen then return new Concat(self, s) return new Concat(left, r + s) end end redef fun copy_to_native(dest, n, src_offset, dest_offset) do var subs = new RopeSubstrings.from(self, src_offset) var st = src_offset - subs.pos var off = dest_offset while n > 0 do var it = subs.item if n > it.length then var cplen = it.length - st it.items.copy_to(dest, cplen, st, off) off += cplen n -= cplen else it.items.copy_to(dest, n, st, off) n = 0 end subs.next st = 0 end end end # Mutable `Rope`, optimized for concatenation operations # # A `RopeBuffer` is an efficient way of building a `String` when # concatenating small strings. # # It does concatenations in an optimized way by having a # mutable part and an immutable part built by efficiently # concatenating strings in chain. # # Every concatenation operation is done by copying a string to # the mutable part and flushing it when full. # # However, when a long string is appended to the `Buffer`, # the concatenation is done at it would be in a `Rope`. class RopeBuffer super Rope super Buffer redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self) # The final string being built on the fly private var str: String is noinit # Current concatenation buffer private var ns: NativeString is noinit # Next available (e.g. unset) character in the `Buffer` private var rpos = 0 # Keeps track of the buffer's currently dumped part # # This might happen if for instance, a String was being # built by concatenating small parts of string and suddenly # a long string (length > maxlen) is appended. private var dumped: Int is noinit # Length of the complete rope redef var length = 0 # Length of the mutable part # # Is also used as base to compute the size of the next # mutable native string (`ns`) private var buf_size: Int is noinit redef fun substrings do return new RopeBufSubstringIterator(self) # Builds an empty `RopeBuffer` init do str = "" ns = new NativeString(maxlen) buf_size = maxlen dumped = 0 end # Builds a new `RopeBuffer` with `str` in it. init from(str: String) do self.str = str ns = new NativeString(maxlen) buf_size = maxlen length = str.length dumped = 0 end # Resets the informations of the `Buffer` # # This is called when doing in-place modifications # on a previously to_s'd `RopeBuffer` private fun reset do var nns = new NativeString(buf_size) var blen = rpos - dumped ns.copy_to(nns, blen, dumped, 0) ns = nns dumped = 0 rpos = blen written = false end redef fun empty do return new RopeBuffer redef fun clear do str = "" length = 0 rpos = 0 dumped = 0 if written then ns = new NativeString(buf_size) written = false end end redef fun substring(from, count) do var strlen = str.length if from < 0 then count += from if count < 0 then count = 0 from = 0 end if count > length then count = length - from if count == 0 then return empty if from < strlen then var subpos = strlen - from if count <= subpos then return new RopeBuffer.from(str.substring(from, count)) else var l = str.substring_from(from) var rem = count - subpos var nns = new NativeString(rem) ns.copy_to(nns, rem, dumped, 0) return new RopeBuffer.from(l + nns.to_s_with_length(rem)) end else var nns = new NativeString(count) ns.copy_to(nns, count, dumped, 0) return new RopeBuffer.from(nns.to_s_with_length(count)) end end redef fun append(s) do var slen = s.length length += slen var rp = rpos if s isa Rope or slen > maxlen then if rp > 0 and dumped != rp then str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1) dumped = rp end str = str + s return end var remsp = buf_size - rp var sits: NativeString var begin: Int if s isa FlatString then begin = s.index_from sits = s.items else if s isa FlatBuffer then begin = 0 sits = s.items else if slen <= remsp then for i in s.chars do ns[rpos] = i rpos += 1 end else var spos = 0 for i in [0..remsp[ do ns[rpos] = s[spos] rpos += 1 spos += 1 end dump_buffer while spos < slen do ns[rpos] = s[spos] spos += 1 rpos += 1 end end return end if slen <= remsp then if remsp <= 0 then dump_buffer rpos = 0 else sits.copy_to(ns, slen, begin, rp) rpos += slen end else sits.copy_to(ns, remsp, begin, rp) rpos = buf_size dump_buffer var nlen = slen - remsp sits.copy_to(ns, nlen, begin + remsp, 0) rpos = nlen end end redef fun add(c) do var rp = rpos if rp >= buf_size then dump_buffer rp = 0 end ns[rp] = c rp += 1 length += 1 rpos = rp end # Converts the Buffer to a FlatString, appends it to # the final String and re-allocates a new larger Buffer. private fun dump_buffer do written = false var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) str += nstr var bs = buf_size bs = bs * 2 ns = new NativeString(bs) buf_size = bs dumped = 0 end redef fun output do str.output new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output end # Enlarge is useless here since the `Buffer` # part is automatically dumped when necessary. # # Also, since the buffer can not be overused by a # single string, there is no need for manual # resizing. # # "You have no power here !" redef fun enlarge(i) do end redef fun to_s do written = true var nnslen = rpos - dumped if nnslen == 0 then return str return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) end redef fun reverse do # Flush the buffer in order to only have to reverse `str`. if rpos > 0 and dumped != rpos then str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) dumped = rpos end str = str.reversed end redef fun upper do if written then reset str = str.to_upper var mits = ns for i in [0 .. rpos[ do mits[i] = mits[i].to_upper end end redef fun lower do if written then reset str = str.to_lower var mits = ns for i in [0 .. rpos[ do mits[i] = mits[i].to_lower end end end redef class FlatString redef fun insert_at(s, pos) do var l = substring(0, pos) var r = substring_from(pos) return l + s + r end redef fun +(o) do var s = o.to_s var slen = s.length var mlen = length if slen == 0 then return self if mlen == 0 then return s var nlen = slen + mlen if s isa FlatString then if nlen > maxlen then return new Concat(self, s) var mits = items var sifrom = s.index_from var mifrom = index_from var sits = s.items var ns = new NativeString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) sits.copy_to(ns, slen, sifrom, mlen) return ns.to_s_with_length(nlen) else if s isa Concat then var sl = s.left var sllen = sl.length if sllen + mlen > maxlen then return new Concat(self, s) return new Concat(self + sl, s.right) else abort end end end # A simple linked list for use with iterators private class RopeIterPiece # The encapsulated node of the `Rope` var node: String # Was its left child (if any) visited ? var ldone: Bool # Was its right child (if any) visited ? var rdone: Bool # The previous node in the list. var prev: nullable RopeIterPiece end # A reverse iterator capable of working with `Rope` objects private class RopeReviter super IndexedIterator[Char] # Current NativeString var ns: String # Current position in NativeString var pns: Int # Position in the Rope (0-indexed) var pos: Int # Iterator on the substrings, does the Postfix part of # the Rope traversal. var subs: IndexedIterator[String] init(root: Concat) is old_style_init do pos = root.length - 1 subs = new ReverseRopeSubstrings(root) ns = subs.item pns = ns.length - 1 end init from(root: Concat, pos: Int) do self.pos = pos subs = new ReverseRopeSubstrings.from(root, pos) ns = subs.item pns = pos - subs.index end redef fun index do return pos redef fun is_ok do return pos >= 0 redef fun item do return ns[pns] redef fun next do pns -= 1 pos -= 1 if pns >= 0 then return if not subs.is_ok then return subs.next if not subs.is_ok then return ns = subs.item pns = ns.length - 1 end end # Forward iterator on the chars of a `Rope` private class RopeIter super IndexedIterator[Char] # Position in current `String` var pns: Int # Current `String` being iterated on var str: String # Substrings of the Rope var subs: IndexedIterator[String] # Maximum position to iterate on (e.g. Rope.length) var max: Int # Position (char) in the Rope (0-indexed) var pos: Int init(root: Concat) is old_style_init do subs = new RopeSubstrings(root) pns = 0 str = subs.item max = root.length - 1 pos = 0 end init from(root: Concat, pos: Int) do subs = new RopeSubstrings.from(root, pos) pns = pos - subs.index self.pos = pos str = subs.item max = root.length - 1 end redef fun item do return str[pns] redef fun is_ok do return pos <= max redef fun index do return pos redef fun next do pns += 1 pos += 1 if pns < subs.item.length then return if not subs.is_ok then return subs.next if not subs.is_ok then return str = subs.item pns = 0 end end # Substrings of a Rope (i.e. Reverse postfix iterator on leaves) private class ReverseRopeSubstrings super IndexedIterator[String] # Visit Stack var iter: RopeIterPiece is noinit # Position in `Rope` var pos: Int is noinit # Current leaf var str: String is noinit init(root: Concat) is old_style_init do var r = new RopeIterPiece(root, false, true, null) pos = root.length - 1 var lnod: String = root loop if lnod isa Concat then lnod = lnod.right r = new RopeIterPiece(lnod, false, true, r) else str = lnod iter = r break end end end init from(root: Concat, pos: Int) do var r = new RopeIterPiece(root, false, true, null) var rnod: String = root var off = pos loop if rnod isa Concat then if off >= rnod.left.length then off -= rnod.left.length rnod = rnod.right r = new RopeIterPiece(rnod, false, true, r) else r.ldone = true rnod = rnod.left r = new RopeIterPiece(rnod, false, true, r) end else str = rnod r.ldone = true iter = r self.pos = pos - off break end end end redef fun item do return str redef fun index do return pos redef fun is_ok do return pos >= 0 redef fun next do if pos < 0 then return var curr = iter.prev var currit = curr.node while curr != null do currit = curr.node if not currit isa Concat then str = currit pos -= str.length iter = curr return end if not curr.rdone then curr.rdone = true curr = new RopeIterPiece(currit.right, false, false, curr) continue end if not curr.ldone then curr.ldone = true curr = new RopeIterPiece(currit.left, false, false, curr) continue end curr = curr.prev end pos = -1 end end private class RopeBufSubstringIterator super Iterator[FlatText] # Iterator on the substrings of the building string var iter: Iterator[FlatText] # Makes a String out of the buffered part of the Ropebuffer var nsstr: FlatString # Did we attain the buffered part ? var nsstr_done = false init(str: RopeBuffer) is old_style_init do iter = str.str.substrings nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1) if str.length == 0 then nsstr_done = true end redef fun is_ok do return iter.is_ok or not nsstr_done redef fun item do assert is_ok if iter.is_ok then return iter.item return nsstr end redef fun next do if iter.is_ok then iter.next return end nsstr_done = true end end # Substrings of a Rope (i.e. Postfix iterator on leaves) private class RopeSubstrings super IndexedIterator[FlatString] # Visit Stack var iter: RopeIterPiece is noinit # Position in `Rope` var pos: Int is noinit # Maximum position in `Rope` (i.e. length - 1) var max: Int is noinit # Current leaf var str: FlatString is noinit init(root: Concat) is old_style_init do var r = new RopeIterPiece(root, true, false, null) pos = 0 max = root.length - 1 var rnod: String = root loop if rnod isa Concat then rnod = rnod.left r = new RopeIterPiece(rnod, true, false, r) else str = rnod.as(FlatString) r.rdone = true iter = r break end end end init from(root: Concat, pos: Int) do var r = new RopeIterPiece(root, true, false, null) max = root.length - 1 var rnod: String = root var off = pos loop if rnod isa Concat then if off >= rnod.left.length then r.rdone = true off -= rnod.left.length rnod = rnod.right r = new RopeIterPiece(rnod, true, false, r) else rnod = rnod.left r = new RopeIterPiece(rnod, true, false, r) end else str = rnod.as(FlatString) r.rdone = true iter = r self.pos = pos - off break end end end redef fun item do return str redef fun is_ok do return pos <= max redef fun index do return pos redef fun next do pos += str.length if pos > max then return var it = iter.prev var rnod = it.node loop if not rnod isa Concat then it.ldone = true it.rdone = true str = rnod.as(FlatString) iter = it.as(not null) break end if not it.ldone then rnod = rnod.left it.ldone = true it = new RopeIterPiece(rnod, false, false, it) else if not it.rdone then it.rdone = true rnod = rnod.right it = new RopeIterPiece(rnod, false, false, it) else it = it.prev rnod = it.node continue end end end end # Implementation of a `StringCharView` for `Concat` objects private class RopeChars super StringCharView redef type SELFTYPE: Concat redef fun [](i) do return target[i] end redef fun iterator_from(i) do return new RopeIter.from(target, i) redef fun reverse_iterator_from(i) do return new RopeReviter.from(target, i) end # An Iterator over a RopeBuffer. class RopeBufferIter super IndexedIterator[Char] # Subiterator. var sit: IndexedIterator[Char] # Native string iterated over. var ns: NativeString # Current position in `ns`. var pns: Int # Maximum position iterable. var maxpos: Int redef var index # Init the iterator from a RopeBuffer. init(t: RopeBuffer) is old_style_init do ns = t.ns maxpos = t.rpos sit = t.str.chars.iterator pns = t.dumped index = 0 end # Init the iterator from a RopeBuffer starting from `pos`. init from(t: RopeBuffer, pos: Int) do ns = t.ns maxpos = t.length sit = t.str.chars.iterator_from(pos) pns = pos - t.str.length index = pos end redef fun is_ok do return index < maxpos redef fun item do if sit.is_ok then return sit.item return ns[pns] end redef fun next do index += 1 if sit.is_ok then sit.next else pns += 1 end end end # Reverse iterator over a RopeBuffer. class RopeBufferReviter super IndexedIterator[Char] # Subiterator. var sit: IndexedIterator[Char] # Native string iterated over. var ns: NativeString # Current position in `ns`. var pns: Int redef var index # Init the iterator from a RopeBuffer. init(tgt: RopeBuffer) is old_style_init do sit = tgt.str.chars.reverse_iterator pns = tgt.rpos - 1 index = tgt.length - 1 ns = tgt.ns end # Init the iterator from a RopeBuffer starting from `pos`. init from(tgt: RopeBuffer, pos: Int) do sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped) pns = pos - tgt.str.length index = pos ns = tgt.ns end redef fun is_ok do return index > 0 redef fun item do if pns >= 0 then return ns[pns] return sit.item end redef fun next do index -= 1 if pns >= 0 then pns -= 1 else sit.next end end end # View on the chars of a `RopeBuffer` class RopeBufferChars super BufferCharView redef type SELFTYPE: RopeBuffer redef fun [](i) do if i < target.str.length then return target.str[i] else return target.ns[i - target.str.length] end end redef fun []=(i,c) do if i == target.length then target.add c if i < target.str.length then var s = target.str var l = s.substring(0, i) var r = s.substring_from(i + 1) target.str = l + c.to_s + r else target.ns[i - target.str.length] = c end end redef fun add(c) do target.add c redef fun push(c) do target.add c redef fun iterator_from(i) do return new RopeBufferIter.from(target, i) redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i) end