#
# Ropes are a data structure introduced in a 1995 paper from
# Hans J. Boehm, Russ Atkinson and Michael Plass.
-# See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
-# Vol. 25(12), 1315-1330 (December 1995)`.
+#
+# See:
+#
+# > Ropes: an Alternative to Strings,
+# > *Software - Practice and Experience*,
+# > Vol. 25(12), 1315-1330 (December 1995).
#
# The implementation developed here provides an automatic change
# of data structure depending on the length of the leaves.
#
# Example :
#
-# ` Concat `
-# ` / \ `
-# ` Concat Concat `
-# ` / \ / \ `
-# `"My" " Name" " is" " Simon." `
+# Concat
+# / \
+# Concat Concat
+# / \ / \
+# "My" " Name" " is" " Simon."
#
# Note that the above example is not representative of the actual implementation
# of `Ropes`, since short leaves are merged to keep the rope at an acceptable
super Rope
super String
- redef fun chars is cached do return new RopeChars(self)
+ redef var chars is lazy do return new RopeChars(self)
end
# Node that represents a concatenation between two `String`
private class Concat
super RopeString
- redef var length: Int
+ redef var length: Int is noinit
redef fun substrings do return new RopeSubstrings(self)
redef fun empty do return ""
- redef fun to_cstring is cached do
+ redef var to_cstring is lazy do
var len = length
var ns = new NativeString(len + 1)
ns[len] = '\0'
# Right child of the node
var right: String
- init(l: String, r: String) do
- left = l
- right = r
- length = l.length + r.length
+ init do
+ length = left.length + right.length
end
redef fun output do
redef fun +(o) do
var s = o.to_s
- var mlen = length
var slen = s.length
if s isa Concat then
return new Concat(self, s)
else
var r = right
var rlen = r.length
- if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
+ if rlen + slen > maxlen then return new Concat(self, s)
return new Concat(left, r + s)
end
end
end
+# Mutable `Rope`, optimized for concatenation operations
+#
+# A `RopeBuffer` is an efficient way of building a `String` when
+# concatenating small strings.
+#
+# It does concatenations in an optimized way by having a
+# mutable part and an immutable part built by efficiently
+# concatenating strings in chain.
+#
+# Every concatenation operation is done by copying a string to
+# the mutable part and flushing it when full.
+#
+# However, when a long string is appended to the `Buffer`,
+# the concatenation is done at it would be in a `Rope`.
+class RopeBuffer
+ super Rope
+ super Buffer
+
+ redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
+
+ # The final string being built on the fly
+ private var str: String is noinit
+
+ # Current concatenation buffer
+ private var ns: NativeString is noinit
+
+ # Next available (e.g. unset) character in the `Buffer`
+ private var rpos = 0
+
+ # Keeps track of the buffer's currently dumped part
+ #
+ # This might happen if for instance, a String was being
+ # built by concatenating small parts of string and suddenly
+ # a long string (length > maxlen) is appended.
+ private var dumped: Int is noinit
+
+ # Length of the complete rope
+ redef var length = 0
+
+ # Length of the mutable part
+ #
+ # Is also used as base to compute the size of the next
+ # mutable native string (`ns`)
+ private var buf_size: Int is noinit
+
+ redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self)
+
+ # Builds an empty `RopeBuffer`
+ init do
+ str = ""
+ ns = new NativeString(maxlen)
+ buf_size = maxlen
+ dumped = 0
+ end
+
+ # Builds a new `RopeBuffer` with `str` in it.
+ init from(str: String) do
+ self.str = str
+ ns = new NativeString(maxlen)
+ buf_size = maxlen
+ length = str.length
+ dumped = 0
+ end
+
+ # Resets the informations of the `Buffer`
+ #
+ # This is called when doing in-place modifications
+ # on a previously to_s'd `RopeBuffer`
+ private fun reset do
+ var nns = new NativeString(buf_size)
+ var blen = rpos - dumped
+ ns.copy_to(nns, blen, dumped, 0)
+ ns = nns
+ dumped = 0
+ rpos = blen
+ written = false
+ end
+
+ redef fun empty do return new RopeBuffer
+
+ redef fun clear do
+ str = ""
+ length = 0
+ rpos = 0
+ dumped = 0
+ if written then
+ ns = new NativeString(buf_size)
+ written = false
+ end
+ end
+
+ redef fun substring(from, count) do
+ var strlen = str.length
+
+ if from < 0 then
+ count += from
+ if count < 0 then count = 0
+ from = 0
+ end
+
+ if count > length then count = length - from
+
+ if count == 0 then return empty
+
+ if from < strlen then
+ var subpos = strlen - from
+ if count <= subpos then
+ return new RopeBuffer.from(str.substring(from, count))
+ else
+ var l = str.substring_from(from)
+ var rem = count - subpos
+ var nns = new NativeString(rem)
+ ns.copy_to(nns, rem, dumped, 0)
+ return new RopeBuffer.from(l + nns.to_s_with_length(rem))
+ end
+ else
+ var nns = new NativeString(count)
+ ns.copy_to(nns, count, dumped, 0)
+ return new RopeBuffer.from(nns.to_s_with_length(count))
+ end
+ end
+
+ redef fun append(s) do
+ var slen = s.length
+ length += slen
+ var rp = rpos
+ if s isa Rope or slen > maxlen then
+ if rp > 0 and dumped != rp then
+ str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1)
+ dumped = rp
+ end
+ str = str + s
+ return
+ end
+ var remsp = buf_size - rp
+ var sits: NativeString
+ var begin: Int
+ if s isa FlatString then
+ begin = s.index_from
+ sits = s.items
+ else if s isa FlatBuffer then
+ begin = 0
+ sits = s.items
+ else
+ if slen <= remsp then
+ for i in s.chars do
+ ns[rpos] = i
+ rpos += 1
+ end
+ else
+ var spos = 0
+ for i in [0..remsp[ do
+ ns[rpos] = s[spos]
+ rpos += 1
+ spos += 1
+ end
+ dump_buffer
+ while spos < slen do
+ ns[rpos] = s[spos]
+ spos += 1
+ rpos += 1
+ end
+ end
+ return
+ end
+ if slen <= remsp then
+ if remsp <= 0 then
+ dump_buffer
+ rpos = 0
+ else
+ sits.copy_to(ns, slen, begin, rp)
+ rpos += slen
+ end
+ else
+ sits.copy_to(ns, remsp, begin, rp)
+ rpos = buf_size
+ dump_buffer
+ var nlen = slen - remsp
+ sits.copy_to(ns, nlen, begin + remsp, 0)
+ rpos = nlen
+ end
+ end
+
+ redef fun add(c) do
+ var rp = rpos
+ if rp >= buf_size then
+ dump_buffer
+ rp = 0
+ end
+ ns[rp] = c
+ rp += 1
+ length += 1
+ rpos = rp
+ end
+
+ # Converts the Buffer to a FlatString, appends it to
+ # the final String and re-allocates a new larger Buffer.
+ private fun dump_buffer do
+ written = false
+ var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+ str += nstr
+ var bs = buf_size
+ bs = bs * 2
+ ns = new NativeString(bs)
+ buf_size = bs
+ dumped = 0
+ end
+
+ redef fun output do
+ str.output
+ new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
+ end
+
+ # Enlarge is useless here since the `Buffer`
+ # part is automatically dumped when necessary.
+ #
+ # Also, since the buffer can not be overused by a
+ # single string, there is no need for manual
+ # resizing.
+ #
+ # "You have no power here !"
+ redef fun enlarge(i) do end
+
+ redef fun to_s do
+ written = true
+ var nnslen = rpos - dumped
+ if nnslen == 0 then return str
+ return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+ end
+
+ redef fun reverse do
+ # Flush the buffer in order to only have to reverse `str`.
+ if rpos > 0 and dumped != rpos then
+ str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+ dumped = rpos
+ end
+ str = str.reversed
+ end
+
+ redef fun upper do
+ if written then reset
+ str = str.to_upper
+ var mits = ns
+ for i in [0 .. rpos[ do
+ mits[i] = mits[i].to_upper
+ end
+ end
+
+ redef fun lower do
+ if written then reset
+ str = str.to_lower
+ var mits = ns
+ for i in [0 .. rpos[ do
+ mits[i] = mits[i].to_lower
+ end
+ end
+end
+
redef class FlatString
redef fun insert_at(s, pos) do
# the Rope traversal.
var subs: IndexedIterator[String]
- init(root: RopeString) do
+ init(root: RopeString) is old_style_init do
pos = root.length - 1
subs = new ReverseRopeSubstrings(root)
ns = subs.item
# Position (char) in the Rope (0-indexed)
var pos: Int
- init(root: RopeString) do
+ init(root: RopeString) is old_style_init do
subs = new RopeSubstrings(root)
pns = 0
str = subs.item
# Current leaf
var str: String is noinit
- init(root: RopeString) do
+ init(root: RopeString) is old_style_init do
var r = new RopeIterPiece(root, false, true, null)
pos = root.length - 1
var lnod: String = root
redef fun next do
if pos < 0 then return
- var curr: nullable RopeIterPiece = iter.prev
+ var curr = iter.prev
var currit = curr.node
while curr != null do
currit = curr.node
end
end
+private class RopeBufSubstringIterator
+ super Iterator[String]
+
+ # Iterator on the substrings of the building string
+ var iter: Iterator[String]
+ # Makes a String out of the buffered part of the Ropebuffer
+ var nsstr: String
+ # Did we attain the buffered part ?
+ var nsstr_done = false
+
+ init(str: RopeBuffer) is old_style_init do
+ iter = str.str.substrings
+ nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
+ if str.length == 0 then nsstr_done = true
+ end
+
+ redef fun is_ok do return iter.is_ok or not nsstr_done
+
+ redef fun item do
+ assert is_ok
+ if iter.is_ok then return iter.item
+ return nsstr
+ end
+
+ redef fun next do
+ if iter.is_ok then
+ iter.next
+ return
+ end
+ nsstr_done = true
+ end
+end
+
# Substrings of a Rope (i.e. Postfix iterator on leaves)
private class RopeSubstrings
super IndexedIterator[String]
# Current leaf
var str: String is noinit
- init(root: RopeString) do
+ init(root: RopeString) is old_style_init do
var r = new RopeIterPiece(root, true, false, null)
pos = 0
max = root.length - 1
pos += str.length
if pos > max then return
var it = iter.prev
- var rnod: String = it.node
+ var rnod = it.node
loop
if not rnod isa Concat then
it.ldone = true
private class RopeChars
super StringCharView
- var tgt: RopeString
+ redef type SELFTYPE: RopeString
+
+ redef fun [](i) do
+ return target[i]
+ end
+
+ redef fun iterator_from(i) do return new RopeIter.from(target, i)
+
+ redef fun reverse_iterator_from(i) do return new RopeReviter.from(target, i)
+
+end
+
+# An Iterator over a RopeBuffer.
+class RopeBufferIter
+ super IndexedIterator[Char]
+
+ # Subiterator.
+ var sit: IndexedIterator[Char]
+
+ # Native string iterated over.
+ var ns: NativeString
+
+ # Current position in `ns`.
+ var pns: Int
+
+ # Maximum position iterable.
+ var maxpos: Int
+
+ redef var index: Int
+
+ # Init the iterator from a RopeBuffer.
+ init(t: RopeBuffer) is old_style_init do
+ ns = t.ns
+ maxpos = t.rpos
+ sit = t.str.chars.iterator
+ pns = t.dumped
+ index = 0
+ end
- init(s: RopeString) do tgt = s
+ # Init the iterator from a RopeBuffer starting from `pos`.
+ init from(t: RopeBuffer, pos: Int) do
+ ns = t.ns
+ maxpos = t.length
+ sit = t.str.chars.iterator_from(pos)
+ pns = pos - t.str.length
+ index = pos
+ end
+
+ redef fun is_ok do return index < maxpos
+
+ redef fun item do
+ if sit.is_ok then return sit.item
+ return ns[pns]
+ end
+
+ redef fun next do
+ index += 1
+ if sit.is_ok then
+ sit.next
+ else
+ pns += 1
+ end
+ end
+end
+
+# Reverse iterator over a RopeBuffer.
+class RopeBufferReviter
+ super IndexedIterator[Char]
+
+ # Subiterator.
+ var sit: IndexedIterator[Char]
+
+ # Native string iterated over.
+ var ns: NativeString
+
+ # Current position in `ns`.
+ var pns: Int
+
+ redef var index: Int
+
+ # Init the iterator from a RopeBuffer.
+ init(tgt: RopeBuffer) is old_style_init do
+ sit = tgt.str.chars.reverse_iterator
+ pns = tgt.rpos - 1
+ index = tgt.length - 1
+ ns = tgt.ns
+ end
+
+ # Init the iterator from a RopeBuffer starting from `pos`.
+ init from(tgt: RopeBuffer, pos: Int) do
+ sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped)
+ pns = pos - tgt.str.length
+ index = pos
+ ns = tgt.ns
+ end
+
+ redef fun is_ok do return index > 0
+
+ redef fun item do
+ if pns >= 0 then return ns[pns]
+ return sit.item
+ end
+
+ redef fun next do
+ index -= 1
+ if pns >= 0 then
+ pns -= 1
+ else
+ sit.next
+ end
+ end
+end
+
+# View on the chars of a `RopeBuffer`
+class RopeBufferChars
+ super BufferCharView
+
+ redef type SELFTYPE: RopeBuffer
redef fun [](i) do
- return tgt[i]
+ if i < target.str.length then
+ return target.str[i]
+ else
+ return target.ns[i - target.str.length]
+ end
+ end
+
+ redef fun []=(i,c) do
+ if i == target.length then target.add c
+ if i < target.str.length then
+ var s = target.str
+ var l = s.substring(0, i)
+ var r = s.substring_from(i + 1)
+ target.str = l + c.to_s + r
+ else
+ target.ns[i - target.str.length] = c
+ end
end
- redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
+ redef fun add(c) do target.add c
+
+ redef fun push(c) do target.add c
- redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
+ redef fun iterator_from(i) do return new RopeBufferIter.from(target, i)
+ redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i)
end