X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/ropes.nit b/lib/standard/ropes.nit index a9f7616..644e3dc 100644 --- a/lib/standard/ropes.nit +++ b/lib/standard/ropes.nit @@ -16,8 +16,12 @@ # # Ropes are a data structure introduced in a 1995 paper from # Hans J. Boehm, Russ Atkinson and Michael Plass. -# See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience, -# Vol. 25(12), 1315-1330 (December 1995)`. +# +# See: +# +# > Ropes: an Alternative to Strings, +# > *Software - Practice and Experience*, +# > Vol. 25(12), 1315-1330 (December 1995). # # The implementation developed here provides an automatic change # of data structure depending on the length of the leaves. @@ -32,11 +36,11 @@ # # Example : # -# ` Concat ` -# ` / \ ` -# ` Concat Concat ` -# ` / \ / \ ` -# `"My" " Name" " is" " Simon." ` +# Concat +# / \ +# Concat Concat +# / \ / \ +# "My" " Name" " is" " Simon." # # Note that the above example is not representative of the actual implementation # of `Ropes`, since short leaves are merged to keep the rope at an acceptable @@ -63,20 +67,20 @@ private abstract class RopeString super Rope super String - redef fun chars is cached do return new RopeChars(self) + redef var chars is lazy do return new RopeChars(self) end # Node that represents a concatenation between two `String` private class Concat super RopeString - redef var length: Int + redef var length: Int is noinit redef fun substrings do return new RopeSubstrings(self) redef fun empty do return "" - redef fun to_cstring is cached do + redef var to_cstring is lazy do var len = length var ns = new NativeString(len + 1) ns[len] = '\0' @@ -94,10 +98,8 @@ private class Concat # Right child of the node var right: String - init(l: String, r: String) do - left = l - right = r - length = l.length + r.length + init do + length = left.length + right.length end redef fun output do @@ -145,19 +147,276 @@ private class Concat redef fun +(o) do var s = o.to_s - var mlen = length var slen = s.length if s isa Concat then return new Concat(self, s) else var r = right var rlen = r.length - if rlen + slen > maxlen then return new Concat(left, new Concat(r, s)) + if rlen + slen > maxlen then return new Concat(self, s) return new Concat(left, r + s) end end end +# Mutable `Rope`, optimized for concatenation operations +# +# A `RopeBuffer` is an efficient way of building a `String` when +# concatenating small strings. +# +# It does concatenations in an optimized way by having a +# mutable part and an immutable part built by efficiently +# concatenating strings in chain. +# +# Every concatenation operation is done by copying a string to +# the mutable part and flushing it when full. +# +# However, when a long string is appended to the `Buffer`, +# the concatenation is done at it would be in a `Rope`. +class RopeBuffer + super Rope + super Buffer + + redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self) + + # The final string being built on the fly + private var str: String is noinit + + # Current concatenation buffer + private var ns: NativeString is noinit + + # Next available (e.g. unset) character in the `Buffer` + private var rpos = 0 + + # Keeps track of the buffer's currently dumped part + # + # This might happen if for instance, a String was being + # built by concatenating small parts of string and suddenly + # a long string (length > maxlen) is appended. + private var dumped: Int is noinit + + # Length of the complete rope + redef var length = 0 + + # Length of the mutable part + # + # Is also used as base to compute the size of the next + # mutable native string (`ns`) + private var buf_size: Int is noinit + + redef fun substrings: Iterator[String] do return new RopeBufSubstringIterator(self) + + # Builds an empty `RopeBuffer` + init do + str = "" + ns = new NativeString(maxlen) + buf_size = maxlen + dumped = 0 + end + + # Builds a new `RopeBuffer` with `str` in it. + init from(str: String) do + self.str = str + ns = new NativeString(maxlen) + buf_size = maxlen + length = str.length + dumped = 0 + end + + # Resets the informations of the `Buffer` + # + # This is called when doing in-place modifications + # on a previously to_s'd `RopeBuffer` + private fun reset do + var nns = new NativeString(buf_size) + var blen = rpos - dumped + ns.copy_to(nns, blen, dumped, 0) + ns = nns + dumped = 0 + rpos = blen + written = false + end + + redef fun empty do return new RopeBuffer + + redef fun clear do + str = "" + length = 0 + rpos = 0 + dumped = 0 + if written then + ns = new NativeString(buf_size) + written = false + end + end + + redef fun substring(from, count) do + var strlen = str.length + + if from < 0 then + count += from + if count < 0 then count = 0 + from = 0 + end + + if count > length then count = length - from + + if count == 0 then return empty + + if from < strlen then + var subpos = strlen - from + if count <= subpos then + return new RopeBuffer.from(str.substring(from, count)) + else + var l = str.substring_from(from) + var rem = count - subpos + var nns = new NativeString(rem) + ns.copy_to(nns, rem, dumped, 0) + return new RopeBuffer.from(l + nns.to_s_with_length(rem)) + end + else + var nns = new NativeString(count) + ns.copy_to(nns, count, dumped, 0) + return new RopeBuffer.from(nns.to_s_with_length(count)) + end + end + + redef fun append(s) do + var slen = s.length + length += slen + var rp = rpos + if s isa Rope or slen > maxlen then + if rp > 0 and dumped != rp then + str += new FlatString.with_infos(ns, rp - dumped, dumped, rp - 1) + dumped = rp + end + str = str + s + return + end + var remsp = buf_size - rp + var sits: NativeString + var begin: Int + if s isa FlatString then + begin = s.index_from + sits = s.items + else if s isa FlatBuffer then + begin = 0 + sits = s.items + else + if slen <= remsp then + for i in s.chars do + ns[rpos] = i + rpos += 1 + end + else + var spos = 0 + for i in [0..remsp[ do + ns[rpos] = s[spos] + rpos += 1 + spos += 1 + end + dump_buffer + while spos < slen do + ns[rpos] = s[spos] + spos += 1 + rpos += 1 + end + end + return + end + if slen <= remsp then + if remsp <= 0 then + dump_buffer + rpos = 0 + else + sits.copy_to(ns, slen, begin, rp) + rpos += slen + end + else + sits.copy_to(ns, remsp, begin, rp) + rpos = buf_size + dump_buffer + var nlen = slen - remsp + sits.copy_to(ns, nlen, begin + remsp, 0) + rpos = nlen + end + end + + redef fun add(c) do + var rp = rpos + if rp >= buf_size then + dump_buffer + rp = 0 + end + ns[rp] = c + rp += 1 + length += 1 + rpos = rp + end + + # Converts the Buffer to a FlatString, appends it to + # the final String and re-allocates a new larger Buffer. + private fun dump_buffer do + written = false + var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + str += nstr + var bs = buf_size + bs = bs * 2 + ns = new NativeString(bs) + buf_size = bs + dumped = 0 + end + + redef fun output do + str.output + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output + end + + # Enlarge is useless here since the `Buffer` + # part is automatically dumped when necessary. + # + # Also, since the buffer can not be overused by a + # single string, there is no need for manual + # resizing. + # + # "You have no power here !" + redef fun enlarge(i) do end + + redef fun to_s do + written = true + var nnslen = rpos - dumped + if nnslen == 0 then return str + return str + new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + end + + redef fun reverse do + # Flush the buffer in order to only have to reverse `str`. + if rpos > 0 and dumped != rpos then + str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1) + dumped = rpos + end + str = str.reversed + end + + redef fun upper do + if written then reset + str = str.to_upper + var mits = ns + for i in [0 .. rpos[ do + mits[i] = mits[i].to_upper + end + end + + redef fun lower do + if written then reset + str = str.to_lower + var mits = ns + for i in [0 .. rpos[ do + mits[i] = mits[i].to_lower + end + end +end + redef class FlatString redef fun insert_at(s, pos) do @@ -220,7 +479,7 @@ private class RopeReviter # the Rope traversal. var subs: IndexedIterator[String] - init(root: RopeString) do + init(root: RopeString) is old_style_init do pos = root.length - 1 subs = new ReverseRopeSubstrings(root) ns = subs.item @@ -267,7 +526,7 @@ private class RopeIter # Position (char) in the Rope (0-indexed) var pos: Int - init(root: RopeString) do + init(root: RopeString) is old_style_init do subs = new RopeSubstrings(root) pns = 0 str = subs.item @@ -313,7 +572,7 @@ private class ReverseRopeSubstrings # Current leaf var str: String is noinit - init(root: RopeString) do + init(root: RopeString) is old_style_init do var r = new RopeIterPiece(root, false, true, null) pos = root.length - 1 var lnod: String = root @@ -362,7 +621,7 @@ private class ReverseRopeSubstrings redef fun next do if pos < 0 then return - var curr: nullable RopeIterPiece = iter.prev + var curr = iter.prev var currit = curr.node while curr != null do currit = curr.node @@ -388,6 +647,39 @@ private class ReverseRopeSubstrings end end +private class RopeBufSubstringIterator + super Iterator[String] + + # Iterator on the substrings of the building string + var iter: Iterator[String] + # Makes a String out of the buffered part of the Ropebuffer + var nsstr: String + # Did we attain the buffered part ? + var nsstr_done = false + + init(str: RopeBuffer) is old_style_init do + iter = str.str.substrings + nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1) + if str.length == 0 then nsstr_done = true + end + + redef fun is_ok do return iter.is_ok or not nsstr_done + + redef fun item do + assert is_ok + if iter.is_ok then return iter.item + return nsstr + end + + redef fun next do + if iter.is_ok then + iter.next + return + end + nsstr_done = true + end +end + # Substrings of a Rope (i.e. Postfix iterator on leaves) private class RopeSubstrings super IndexedIterator[String] @@ -402,7 +694,7 @@ private class RopeSubstrings # Current leaf var str: String is noinit - init(root: RopeString) do + init(root: RopeString) is old_style_init do var r = new RopeIterPiece(root, true, false, null) pos = 0 max = root.length - 1 @@ -456,7 +748,7 @@ private class RopeSubstrings pos += str.length if pos > max then return var it = iter.prev - var rnod: String = it.node + var rnod = it.node loop if not rnod isa Concat then it.ldone = true @@ -486,16 +778,150 @@ end private class RopeChars super StringCharView - var tgt: RopeString + redef type SELFTYPE: RopeString + + redef fun [](i) do + return target[i] + end + + redef fun iterator_from(i) do return new RopeIter.from(target, i) + + redef fun reverse_iterator_from(i) do return new RopeReviter.from(target, i) + +end + +# An Iterator over a RopeBuffer. +class RopeBufferIter + super IndexedIterator[Char] + + # Subiterator. + var sit: IndexedIterator[Char] + + # Native string iterated over. + var ns: NativeString + + # Current position in `ns`. + var pns: Int + + # Maximum position iterable. + var maxpos: Int + + redef var index: Int + + # Init the iterator from a RopeBuffer. + init(t: RopeBuffer) is old_style_init do + ns = t.ns + maxpos = t.rpos + sit = t.str.chars.iterator + pns = t.dumped + index = 0 + end - init(s: RopeString) do tgt = s + # Init the iterator from a RopeBuffer starting from `pos`. + init from(t: RopeBuffer, pos: Int) do + ns = t.ns + maxpos = t.length + sit = t.str.chars.iterator_from(pos) + pns = pos - t.str.length + index = pos + end + + redef fun is_ok do return index < maxpos + + redef fun item do + if sit.is_ok then return sit.item + return ns[pns] + end + + redef fun next do + index += 1 + if sit.is_ok then + sit.next + else + pns += 1 + end + end +end + +# Reverse iterator over a RopeBuffer. +class RopeBufferReviter + super IndexedIterator[Char] + + # Subiterator. + var sit: IndexedIterator[Char] + + # Native string iterated over. + var ns: NativeString + + # Current position in `ns`. + var pns: Int + + redef var index: Int + + # Init the iterator from a RopeBuffer. + init(tgt: RopeBuffer) is old_style_init do + sit = tgt.str.chars.reverse_iterator + pns = tgt.rpos - 1 + index = tgt.length - 1 + ns = tgt.ns + end + + # Init the iterator from a RopeBuffer starting from `pos`. + init from(tgt: RopeBuffer, pos: Int) do + sit = tgt.str.chars.reverse_iterator_from(pos - tgt.rpos - tgt.dumped) + pns = pos - tgt.str.length + index = pos + ns = tgt.ns + end + + redef fun is_ok do return index > 0 + + redef fun item do + if pns >= 0 then return ns[pns] + return sit.item + end + + redef fun next do + index -= 1 + if pns >= 0 then + pns -= 1 + else + sit.next + end + end +end + +# View on the chars of a `RopeBuffer` +class RopeBufferChars + super BufferCharView + + redef type SELFTYPE: RopeBuffer redef fun [](i) do - return tgt[i] + if i < target.str.length then + return target.str[i] + else + return target.ns[i - target.str.length] + end + end + + redef fun []=(i,c) do + if i == target.length then target.add c + if i < target.str.length then + var s = target.str + var l = s.substring(0, i) + var r = s.substring_from(i + 1) + target.str = l + c.to_s + r + else + target.ns[i - target.str.length] = c + end end - redef fun iterator_from(i) do return new RopeIter.from(tgt, i) + redef fun add(c) do target.add c + + redef fun push(c) do target.add c - redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i) + redef fun iterator_from(i) do return new RopeBufferIter.from(target, i) + redef fun reverse_iterator_from(i) do return new RopeBufferReviter.from(target, i) end