lib/standard/ropes: Rewrote ropes base block.
authorLucas Bajolet <r4pass@hotmail.com>
Tue, 21 Oct 2014 14:25:41 +0000 (10:25 -0400)
committerLucas Bajolet <r4pass@hotmail.com>
Tue, 4 Nov 2014 20:44:41 +0000 (15:44 -0500)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

lib/standard/ropes.nit [new file with mode: 0644]

diff --git a/lib/standard/ropes.nit b/lib/standard/ropes.nit
new file mode 100644 (file)
index 0000000..a9f7616
--- /dev/null
@@ -0,0 +1,501 @@
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Tree-based representation of a String.
+#
+# Ropes are a data structure introduced in a 1995 paper from
+# Hans J. Boehm, Russ Atkinson and Michael Plass.
+# See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience,
+# Vol. 25(12), 1315-1330 (December 1995)`.
+#
+# The implementation developed here provides an automatic change
+# of data structure depending on the length of the leaves.
+#
+# The length from which a `Rope` is built from a `flat` string
+# if defined at top-level (see `maxlen`) and can be redefined by clients
+# depending on their needs (e.g. if you need to bench the speed of
+# the creation of concat nodes, lower the size of maxlen).
+#
+# A Rope as defined in the original paper is a Tree made of concatenation
+# nodes and containing `Flat` (that is Array-based) strings as Leaves.
+#
+# Example :
+#
+# `            Concat                   `
+# `          /        \                 `
+# `    Concat          Concat           `
+# `   /      \        /      \          `
+# `"My"     " Name"  " is"   " Simon."  `
+#
+# Note that the above example is not representative of the actual implementation
+# of `Ropes`, since short leaves are merged to keep the rope at an acceptable
+# height (hence, this rope here might actually be a `FlatString` !).
+module ropes
+
+intrude import string
+
+# Maxlen is the maximum length of a Leaf node
+#
+# When concatenating two leaves, if `new_length` > `maxlen`,
+# A `Concat` node is created instead
+#
+# Its purpose is to limit the depth of the `Rope` (this
+# improves performance when accessing/iterating).
+fun maxlen: Int do return 64
+
+# String using a tree-based representation with leaves as `FlatStrings`
+private abstract class Rope
+       super Text
+end
+
+private abstract class RopeString
+       super Rope
+       super String
+
+       redef fun chars is cached do return new RopeChars(self)
+end
+
+# Node that represents a concatenation between two `String`
+private class Concat
+       super RopeString
+
+       redef var length: Int
+
+       redef fun substrings do return new RopeSubstrings(self)
+
+       redef fun empty do return ""
+
+       redef fun to_cstring is cached do
+               var len = length
+               var ns = new NativeString(len + 1)
+               ns[len] = '\0'
+               var off = 0
+               for i in substrings do
+                       var ilen = i.length
+                       i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off)
+                       off += ilen
+               end
+               return ns
+       end
+
+       # Left child of the node
+       var left: String
+       # Right child of the node
+       var right: String
+
+       init(l: String, r: String) do
+               left = l
+               right = r
+               length = l.length + r.length
+       end
+
+       redef fun output do
+               left.output
+               right.output
+       end
+
+       redef fun iterator do return new RopeIter(self)
+
+       redef fun *(i) do
+               var x: String = self
+               for j in [1 .. i[ do x += self
+               return x
+       end
+
+       redef fun [](i) do
+               var llen = left.length
+               if i >= llen then return right[i - llen]
+               return left[i]
+       end
+
+       redef fun substring(from, len) do
+               var llen = left.length
+               if from < llen then
+                       if from + len < llen then return left.substring(from,len)
+                       var lsublen = llen - from
+                       return left.substring_from(from) + right.substring(0, len - lsublen)
+               else
+                       return right.substring(from - llen, len)
+               end
+       end
+
+       redef fun reversed do return new Concat(right.reversed, left.reversed)
+
+       redef fun insert_at(s, pos) do
+               if pos > left.length then
+                       return left + right.insert_at(s, pos - left.length)
+               end
+               return left.insert_at(s, pos) + right
+       end
+
+       redef fun to_upper do return new Concat(left.to_upper, right.to_upper)
+
+       redef fun to_lower do return new Concat(left.to_lower, right.to_lower)
+
+       redef fun +(o) do
+               var s = o.to_s
+               var mlen = length
+               var slen = s.length
+               if s isa Concat then
+                       return new Concat(self, s)
+               else
+                       var r = right
+                       var rlen = r.length
+                       if rlen + slen > maxlen then return new Concat(left, new Concat(r, s))
+                       return new Concat(left, r + s)
+               end
+       end
+end
+
+redef class FlatString
+
+       redef fun insert_at(s, pos) do
+               var l = substring(0, pos)
+               var r = substring_from(pos)
+               return l + s + r
+       end
+
+       redef fun +(o) do
+               var s = o.to_s
+               var slen = s.length
+               var mlen = length
+               if slen == 0 then return self
+               if mlen == 0 then return s
+               var nlen = slen + mlen
+               if s isa FlatString then
+                       if nlen > maxlen then return new Concat(self, s)
+                       var mits = items
+                       var sifrom = s.index_from
+                       var mifrom = index_from
+                       var sits = s.items
+                       var ns = new NativeString(nlen + 1)
+                       mits.copy_to(ns, mlen, mifrom, 0)
+                       sits.copy_to(ns, slen, sifrom, mlen)
+                       return ns.to_s_with_length(nlen)
+               else if s isa Concat then
+                       var sl = s.left
+                       var sllen = sl.length
+                       if sllen + mlen > maxlen then return new Concat(self, s)
+                       return new Concat(self + sl, s.right)
+               else
+                       abort
+               end
+       end
+end
+
+# A simple linked list for use with iterators
+private class RopeIterPiece
+       # The encapsulated node of the `Rope`
+       var node: String
+       # Was its left child (if any) visited ?
+       var ldone: Bool
+       # Was its right child (if any) visited ?
+       var rdone: Bool
+       # The previous node in the list.
+       var prev: nullable RopeIterPiece
+end
+
+# A reverse iterator capable of working with `Rope` objects
+private class RopeReviter
+       super IndexedIterator[Char]
+
+       # Current NativeString
+       var ns: String
+       # Current position in NativeString
+       var pns: Int
+       # Position in the Rope (0-indexed)
+       var pos: Int
+       # Iterator on the substrings, does the Postfix part of
+       # the Rope traversal.
+       var subs: IndexedIterator[String]
+
+       init(root: RopeString) do
+               pos = root.length - 1
+               subs = new ReverseRopeSubstrings(root)
+               ns = subs.item
+               pns = ns.length - 1
+       end
+
+       init from(root: RopeString, pos: Int) do
+               self.pos = pos
+               subs = new ReverseRopeSubstrings.from(root, pos)
+               ns = subs.item
+               pns = pos - subs.index
+       end
+
+       redef fun index do return pos
+
+       redef fun is_ok do return pos >= 0
+
+       redef fun item do return ns[pns]
+
+       redef fun next do
+               pns -= 1
+               pos -= 1
+               if pns >= 0 then return
+               if not subs.is_ok then return
+               subs.next
+               if not subs.is_ok then return
+               ns = subs.item
+               pns = ns.length - 1
+       end
+end
+
+# Forward iterator on the chars of a `Rope`
+private class RopeIter
+       super IndexedIterator[Char]
+
+       # Position in current `String`
+       var pns: Int
+       # Current `String` being iterated on
+       var str: String
+       # Substrings of the Rope
+       var subs: IndexedIterator[String]
+       # Maximum position to iterate on (e.g. Rope.length)
+       var max: Int
+       # Position (char) in the Rope (0-indexed)
+       var pos: Int
+
+       init(root: RopeString) do
+               subs = new RopeSubstrings(root)
+               pns = 0
+               str = subs.item
+               max = root.length - 1
+               pos = 0
+       end
+
+       init from(root: RopeString, pos: Int) do
+               subs = new RopeSubstrings.from(root, pos)
+               pns = pos - subs.index
+               self.pos = pos
+               str = subs.item
+               max = root.length - 1
+       end
+
+       redef fun item do return str[pns]
+
+       redef fun is_ok do return pos <= max
+
+       redef fun index do return pos
+
+       redef fun next do
+               pns += 1
+               pos += 1
+               if pns < subs.item.length then return
+               if not subs.is_ok then return
+               subs.next
+               if not subs.is_ok then return
+               str = subs.item
+               pns = 0
+       end
+end
+
+# Substrings of a Rope (i.e. Reverse postfix iterator on leaves)
+private class ReverseRopeSubstrings
+       super IndexedIterator[String]
+
+       # Visit Stack
+       var iter: RopeIterPiece is noinit
+       # Position in `Rope`
+       var pos: Int is noinit
+
+       # Current leaf
+       var str: String is noinit
+
+       init(root: RopeString) do
+               var r = new RopeIterPiece(root, false, true, null)
+               pos = root.length - 1
+               var lnod: String = root
+               loop
+                       if lnod isa Concat then
+                               lnod = lnod.right
+                               r = new RopeIterPiece(lnod, false, true, r)
+                       else
+                               str = lnod
+                               iter = r
+                               break
+                       end
+               end
+       end
+
+       init from(root: RopeString, pos: Int) do
+               var r = new RopeIterPiece(root, false, true, null)
+               var rnod: String = root
+               var off = pos
+               loop
+                       if rnod isa Concat then
+                               if off >= rnod.left.length then
+                                       off -= rnod.left.length
+                                       rnod = rnod.right
+                                       r = new RopeIterPiece(rnod, false, true, r)
+                               else
+                                       r.ldone = true
+                                       rnod = rnod.left
+                                       r = new RopeIterPiece(rnod, false, true, r)
+                               end
+                       else
+                               str = rnod
+                               r.ldone = true
+                               iter = r
+                               self.pos = pos - off
+                               break
+                       end
+               end
+       end
+
+       redef fun item do return str
+
+       redef fun index do return pos
+
+       redef fun is_ok do return pos >= 0
+
+       redef fun next do
+               if pos < 0 then return
+               var curr: nullable RopeIterPiece = iter.prev
+               var currit = curr.node
+               while curr != null do
+                       currit = curr.node
+                       if not currit isa Concat then
+                               str = currit
+                               pos -= str.length
+                               iter = curr
+                               return
+                       end
+                       if not curr.rdone then
+                               curr.rdone = true
+                               curr = new RopeIterPiece(currit.right, false, false, curr)
+                               continue
+                       end
+                       if not curr.ldone then
+                               curr.ldone = true
+                               curr = new RopeIterPiece(currit.left, false, false, curr)
+                               continue
+                       end
+                       curr = curr.prev
+               end
+               pos = -1
+       end
+end
+
+# Substrings of a Rope (i.e. Postfix iterator on leaves)
+private class RopeSubstrings
+       super IndexedIterator[String]
+
+       # Visit Stack
+       var iter: RopeIterPiece is noinit
+       # Position in `Rope`
+       var pos: Int is noinit
+       # Maximum position in `Rope` (i.e. length - 1)
+       var max: Int is noinit
+
+       # Current leaf
+       var str: String is noinit
+
+       init(root: RopeString) do
+               var r = new RopeIterPiece(root, true, false, null)
+               pos = 0
+               max = root.length - 1
+               var rnod: String = root
+               loop
+                       if rnod isa Concat then
+                               rnod = rnod.left
+                               r = new RopeIterPiece(rnod, true, false, r)
+                       else
+                               str = rnod
+                               r.rdone = true
+                               iter = r
+                               break
+                       end
+               end
+       end
+
+       init from(root: RopeString, pos: Int) do
+               var r = new RopeIterPiece(root, true, false, null)
+               max = root.length - 1
+               var rnod: String = root
+               var off = pos
+               loop
+                       if rnod isa Concat then
+                               if off >= rnod.left.length then
+                                       r.rdone = true
+                                       off -= rnod.left.length
+                                       rnod = rnod.right
+                                       r = new RopeIterPiece(rnod, true, false, r)
+                               else
+                                       rnod = rnod.left
+                                       r = new RopeIterPiece(rnod, true, false, r)
+                               end
+                       else
+                               str = rnod
+                               r.rdone = true
+                               iter = r
+                               self.pos = pos - off
+                               break
+                       end
+               end
+       end
+
+       redef fun item do return str
+
+       redef fun is_ok do return pos <= max
+
+       redef fun index do return pos
+
+       redef fun next do
+               pos += str.length
+               if pos > max then return
+               var it = iter.prev
+               var rnod: String = it.node
+               loop
+                       if not rnod isa Concat then
+                               it.ldone = true
+                               it.rdone = true
+                               str = rnod
+                               iter = it.as(not null)
+                               break
+                       end
+                       if not it.ldone then
+                               rnod = rnod.left
+                               it.ldone = true
+                               it = new RopeIterPiece(rnod, false, false, it)
+                       else if not it.rdone then
+                               it.rdone = true
+                               rnod = rnod.right
+                               it = new RopeIterPiece(rnod, false, false, it)
+                       else
+                               it = it.prev
+                               rnod = it.node
+                               continue
+                       end
+               end
+       end
+end
+
+# Implementation of a `StringCharView` for `RopeString` objects
+private class RopeChars
+       super StringCharView
+
+       var tgt: RopeString
+
+       init(s: RopeString) do tgt = s
+
+       redef fun [](i) do
+               return tgt[i]
+       end
+
+       redef fun iterator_from(i) do return new RopeIter.from(tgt, i)
+
+       redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i)
+
+end