From b8f1ad7f2d2bde470f07e3469cbb6f5bd6b0447b Mon Sep 17 00:00:00 2001 From: Lucas Bajolet Date: Tue, 21 Oct 2014 10:25:41 -0400 Subject: [PATCH] lib/standard/ropes: Rewrote ropes base block. Signed-off-by: Lucas Bajolet --- lib/standard/ropes.nit | 501 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 lib/standard/ropes.nit diff --git a/lib/standard/ropes.nit b/lib/standard/ropes.nit new file mode 100644 index 0000000..a9f7616 --- /dev/null +++ b/lib/standard/ropes.nit @@ -0,0 +1,501 @@ +# This file is part of NIT (http://www.nitlanguage.org). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Tree-based representation of a String. +# +# Ropes are a data structure introduced in a 1995 paper from +# Hans J. Boehm, Russ Atkinson and Michael Plass. +# See : `Ropes : an Alternative to Strings`, `Software - Practice and Experience, +# Vol. 25(12), 1315-1330 (December 1995)`. +# +# The implementation developed here provides an automatic change +# of data structure depending on the length of the leaves. +# +# The length from which a `Rope` is built from a `flat` string +# if defined at top-level (see `maxlen`) and can be redefined by clients +# depending on their needs (e.g. if you need to bench the speed of +# the creation of concat nodes, lower the size of maxlen). +# +# A Rope as defined in the original paper is a Tree made of concatenation +# nodes and containing `Flat` (that is Array-based) strings as Leaves. +# +# Example : +# +# ` Concat ` +# ` / \ ` +# ` Concat Concat ` +# ` / \ / \ ` +# `"My" " Name" " is" " Simon." ` +# +# Note that the above example is not representative of the actual implementation +# of `Ropes`, since short leaves are merged to keep the rope at an acceptable +# height (hence, this rope here might actually be a `FlatString` !). +module ropes + +intrude import string + +# Maxlen is the maximum length of a Leaf node +# +# When concatenating two leaves, if `new_length` > `maxlen`, +# A `Concat` node is created instead +# +# Its purpose is to limit the depth of the `Rope` (this +# improves performance when accessing/iterating). +fun maxlen: Int do return 64 + +# String using a tree-based representation with leaves as `FlatStrings` +private abstract class Rope + super Text +end + +private abstract class RopeString + super Rope + super String + + redef fun chars is cached do return new RopeChars(self) +end + +# Node that represents a concatenation between two `String` +private class Concat + super RopeString + + redef var length: Int + + redef fun substrings do return new RopeSubstrings(self) + + redef fun empty do return "" + + redef fun to_cstring is cached do + var len = length + var ns = new NativeString(len + 1) + ns[len] = '\0' + var off = 0 + for i in substrings do + var ilen = i.length + i.as(FlatString).items.copy_to(ns, ilen, i.as(FlatString).index_from, off) + off += ilen + end + return ns + end + + # Left child of the node + var left: String + # Right child of the node + var right: String + + init(l: String, r: String) do + left = l + right = r + length = l.length + r.length + end + + redef fun output do + left.output + right.output + end + + redef fun iterator do return new RopeIter(self) + + redef fun *(i) do + var x: String = self + for j in [1 .. i[ do x += self + return x + end + + redef fun [](i) do + var llen = left.length + if i >= llen then return right[i - llen] + return left[i] + end + + redef fun substring(from, len) do + var llen = left.length + if from < llen then + if from + len < llen then return left.substring(from,len) + var lsublen = llen - from + return left.substring_from(from) + right.substring(0, len - lsublen) + else + return right.substring(from - llen, len) + end + end + + redef fun reversed do return new Concat(right.reversed, left.reversed) + + redef fun insert_at(s, pos) do + if pos > left.length then + return left + right.insert_at(s, pos - left.length) + end + return left.insert_at(s, pos) + right + end + + redef fun to_upper do return new Concat(left.to_upper, right.to_upper) + + redef fun to_lower do return new Concat(left.to_lower, right.to_lower) + + redef fun +(o) do + var s = o.to_s + var mlen = length + var slen = s.length + if s isa Concat then + return new Concat(self, s) + else + var r = right + var rlen = r.length + if rlen + slen > maxlen then return new Concat(left, new Concat(r, s)) + return new Concat(left, r + s) + end + end +end + +redef class FlatString + + redef fun insert_at(s, pos) do + var l = substring(0, pos) + var r = substring_from(pos) + return l + s + r + end + + redef fun +(o) do + var s = o.to_s + var slen = s.length + var mlen = length + if slen == 0 then return self + if mlen == 0 then return s + var nlen = slen + mlen + if s isa FlatString then + if nlen > maxlen then return new Concat(self, s) + var mits = items + var sifrom = s.index_from + var mifrom = index_from + var sits = s.items + var ns = new NativeString(nlen + 1) + mits.copy_to(ns, mlen, mifrom, 0) + sits.copy_to(ns, slen, sifrom, mlen) + return ns.to_s_with_length(nlen) + else if s isa Concat then + var sl = s.left + var sllen = sl.length + if sllen + mlen > maxlen then return new Concat(self, s) + return new Concat(self + sl, s.right) + else + abort + end + end +end + +# A simple linked list for use with iterators +private class RopeIterPiece + # The encapsulated node of the `Rope` + var node: String + # Was its left child (if any) visited ? + var ldone: Bool + # Was its right child (if any) visited ? + var rdone: Bool + # The previous node in the list. + var prev: nullable RopeIterPiece +end + +# A reverse iterator capable of working with `Rope` objects +private class RopeReviter + super IndexedIterator[Char] + + # Current NativeString + var ns: String + # Current position in NativeString + var pns: Int + # Position in the Rope (0-indexed) + var pos: Int + # Iterator on the substrings, does the Postfix part of + # the Rope traversal. + var subs: IndexedIterator[String] + + init(root: RopeString) do + pos = root.length - 1 + subs = new ReverseRopeSubstrings(root) + ns = subs.item + pns = ns.length - 1 + end + + init from(root: RopeString, pos: Int) do + self.pos = pos + subs = new ReverseRopeSubstrings.from(root, pos) + ns = subs.item + pns = pos - subs.index + end + + redef fun index do return pos + + redef fun is_ok do return pos >= 0 + + redef fun item do return ns[pns] + + redef fun next do + pns -= 1 + pos -= 1 + if pns >= 0 then return + if not subs.is_ok then return + subs.next + if not subs.is_ok then return + ns = subs.item + pns = ns.length - 1 + end +end + +# Forward iterator on the chars of a `Rope` +private class RopeIter + super IndexedIterator[Char] + + # Position in current `String` + var pns: Int + # Current `String` being iterated on + var str: String + # Substrings of the Rope + var subs: IndexedIterator[String] + # Maximum position to iterate on (e.g. Rope.length) + var max: Int + # Position (char) in the Rope (0-indexed) + var pos: Int + + init(root: RopeString) do + subs = new RopeSubstrings(root) + pns = 0 + str = subs.item + max = root.length - 1 + pos = 0 + end + + init from(root: RopeString, pos: Int) do + subs = new RopeSubstrings.from(root, pos) + pns = pos - subs.index + self.pos = pos + str = subs.item + max = root.length - 1 + end + + redef fun item do return str[pns] + + redef fun is_ok do return pos <= max + + redef fun index do return pos + + redef fun next do + pns += 1 + pos += 1 + if pns < subs.item.length then return + if not subs.is_ok then return + subs.next + if not subs.is_ok then return + str = subs.item + pns = 0 + end +end + +# Substrings of a Rope (i.e. Reverse postfix iterator on leaves) +private class ReverseRopeSubstrings + super IndexedIterator[String] + + # Visit Stack + var iter: RopeIterPiece is noinit + # Position in `Rope` + var pos: Int is noinit + + # Current leaf + var str: String is noinit + + init(root: RopeString) do + var r = new RopeIterPiece(root, false, true, null) + pos = root.length - 1 + var lnod: String = root + loop + if lnod isa Concat then + lnod = lnod.right + r = new RopeIterPiece(lnod, false, true, r) + else + str = lnod + iter = r + break + end + end + end + + init from(root: RopeString, pos: Int) do + var r = new RopeIterPiece(root, false, true, null) + var rnod: String = root + var off = pos + loop + if rnod isa Concat then + if off >= rnod.left.length then + off -= rnod.left.length + rnod = rnod.right + r = new RopeIterPiece(rnod, false, true, r) + else + r.ldone = true + rnod = rnod.left + r = new RopeIterPiece(rnod, false, true, r) + end + else + str = rnod + r.ldone = true + iter = r + self.pos = pos - off + break + end + end + end + + redef fun item do return str + + redef fun index do return pos + + redef fun is_ok do return pos >= 0 + + redef fun next do + if pos < 0 then return + var curr: nullable RopeIterPiece = iter.prev + var currit = curr.node + while curr != null do + currit = curr.node + if not currit isa Concat then + str = currit + pos -= str.length + iter = curr + return + end + if not curr.rdone then + curr.rdone = true + curr = new RopeIterPiece(currit.right, false, false, curr) + continue + end + if not curr.ldone then + curr.ldone = true + curr = new RopeIterPiece(currit.left, false, false, curr) + continue + end + curr = curr.prev + end + pos = -1 + end +end + +# Substrings of a Rope (i.e. Postfix iterator on leaves) +private class RopeSubstrings + super IndexedIterator[String] + + # Visit Stack + var iter: RopeIterPiece is noinit + # Position in `Rope` + var pos: Int is noinit + # Maximum position in `Rope` (i.e. length - 1) + var max: Int is noinit + + # Current leaf + var str: String is noinit + + init(root: RopeString) do + var r = new RopeIterPiece(root, true, false, null) + pos = 0 + max = root.length - 1 + var rnod: String = root + loop + if rnod isa Concat then + rnod = rnod.left + r = new RopeIterPiece(rnod, true, false, r) + else + str = rnod + r.rdone = true + iter = r + break + end + end + end + + init from(root: RopeString, pos: Int) do + var r = new RopeIterPiece(root, true, false, null) + max = root.length - 1 + var rnod: String = root + var off = pos + loop + if rnod isa Concat then + if off >= rnod.left.length then + r.rdone = true + off -= rnod.left.length + rnod = rnod.right + r = new RopeIterPiece(rnod, true, false, r) + else + rnod = rnod.left + r = new RopeIterPiece(rnod, true, false, r) + end + else + str = rnod + r.rdone = true + iter = r + self.pos = pos - off + break + end + end + end + + redef fun item do return str + + redef fun is_ok do return pos <= max + + redef fun index do return pos + + redef fun next do + pos += str.length + if pos > max then return + var it = iter.prev + var rnod: String = it.node + loop + if not rnod isa Concat then + it.ldone = true + it.rdone = true + str = rnod + iter = it.as(not null) + break + end + if not it.ldone then + rnod = rnod.left + it.ldone = true + it = new RopeIterPiece(rnod, false, false, it) + else if not it.rdone then + it.rdone = true + rnod = rnod.right + it = new RopeIterPiece(rnod, false, false, it) + else + it = it.prev + rnod = it.node + continue + end + end + end +end + +# Implementation of a `StringCharView` for `RopeString` objects +private class RopeChars + super StringCharView + + var tgt: RopeString + + init(s: RopeString) do tgt = s + + redef fun [](i) do + return tgt[i] + end + + redef fun iterator_from(i) do return new RopeIter.from(tgt, i) + + redef fun reverse_iterator_from(i) do return new RopeReviter.from(tgt, i) + +end -- 1.7.9.5