Merge: Added contributing guidelines and link from readme
[nit.git] / lib / core / text / ropes.nit
index c164529..f57b33d 100644 (file)
@@ -78,16 +78,18 @@ private class Concat
 
        redef var bytelen is noinit
 
-       redef fun substrings do return new RopeSubstrings(self)
+       redef fun substrings do return new RopeSubstrings.from(self, 0)
 
        redef fun empty do return ""
 
        # Cache for the latest accessed FlatString in `self`
-       var flat_cache: String = ""
+       var flat_cache: FlatString is noinit
 
        # Position of the beginning of `flat_cache` in `self`
        var flat_last_pos_start: Int = -1
 
+       var flat_last_pos_end: Int = -1
+
        redef var to_cstring is lazy do
                var len = _bytelen
                var ns = new NativeString(len + 1)
@@ -113,12 +115,14 @@ private class Concat
                _bytelen = l.bytelen + r.bytelen
        end
 
+       redef fun is_empty do return _bytelen == 0
+
        redef fun output do
                _left.output
                _right.output
        end
 
-       redef fun iterator do return new RopeCharIterator(self)
+       redef fun iterator do return new RopeCharIterator.from(self, 0)
 
        redef fun *(i) do
                var x: String = self
@@ -127,38 +131,65 @@ private class Concat
        end
 
        redef fun [](i) do
-               if flat_last_pos_start != -1 then
-                       var fsp = i - flat_last_pos_start
-                       if fsp >= 0 and fsp < flat_cache.length then return flat_cache[fsp]
+               assert i >= 0 and i <= _length
+               var flps = _flat_last_pos_start
+               if flps != -1 and i >= flps and i <= _flat_last_pos_end then
+                       return _flat_cache.fetch_char_at(i - flps)
+               end
+               var lf = get_leaf_at(i)
+               return lf.fetch_char_at(i - _flat_last_pos_start)
+       end
+
+       fun get_leaf_at(pos: Int): FlatString do
+               var flps = _flat_last_pos_start
+               if flps != -1 and pos >= flps and pos <= _flat_last_pos_end then
+                       return _flat_cache
                end
                var s: String = self
-               var st = i
+               var st = pos
                loop
                        if s isa FlatString then break
                        s = s.as(Concat)
                        var lft = s._left
                        var llen = lft.length
-                       if i >= llen then
+                       if pos >= llen then
                                s = s._right
-                               i -= llen
+                               pos -= llen
                        else
                                s = lft
                        end
                end
-               flat_last_pos_start = st - i
-               flat_cache = s
-               return s[i]
+               _flat_last_pos_start = st - pos
+               _flat_last_pos_end = st - pos + s.length - 1
+               _flat_cache = s
+               return s
        end
 
-       redef fun substring(from, len) do
+       redef fun substring(from, count) do
+               if from < 0 then
+                       count += from
+                       if count < 0 then return ""
+                       from = 0
+               end
+
+               var ln = _length
+               if (count + from) > ln then count = ln - from
+               if count <= 0 then return ""
+               var end_index = from + count - 1
+
+               var flps = _flat_last_pos_start
+               if flps != -1 and from >= flps and end_index <= _flat_last_pos_end then
+                       return _flat_cache.substring_impl(from - flps, count, end_index - flps)
+               end
+
                var lft = _left
                var llen = lft.length
                if from < llen then
-                       if from + len < llen then return lft.substring(from,len)
+                       if from + count < llen then return lft.substring(from, count)
                        var lsublen = llen - from
-                       return lft.substring_from(from) + _right.substring(0, len - lsublen)
+                       return lft.substring_from(from) + _right.substring(0, count - lsublen)
                else
-                       return _right.substring(from - llen, len)
+                       return _right.substring(from - llen, count)
                end
        end
 
@@ -190,23 +221,16 @@ private class Concat
        end
 
        redef fun copy_to_native(dest, n, src_offset, dest_offset) do
-               var subs = new RopeSubstrings.from(self, src_offset)
-               var st = src_offset - subs.pos
-               var off = dest_offset
-               while n > 0 do
-                       var it = subs.item
-                       if n > it.length then
-                               var cplen = it.length - st
-                               it._items.copy_to(dest, cplen, st, off)
-                               off += cplen
-                               n -= cplen
-                       else
-                               it._items.copy_to(dest, n, st, off)
-                               n = 0
-                       end
-                       subs.next
-                       st = 0
+               var l = _left
+               if src_offset < l.bytelen then
+                       var lcopy = l.bytelen - src_offset
+                       lcopy = if lcopy > n then n else lcopy
+                       l.copy_to_native(dest, lcopy, src_offset, dest_offset)
+                       dest_offset += lcopy
+                       n -= lcopy
+                       src_offset = 0
                end
+               _right.copy_to_native(dest, n, src_offset, dest_offset)
        end
 
        # Returns a balanced version of `self`
@@ -315,7 +339,7 @@ class RopeBuffer
        # mutable native string (`ns`)
        private var buf_size: Int is noinit
 
-       redef fun substrings do return new RopeBufSubstringIterator(self)
+       redef fun substrings do return new RopeBufSubstringIterator.from(self)
 
        # Builds an empty `RopeBuffer`
        init do
@@ -425,12 +449,12 @@ class RopeBuffer
                                var rem = count - subpos
                                var nns = new NativeString(rem)
                                ns.copy_to(nns, rem, dumped, 0)
-                               return new RopeBuffer.from(l + nns.to_s_with_length(rem))
+                               return new RopeBuffer.from(l + nns.to_s_unsafe(rem))
                        end
                else
                        var nns = new NativeString(count)
                        ns.copy_to(nns, count, dumped, 0)
-                       return new RopeBuffer.from(nns.to_s_with_length(count))
+                       return new RopeBuffer.from(nns.to_s_unsafe(count))
                end
        end
 
@@ -463,14 +487,15 @@ class RopeBuffer
 
        redef fun add(c) do
                var rp = rpos
-               if rp >= buf_size then
+               var remsp = buf_size - rp
+               var cln = c.u8char_len
+               if cln > remsp then
                        dump_buffer
                        rp = 0
                end
-               # TODO: Fix when supporting UTF-8
-               ns[rp] = c.ascii
-               rp += 1
-               _bytelen += 1
+               ns.set_char_at(rp, c)
+               rp += cln
+               _bytelen += cln
                rpos = rp
        end
 
@@ -478,7 +503,7 @@ class RopeBuffer
        # the final String and re-allocates a new larger Buffer.
        private fun dump_buffer do
                written = false
-               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped)
                str += nstr
                var bs = buf_size
                bs = bs * 2
@@ -491,14 +516,14 @@ class RopeBuffer
        # Similar to dump_buffer, but does not reallocate a new NativeString
        private fun persist_buffer do
                if rpos == dumped then return
-               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+               var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped)
                str += nstr
                dumped = rpos
        end
 
        redef fun output do
                str.output
-               new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1).output
+               new FlatString.with_infos(ns, rpos - dumped, dumped).output
        end
 
        # Enlarge is useless here since the `Buffer`
@@ -520,7 +545,7 @@ class RopeBuffer
        redef fun reverse do
                # Flush the buffer in order to only have to reverse `str`.
                if rpos > 0 and dumped != rpos then
-                       str += new FlatString.with_infos(ns, rpos - dumped, dumped, rpos - 1)
+                       str += new FlatString.with_infos(ns, rpos - dumped, dumped)
                        dumped = rpos
                end
                str = str.reversed
@@ -563,7 +588,7 @@ redef class FlatString
                        var ns = new NativeString(nlen + 1)
                        mits.copy_to(ns, mlen, mifrom, 0)
                        sits.copy_to(ns, slen, sifrom, mlen)
-                       return new FlatString.full(ns, nlen, 0, nlen - 1, length + s.length)
+                       return new FlatString.full(ns, nlen, 0, length + s.length)
                else if s isa Concat then
                        var sl = s._left
                        var sllen = sl.bytelen
@@ -592,22 +617,14 @@ private class RopeByteReverseIterator
        super IndexedIterator[Byte]
 
        # Current NativeString
-       var ns: NativeString
+       var ns: NativeString is noautoinit
        # Current position in NativeString
-       var pns: Int
+       var pns: Int is noautoinit
        # Position in the Rope (0-indexed)
-       var pos: Int
+       var pos: Int is noautoinit
        # Iterator on the substrings, does the Postfix part of
        # the Rope traversal.
-       var subs: IndexedIterator[FlatString]
-
-       init(root: Concat) is old_style_init do
-               pos = root._bytelen - 1
-               subs = new ReverseRopeSubstrings(root)
-               var s = subs.item
-               ns = s._items
-               pns = s._last_byte
-       end
+       var subs: IndexedIterator[FlatString] is noautoinit
 
        init from(root: Concat, pos: Int) do
                self.pos = pos
@@ -632,7 +649,7 @@ private class RopeByteReverseIterator
                if not subs.is_ok then return
                var s = subs.item
                ns = s._items
-               pns = s._last_byte
+               pns = s.last_byte
        end
 end
 
@@ -641,30 +658,22 @@ private class RopeByteIterator
        super IndexedIterator[Byte]
 
        # Position in current `String`
-       var pns: Int
+       var pns: Int is noautoinit
        # Current `String` being iterated on
-       var ns: NativeString
+       var ns: NativeString is noautoinit
        # Substrings of the Rope
-       var subs: IndexedIterator[FlatString]
-       # Maximum position to iterate on (e.g. Rope.length)
-       var max: Int
+       var subs: IndexedIterator[FlatString] is noautoinit
+       # Maximum position to iterate on (e.g. Rope.bytelen)
+       var max: Int is noautoinit
        # Position (char) in the Rope (0-indexed)
-       var pos: Int
-
-       init(root: Concat) is old_style_init do
-               subs = new RopeSubstrings(root)
-               pns = 0
-               ns = subs.item._items
-               max = root.length - 1
-               pos = 0
-       end
+       var pos: Int is noautoinit
 
        init from(root: Concat, pos: Int) do
                subs = new RopeSubstrings.from(root, pos)
                pns = pos - subs.index
                self.pos = pos
                ns = subs.item._items
-               max = root.length - 1
+               max = root.bytelen - 1
        end
 
        redef fun item do return ns[pns]
@@ -691,21 +700,14 @@ private class RopeCharReverseIterator
        super IndexedIterator[Char]
 
        # Current NativeString
-       var ns: String
+       var ns: String is noautoinit
        # Current position in NativeString
-       var pns: Int
+       var pns: Int is noautoinit
        # Position in the Rope (0-indexed)
-       var pos: Int
+       var pos: Int is noautoinit
        # Iterator on the substrings, does the Postfix part of
        # the Rope traversal.
-       var subs: IndexedIterator[String]
-
-       init(root: Concat) is old_style_init do
-               pos = root.length - 1
-               subs = new ReverseRopeSubstrings(root)
-               ns = subs.item
-               pns = ns.length - 1
-       end
+       var subs: IndexedIterator[String] is noautoinit
 
        init from(root: Concat, pos: Int) do
                self.pos = pos
@@ -737,23 +739,15 @@ private class RopeCharIterator
        super IndexedIterator[Char]
 
        # Position in current `String`
-       var pns: Int
+       var pns: Int is noautoinit
        # Current `String` being iterated on
-       var str: String
+       var str: String is noautoinit
        # Substrings of the Rope
-       var subs: IndexedIterator[String]
+       var subs: IndexedIterator[String] is noautoinit
        # Maximum position to iterate on (e.g. Rope.length)
-       var max: Int
+       var max: Int is noautoinit
        # Position (char) in the Rope (0-indexed)
-       var pos: Int
-
-       init(root: Concat) is old_style_init do
-               subs = new RopeSubstrings(root)
-               pns = 0
-               str = subs.item
-               max = root.length - 1
-               pos = 0
-       end
+       var pos: Int is noautoinit
 
        init from(root: Concat, pos: Int) do
                subs = new RopeSubstrings.from(root, pos)
@@ -793,22 +787,6 @@ private class ReverseRopeSubstrings
        # Current leaf
        var str: FlatString is noinit
 
-       init(root: Concat) is old_style_init do
-               var r = new RopeCharIteratorPiece(root, false, true, null)
-               pos = root.length - 1
-               var lnod: String = root
-               loop
-                       if lnod isa Concat then
-                               lnod = lnod._right
-                               r = new RopeCharIteratorPiece(lnod, false, true, r)
-                       else
-                               str = lnod.as(FlatString)
-                               iter = r
-                               break
-                       end
-               end
-       end
-
        init from(root: Concat, pos: Int) do
                var r = new RopeCharIteratorPiece(root, false, true, null)
                var rnod: String = root
@@ -843,7 +821,7 @@ private class ReverseRopeSubstrings
        redef fun next do
                if pos < 0 then return
                var curr = iter.prev
-               var currit = curr.node
+               var currit = curr.as(not null).node
                while curr != null do
                        currit = curr.node
                        if not currit isa Concat then
@@ -872,15 +850,15 @@ private class RopeBufSubstringIterator
        super Iterator[FlatText]
 
        # Iterator on the substrings of the building string
-       var iter: Iterator[FlatText]
+       var iter: Iterator[FlatText] is noautoinit
        # Makes a String out of the buffered part of the Ropebuffer
-       var nsstr: FlatString
+       var nsstr: FlatString is noautoinit
        # Did we attain the buffered part ?
        var nsstr_done = false
 
-       init(str: RopeBuffer) is old_style_init do
+       init from(str: RopeBuffer) do
                iter = str.str.substrings
-               nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped, str.rpos - 1)
+               nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped)
                if str.length == 0 then nsstr_done = true
        end
 
@@ -915,24 +893,6 @@ private class RopeSubstrings
        # Current leaf
        var str: FlatString is noinit
 
-       init(root: Concat) is old_style_init do
-               var r = new RopeCharIteratorPiece(root, true, false, null)
-               pos = 0
-               max = root.length - 1
-               var rnod: String = root
-               loop
-                       if rnod isa Concat then
-                               rnod = rnod._left
-                               r = new RopeCharIteratorPiece(rnod, true, false, r)
-                       else
-                               str = rnod.as(FlatString)
-                               r.rdone = true
-                               iter = r
-                               break
-                       end
-               end
-       end
-
        init from(root: Concat, pos: Int) do
                var r = new RopeCharIteratorPiece(root, true, false, null)
                max = root.length - 1
@@ -968,14 +928,14 @@ private class RopeSubstrings
        redef fun next do
                pos += str.length
                if pos > max then return
-               var it = iter.prev
+               var it = iter.prev.as(not null)
                var rnod = it.node
                loop
                        if not rnod isa Concat then
                                it.ldone = true
                                it.rdone = true
                                str = rnod.as(FlatString)
-                               iter = it.as(not null)
+                               iter = it
                                break
                        end
                        if not it.ldone then
@@ -987,7 +947,7 @@ private class RopeSubstrings
                                rnod = rnod._right
                                it = new RopeCharIteratorPiece(rnod, false, false, it)
                        else
-                               it = it.prev
+                               it = it.prev.as(not null)
                                rnod = it.node
                                continue
                        end
@@ -1017,18 +977,45 @@ private class RopeBytes
 
        redef type SELFTYPE: Concat
 
+       var cache: FlatString is noinit
+
+       var cache_start: Int = -1
+
+       var cache_end: Int = -1
+
        redef fun [](i) do
-               var nod: String = target
+               assert i >= 0 and i < target._bytelen
+               var flps = _cache_start
+               if i >= flps and i <= _cache_end then
+                       return _cache.bytes[i - flps]
+               end
+               var lf = get_leaf_at(i)
+               return lf.bytes[i - _cache_start]
+       end
+
+       fun get_leaf_at(pos: Int): FlatString do
+               var flps = _cache_start
+               if pos >= flps and pos <= _cache_end then
+                       return _cache
+               end
+               var s: String = target
+               var st = pos
                loop
-                       if nod isa FlatString then return nod._items[i]
-                       if not nod isa Concat then abort
-                       var lft = nod._left
-                       if lft.bytelen >= i then
-                               nod = nod._right
+                       if s isa FlatString then break
+                       s = s.as(Concat)
+                       var lft = s._left
+                       var llen = lft.bytelen
+                       if pos >= llen then
+                               s = s._right
+                               pos -= llen
                        else
-                               nod = lft
+                               s = lft
                        end
                end
+               _cache_start = st - pos
+               _cache_end = st - pos + s.bytelen - 1
+               _cache = s
+               return s
        end
 
        redef fun iterator_from(i) do return new RopeByteIterator.from(target, i)
@@ -1042,16 +1029,10 @@ class RopeBufferCharIterator
        super IndexedIterator[Char]
 
        # Subiterator.
-       var sit: IndexedIterator[Char]
+       var sit: IndexedIterator[Char] is noautoinit
 
        redef fun index do return sit.index
 
-       # Init the iterator from a RopeBuffer.
-       init(t: RopeBuffer) is old_style_init do
-               t.persist_buffer
-               sit = t.str.chars.iterator
-       end
-
        # Init the iterator from a RopeBuffer starting from `pos`.
        init from(t: RopeBuffer, pos: Int) do
                t.persist_buffer
@@ -1073,16 +1054,10 @@ class RopeBufferCharReverseIterator
        super IndexedIterator[Char]
 
        # Subiterator.
-       var sit: IndexedIterator[Char]
+       var sit: IndexedIterator[Char] is noautoinit
 
        redef fun index do return sit.index
 
-       # Init the iterator from a RopeBuffer.
-       init(tgt: RopeBuffer) is old_style_init do
-               tgt.persist_buffer
-               sit = tgt.str.chars.reverse_iterator
-       end
-
        # Init the iterator from a RopeBuffer starting from `pos`.
        init from(tgt: RopeBuffer, pos: Int) do
                tgt.persist_buffer
@@ -1123,27 +1098,18 @@ class RopeBufferByteIterator
        super IndexedIterator[Byte]
 
        # Subiterator.
-       var sit: IndexedIterator[Byte]
+       var sit: IndexedIterator[Byte] is noautoinit
 
        # Native string iterated over.
-       var ns: NativeString
+       var ns: NativeString is noautoinit
 
        # Current position in `ns`.
-       var pns: Int
+       var pns: Int is noautoinit
 
        # Maximum position iterable.
-       var maxpos: Int
+       var maxpos: Int is noautoinit
 
-       redef var index
-
-       # Init the iterator from a RopeBuffer.
-       init(t: RopeBuffer) is old_style_init do
-               ns = t.ns
-               maxpos = t._bytelen
-               sit = t.str.bytes.iterator
-               pns = t.dumped
-               index = 0
-       end
+       redef var index is noautoinit
 
        # Init the iterator from a RopeBuffer starting from `pos`.
        init from(t: RopeBuffer, pos: Int) do
@@ -1176,23 +1142,15 @@ class RopeBufferByteReverseIterator
        super IndexedIterator[Byte]
 
        # Subiterator.
-       var sit: IndexedIterator[Byte]
+       var sit: IndexedIterator[Byte] is noautoinit
 
        # Native string iterated over.
-       var ns: NativeString
+       var ns: NativeString is noautoinit
 
        # Current position in `ns`.
-       var pns: Int
-
-       redef var index
+       var pns: Int is noautoinit
 
-       # Init the iterator from a RopeBuffer.
-       init(tgt: RopeBuffer) is old_style_init do
-               sit = tgt.str.bytes.reverse_iterator
-               pns = tgt.rpos - 1
-               index = tgt._bytelen - 1
-               ns = tgt.ns
-       end
+       redef var index is noautoinit
 
        # Init the iterator from a RopeBuffer starting from `pos`.
        init from(tgt: RopeBuffer, pos: Int) do
@@ -1211,7 +1169,7 @@ class RopeBufferByteReverseIterator
 
        redef fun next do
                index -= 1
-               if pns > 0 then
+               if pns >= 0 then
                        pns -= 1
                else
                        sit.next