X-Git-Url: http://nitlanguage.org diff --git a/lib/core/text/ropes.nit b/lib/core/text/ropes.nit index 0bf97b8..ca846fd 100644 --- a/lib/core/text/ropes.nit +++ b/lib/core/text/ropes.nit @@ -58,7 +58,7 @@ intrude import flat # # Its purpose is to limit the depth of the `Rope` (this # improves performance when accessing/iterating). -fun maxlen: Int do return 64 +fun maxlen: Int do return 512 # String using a tree-based representation with leaves as `FlatStrings` private abstract class Rope @@ -70,13 +70,13 @@ private class Concat super Rope super String - redef var chars is lazy do return new RopeChars(self) + redef fun chars do return new RopeChars(self) - redef var bytes is lazy do return new RopeBytes(self) + redef fun bytes do return new RopeBytes(self) redef var length is noinit - redef var bytelen is noinit + redef var byte_length is noinit redef fun substrings do return new RopeSubstrings.from(self, 0) @@ -86,17 +86,15 @@ private class Concat var flat_cache: FlatString is noinit # Position of the beginning of `flat_cache` in `self` - var flat_last_pos_start: Int = -1 + var flat_last_pos_start: Int is noinit - var flat_last_pos_end: Int = -1 - - redef var to_cstring is lazy do - var len = _bytelen - var ns = new NativeString(len + 1) + redef fun to_cstring do + var len = _byte_length + var ns = new CString(len + 1) ns[len] = 0u8 var off = 0 for i in substrings do - var ilen = i._bytelen + var ilen = i._byte_length i.as(FlatString)._items.copy_to(ns, ilen, i.as(FlatString)._first_byte, off) off += ilen end @@ -111,11 +109,12 @@ private class Concat init do var l = _left var r = _right - length = l.length + r.length - _bytelen = l.bytelen + r.bytelen + _length = l.length + r.length + _byte_length = l.byte_length + r.byte_length + _flat_last_pos_start = _length end - redef fun is_empty do return _bytelen == 0 + redef fun is_empty do return _byte_length == 0 redef fun output do _left.output @@ -131,10 +130,11 @@ private class Concat end redef fun [](i) do - assert i >= 0 and i <= _length + assert i >= 0 and i < _length var flps = _flat_last_pos_start - if flps != -1 and i >= flps and i <= _flat_last_pos_end then - return _flat_cache.fetch_char_at(i - flps) + if i >= flps then + var fc = _flat_cache + if i < flps + fc._length then return fc.fetch_char_at(i - flps) end var lf = get_leaf_at(i) return lf.fetch_char_at(i - _flat_last_pos_start) @@ -142,8 +142,9 @@ private class Concat fun get_leaf_at(pos: Int): FlatString do var flps = _flat_last_pos_start - if flps != -1 and pos >= flps and pos <= _flat_last_pos_end then - return _flat_cache + if pos >= flps then + var fc = _flat_cache + if pos < flps + fc._length then return fc end var s: String = self var st = pos @@ -160,7 +161,6 @@ private class Concat end end _flat_last_pos_start = st - pos - _flat_last_pos_end = st - pos + s.length - 1 _flat_cache = s return s end @@ -178,8 +178,11 @@ private class Concat var end_index = from + count - 1 var flps = _flat_last_pos_start - if flps != -1 and from >= flps and end_index <= _flat_last_pos_end then - return _flat_cache.substring_impl(from - flps, count, end_index - flps) + if from >= flps then + var fc = _flat_cache + if end_index < flps + fc._length then + return fc.substring_impl(from - flps, count, end_index - flps) + end end var lft = _left @@ -209,12 +212,12 @@ private class Concat redef fun +(o) do var s = o.to_s - var slen = s.bytelen + var slen = s.byte_length if s isa Concat then return new Concat(self, s) else var r = _right - var rlen = r.bytelen + var rlen = r.byte_length if rlen + slen > maxlen then return new Concat(self, s) return new Concat(_left, r + s) end @@ -222,8 +225,8 @@ private class Concat redef fun copy_to_native(dest, n, src_offset, dest_offset) do var l = _left - if src_offset < l.bytelen then - var lcopy = l.bytelen - src_offset + if src_offset < l.byte_length then + var lcopy = l.byte_length - src_offset lcopy = if lcopy > n then n else lcopy l.copy_to_native(dest, lcopy, src_offset, dest_offset) dest_offset += lcopy @@ -278,292 +281,6 @@ private class Concat end end -# Mutable `Rope`, optimized for concatenation operations -# -# A `RopeBuffer` is an efficient way of building a `String` when -# concatenating small strings. -# -# It does concatenations in an optimized way by having a -# mutable part and an immutable part built by efficiently -# concatenating strings in chain. -# -# Every concatenation operation is done by copying a string to -# the mutable part and flushing it when full. -# -# However, when a long string is appended to the `Buffer`, -# the concatenation is done at it would be in a `Rope`. -class RopeBuffer - super Rope - super Buffer - - redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self) - - redef var bytes is lazy do return new RopeBufferBytes(self) - - # The final string being built on the fly - private var str: String = "" - - # Current concatenation buffer - private var ns: NativeString is noinit - - # Next available (e.g. unset) character in the `Buffer` - private var rpos = 0 - - # Length (in chars) of the buffered part - private var nslen = 0 - - # Keeps track of the buffer's currently dumped part - # - # This might happen if for instance, a String was being - # built by concatenating small parts of string and suddenly - # a long string (length > maxlen) is appended. - private var dumped: Int is noinit - - # Length of the complete rope in chars (0) - redef fun length do - var st = dumped - var len = str.length - while st < rpos do - st += ns[st].u8len - len += 1 - end - return len - end - - # Length of the complete rope in bytes - redef var bytelen = 0 - - # Length of the mutable part (in bytes) - # - # Is also used as base to compute the size of the next - # mutable native string (`ns`) - private var buf_size: Int is noinit - - redef fun substrings do return new RopeBufSubstringIterator.from(self) - - # Builds an empty `RopeBuffer` - init do - ns = new NativeString(maxlen) - buf_size = maxlen - dumped = 0 - end - - # Builds a new `RopeBuffer` with `str` in it. - init from(str: String) do - self.str = str - ns = new NativeString(maxlen) - buf_size = maxlen - _bytelen = str.length - dumped = 0 - end - - # Resets the informations of the `Buffer` - # - # This is called when doing in-place modifications - # on a previously to_s'd `RopeBuffer` - private fun reset do - var nns = new NativeString(buf_size) - var blen = rpos - dumped - ns.copy_to(nns, blen, dumped, 0) - ns = nns - dumped = 0 - rpos = blen - written = false - end - - redef fun [](i) do - if i < str.length then - return str[i] - else - var index = ns.char_to_byte_index_cached(i - str.length, 0, dumped) - return ns.char_at(index) - end - end - - redef fun []=(i, c) do - assert i >= 0 and i <= length - if i == length then add c - if i < str.length then - _bytelen += c.u8char_len - str[i].u8char_len - var s = str - var l = s.substring(0, i) - var r = s.substring_from(i + 1) - str = l + c.to_s + r - else - var reali = i - str.length - var index = ns.char_to_byte_index_cached(reali, 0, dumped) - var st_nxt = ns.char_to_byte_index_cached(reali + 1, reali, index) - var loc_c = ns.char_at(index) - if loc_c.u8char_len != c.u8char_len then - var delta = c.u8char_len - loc_c.u8char_len - var remsp = buf_size - rpos - if remsp < delta then - buf_size *= 2 - var nns = new NativeString(buf_size) - ns.copy_to(nns, index - dumped, dumped, 0) - ns.copy_to(nns, rpos - index - loc_c.u8char_len, index + loc_c.u8char_len, index - dumped + delta) - ns = nns - index = index - dumped - else - ns.copy_to(ns, rpos - st_nxt, st_nxt, st_nxt + delta) - end - _bytelen += delta - rpos += delta - end - ns.set_char_at(index, c) - end - end - - redef fun empty do return new RopeBuffer - - redef fun clear do - str = "" - _bytelen = 0 - rpos = 0 - dumped = 0 - if written then - ns = new NativeString(buf_size) - written = false - end - end - - redef fun substring(from, count) do - var strlen = str.length - - if from < 0 then - count += from - if count < 0 then count = 0 - from = 0 - end - - if count > length then count = length - from - - if count == 0 then return empty - - if from < strlen then - var subpos = strlen - from - if count <= subpos then - return new RopeBuffer.from(str.substring(from, count)) - else - var l = str.substring_from(from) - var rem = count - subpos - var nns = new NativeString(rem) - ns.copy_to(nns, rem, dumped, 0) - return new RopeBuffer.from(l + nns.to_s_unsafe(rem)) - end - else - var nns = new NativeString(count) - ns.copy_to(nns, count, dumped, 0) - return new RopeBuffer.from(nns.to_s_unsafe(count)) - end - end - - redef fun append(s) do - var slen = s.bytelen - if slen >= maxlen then - persist_buffer - str += s.to_s - return - end - if s isa FlatText then - var oits = s._items - var from = s.first_byte - var remsp = buf_size - rpos - if slen <= remsp then - oits.copy_to(ns, slen, from, rpos) - rpos += slen - return - end - var brk = oits.find_beginning_of_char_at(from + remsp) - oits.copy_to(ns, brk, from, rpos) - rpos += brk - dump_buffer - oits.copy_to(ns, slen - remsp, brk, 0) - rpos = slen - remsp - else - for i in s.substrings do append i - end - end - - redef fun add(c) do - var rp = rpos - var remsp = buf_size - rp - var cln = c.u8char_len - if cln > remsp then - dump_buffer - rp = 0 - end - ns.set_char_at(rp, c) - rp += cln - _bytelen += cln - rpos = rp - end - - # Converts the Buffer to a FlatString, appends it to - # the final String and re-allocates a new larger Buffer. - private fun dump_buffer do - written = false - var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped) - str += nstr - var bs = buf_size - bs = bs * 2 - ns = new NativeString(bs) - buf_size = bs - dumped = 0 - rpos = 0 - end - - # Similar to dump_buffer, but does not reallocate a new NativeString - private fun persist_buffer do - if rpos == dumped then return - var nstr = new FlatString.with_infos(ns, rpos - dumped, dumped) - str += nstr - dumped = rpos - end - - redef fun output do - str.output - new FlatString.with_infos(ns, rpos - dumped, dumped).output - end - - # Enlarge is useless here since the `Buffer` - # part is automatically dumped when necessary. - # - # Also, since the buffer can not be overused by a - # single string, there is no need for manual - # resizing. - # - # "You have no power here !" - redef fun enlarge(i) do end - - redef fun to_s do - persist_buffer - written = true - return str - end - - redef fun reverse do - # Flush the buffer in order to only have to reverse `str`. - if rpos > 0 and dumped != rpos then - str += new FlatString.with_infos(ns, rpos - dumped, dumped) - dumped = rpos - end - str = str.reversed - end - - redef fun upper do - if written then reset - persist_buffer - str = str.to_upper - end - - redef fun lower do - if written then reset - persist_buffer - str = str.to_lower - end -end - redef class FlatString redef fun insert_at(s, pos) do @@ -574,8 +291,8 @@ redef class FlatString redef fun +(o) do var s = o.to_s - var slen = s.bytelen - var mlen = _bytelen + var slen = s.byte_length + var mlen = _byte_length if slen == 0 then return self if mlen == 0 then return s var nlen = slen + mlen @@ -585,13 +302,13 @@ redef class FlatString var sifrom = s._first_byte var mifrom = _first_byte var sits = s._items - var ns = new NativeString(nlen + 1) + var ns = new CString(nlen + 1) mits.copy_to(ns, mlen, mifrom, 0) sits.copy_to(ns, slen, sifrom, mlen) return new FlatString.full(ns, nlen, 0, length + s.length) else if s isa Concat then var sl = s._left - var sllen = sl.bytelen + var sllen = sl.byte_length if sllen + mlen > maxlen then return new Concat(self, s) return new Concat(self + sl, s._right) else @@ -616,9 +333,9 @@ end private class RopeByteReverseIterator super IndexedIterator[Byte] - # Current NativeString - var ns: NativeString is noautoinit - # Current position in NativeString + # Current CString + var ns: CString is noautoinit + # Current position in CString var pns: Int is noautoinit # Position in the Rope (0-indexed) var pos: Int is noautoinit @@ -660,10 +377,10 @@ private class RopeByteIterator # Position in current `String` var pns: Int is noautoinit # Current `String` being iterated on - var ns: NativeString is noautoinit + var ns: CString is noautoinit # Substrings of the Rope var subs: IndexedIterator[FlatString] is noautoinit - # Maximum position to iterate on (e.g. Rope.bytelen) + # Maximum position to iterate on (e.g. Rope.byte_length) var max: Int is noautoinit # Position (char) in the Rope (0-indexed) var pos: Int is noautoinit @@ -673,7 +390,7 @@ private class RopeByteIterator pns = pos - subs.index self.pos = pos ns = subs.item._items - max = root.bytelen - 1 + max = root.byte_length - 1 end redef fun item do return ns[pns] @@ -685,7 +402,7 @@ private class RopeByteIterator redef fun next do pns += 1 pos += 1 - if pns < subs.item._bytelen then return + if pns < subs.item._byte_length then return if not subs.is_ok then return subs.next if not subs.is_ok then return @@ -699,9 +416,9 @@ end private class RopeCharReverseIterator super IndexedIterator[Char] - # Current NativeString + # Current CString var ns: String is noautoinit - # Current position in NativeString + # Current position in CString var pns: Int is noautoinit # Position in the Rope (0-indexed) var pos: Int is noautoinit @@ -821,7 +538,7 @@ private class ReverseRopeSubstrings redef fun next do if pos < 0 then return var curr = iter.prev - var currit = curr.node + var currit = curr.as(not null).node while curr != null do currit = curr.node if not currit isa Concat then @@ -846,39 +563,6 @@ private class ReverseRopeSubstrings end end -private class RopeBufSubstringIterator - super Iterator[FlatText] - - # Iterator on the substrings of the building string - var iter: Iterator[FlatText] is noautoinit - # Makes a String out of the buffered part of the Ropebuffer - var nsstr: FlatString is noautoinit - # Did we attain the buffered part ? - var nsstr_done = false - - init from(str: RopeBuffer) do - iter = str.str.substrings - nsstr = new FlatString.with_infos(str.ns, str.rpos - str.dumped, str.dumped) - if str.length == 0 then nsstr_done = true - end - - redef fun is_ok do return iter.is_ok or not nsstr_done - - redef fun item do - assert is_ok - if iter.is_ok then return iter.item - return nsstr - end - - redef fun next do - if iter.is_ok then - iter.next - return - end - nsstr_done = true - end -end - # Substrings of a Rope (i.e. Postfix iterator on leaves) private class RopeSubstrings super IndexedIterator[FlatString] @@ -928,14 +612,14 @@ private class RopeSubstrings redef fun next do pos += str.length if pos > max then return - var it = iter.prev + var it = iter.prev.as(not null) var rnod = it.node loop if not rnod isa Concat then it.ldone = true it.rdone = true str = rnod.as(FlatString) - iter = it.as(not null) + iter = it break end if not it.ldone then @@ -947,7 +631,7 @@ private class RopeSubstrings rnod = rnod._right it = new RopeCharIteratorPiece(rnod, false, false, it) else - it = it.prev + it = it.prev.as(not null) rnod = it.node continue end @@ -984,7 +668,7 @@ private class RopeBytes var cache_end: Int = -1 redef fun [](i) do - assert i >= 0 and i < target._bytelen + assert i >= 0 and i < target._byte_length var flps = _cache_start if i >= flps and i <= _cache_end then return _cache.bytes[i - flps] @@ -1004,7 +688,7 @@ private class RopeBytes if s isa FlatString then break s = s.as(Concat) var lft = s._left - var llen = lft.bytelen + var llen = lft.byte_length if pos >= llen then s = s._right pos -= llen @@ -1013,7 +697,7 @@ private class RopeBytes end end _cache_start = st - pos - _cache_end = st - pos + s.bytelen - 1 + _cache_end = st - pos + s.byte_length - 1 _cache = s return s end @@ -1023,175 +707,3 @@ private class RopeBytes redef fun reverse_iterator_from(i) do return new RopeByteReverseIterator.from(target, i) end - -# An Iterator over a RopeBuffer. -class RopeBufferCharIterator - super IndexedIterator[Char] - - # Subiterator. - var sit: IndexedIterator[Char] is noautoinit - - redef fun index do return sit.index - - # Init the iterator from a RopeBuffer starting from `pos`. - init from(t: RopeBuffer, pos: Int) do - t.persist_buffer - sit = t.str.chars.iterator_from(pos) - end - - redef fun is_ok do return sit.is_ok - - redef fun item do - assert is_ok - return sit.item - end - - redef fun next do sit.next -end - -# Reverse iterator over a RopeBuffer. -class RopeBufferCharReverseIterator - super IndexedIterator[Char] - - # Subiterator. - var sit: IndexedIterator[Char] is noautoinit - - redef fun index do return sit.index - - # Init the iterator from a RopeBuffer starting from `pos`. - init from(tgt: RopeBuffer, pos: Int) do - tgt.persist_buffer - sit = tgt.str.chars.reverse_iterator_from(pos) - end - - redef fun is_ok do return sit.is_ok - - redef fun item do - assert is_ok - return sit.item - end - - redef fun next do sit.next -end - -# View on the chars of a `RopeBuffer` -class RopeBufferChars - super BufferCharView - - redef type SELFTYPE: RopeBuffer - - redef fun [](i) do return target[i] - - redef fun []=(i,c) do target[i] = c - - redef fun add(c) do target.add c - - redef fun push(c) do target.add c - - redef fun iterator_from(i) do return new RopeBufferCharIterator.from(target, i) - - redef fun reverse_iterator_from(i) do return new RopeBufferCharReverseIterator.from(target, i) -end - -# An Iterator over a RopeBuffer. -class RopeBufferByteIterator - super IndexedIterator[Byte] - - # Subiterator. - var sit: IndexedIterator[Byte] is noautoinit - - # Native string iterated over. - var ns: NativeString is noautoinit - - # Current position in `ns`. - var pns: Int is noautoinit - - # Maximum position iterable. - var maxpos: Int is noautoinit - - redef var index is noautoinit - - # Init the iterator from a RopeBuffer starting from `pos`. - init from(t: RopeBuffer, pos: Int) do - ns = t.ns - maxpos = t._bytelen - sit = t.str.bytes.iterator_from(pos) - pns = pos - t.str.length - index = pos - end - - redef fun is_ok do return index < maxpos - - redef fun item do - if sit.is_ok then return sit.item - return ns[pns] - end - - redef fun next do - index += 1 - if sit.is_ok then - sit.next - else - pns += 1 - end - end -end - -# Reverse iterator over a RopeBuffer. -class RopeBufferByteReverseIterator - super IndexedIterator[Byte] - - # Subiterator. - var sit: IndexedIterator[Byte] is noautoinit - - # Native string iterated over. - var ns: NativeString is noautoinit - - # Current position in `ns`. - var pns: Int is noautoinit - - redef var index is noautoinit - - # Init the iterator from a RopeBuffer starting from `pos`. - init from(tgt: RopeBuffer, pos: Int) do - sit = tgt.str.bytes.reverse_iterator_from(pos - (tgt.rpos - tgt.dumped)) - pns = pos - tgt.str.bytelen + tgt.rpos - index = pos - ns = tgt.ns - end - - redef fun is_ok do return index >= 0 - - redef fun item do - if pns >= 0 then return ns[pns] - return sit.item - end - - redef fun next do - index -= 1 - if pns >= 0 then - pns -= 1 - else - sit.next - end - end -end - -# View on the chars of a `RopeBuffer` -class RopeBufferBytes - super BufferByteView - - redef type SELFTYPE: RopeBuffer - - redef fun [](i) do - if i < target.str.bytelen then - return target.str.bytes[i] - else - return target.ns[i - target.str.bytelen] - end - end - - redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i) - - redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i) -end