Merge: Added contributing guidelines and link from readme
[nit.git] / lib / core / text / flat.nit
index c89b222..6273609 100644 (file)
@@ -36,9 +36,11 @@ end
 
 redef class FlatText
 
-       fun first_byte: Int do return 0
+       # First byte of the NativeString
+       protected fun first_byte: Int do return 0
 
-       fun last_byte: Int do return _bytelen - 1
+       # Last byte of the NativeString
+       protected fun last_byte: Int do return first_byte + _bytelen - 1
 
        # Cache of the latest position (char) explored in the string
        var position: Int = 0
@@ -48,17 +50,35 @@ redef class FlatText
 
        # Index of the character `index` in `_items`
        fun char_to_byte_index(index: Int): Int do
-               var ln = length
-               assert index >= 0
-               assert index < ln
+               var dpos = index - _position
+               var b = _bytepos
+               var its = _items
 
+               if dpos == 1 then
+                       if its[b] & 0x80u8 == 0x00u8 then
+                               b += 1
+                       else
+                               b += its.length_of_char_at(b)
+                       end
+                       _bytepos = b
+                       _position = index
+                       return b
+               end
+               if dpos == -1 then
+                       b = its.find_beginning_of_char_at(b - 1)
+                       _bytepos = b
+                       _position = index
+                       return b
+               end
+               if dpos == 0 then return b
+
+               var ln = _length
                var pos = _position
                # Find best insertion point
                var delta_begin = index
                var delta_end = (ln - 1) - index
                var delta_cache = (pos - index).abs
                var min = delta_begin
-               var its = _items
 
                if delta_cache < min then min = delta_cache
                if delta_end < min then min = delta_end
@@ -66,15 +86,15 @@ redef class FlatText
                var ns_i: Int
                var my_i: Int
 
-               if min == delta_begin then
-                       ns_i = first_byte
-                       my_i = 0
-               else if min == delta_cache then
+               if min == delta_cache then
                        ns_i = _bytepos
                        my_i = pos
+               else if min == delta_begin then
+                       ns_i = first_byte
+                       my_i = 0
                else
                        ns_i = its.find_beginning_of_char_at(last_byte)
-                       my_i = length - 1
+                       my_i = _length - 1
                end
 
                ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
@@ -179,7 +199,7 @@ redef class FlatText
                        end
                        pos += 1
                end
-               var s = new FlatString.with_infos(nits, nlen, 0, nlen - 1)
+               var s = new FlatString.with_infos(nits, nlen, 0)
                return s
        end
 
@@ -205,6 +225,22 @@ redef class FlatText
                                req_esc += 1
                        else if c == 0x5Cu8 then
                                req_esc += 1
+                       else if c == 0x3Fu8 then
+                               var j = pos + 1
+                               if j < length then
+                                       var next = its[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == 0x21u8 or
+                                               next == 0x28u8 or
+                                               next == 0x29u8 or
+                                               next == 0x2Du8 or
+                                               next == 0x2Fu8 or
+                                               next == 0x3Cu8 or
+                                               next == 0x3Du8 or
+                                               next == 0x3Eu8
+                                       then req_esc += 1
+                               end
                        else if c < 32u8 then
                                req_esc += 3
                        end
@@ -260,6 +296,27 @@ redef class FlatText
                                nns[opos] = 0x5Cu8
                                nns[opos + 1] = 0x5Cu8
                                opos += 2
+                       else if c == 0x3Fu8 then
+                               var j = pos + 1
+                               if j < length then
+                                       var next = its[j]
+                                       # We ignore `??'` because it will be escaped as `??\'`.
+                                       if
+                                               next == 0x21u8 or
+                                               next == 0x28u8 or
+                                               next == 0x29u8 or
+                                               next == 0x2Du8 or
+                                               next == 0x2Fu8 or
+                                               next == 0x3Cu8 or
+                                               next == 0x3Du8 or
+                                               next == 0x3Eu8
+                                       then
+                                               nns[opos] = 0x5Cu8
+                                               opos += 1
+                                       end
+                               end
+                               nns[opos] = 0x3Fu8
+                               opos += 1
                        else if c < 32u8 then
                                nns[opos] = 0x5Cu8
                                nns[opos + 1] = 0x30u8
@@ -272,32 +329,101 @@ redef class FlatText
                        end
                        pos += 1
                end
-               return nns.to_s_with_length(nlen)
+               return nns.to_s_unsafe(nlen)
+       end
+
+       # Character at char position `index` of `self`.
+       #
+       # Accesses next to the cached position (`_position`, `_bytepos`) are
+       # served by shortcuts; any other access asserts that `index` is within
+       # bounds and goes through `fetch_char_at`.
+       #
+       # NOTE(review): the `dpos == 0` shortcut returns before the bounds
+       # assertion at the end of the method; confirm that callers never ask
+       # for the cached position of an empty text.
+       redef fun [](index) do
+               var len = _length
+
+               # Statistically:
+               # * ~70% want the next char
+               # * ~23% want the previous
+               # * ~7% want the same char
+               #
+               # So it makes sense to shortcut early. And early is here.
+               var dpos = index - _position
+               var b = _bytepos
+               # NOTE(review): the `index < len - 1` and `index > 1` guards look like
+               # conservative limits keeping the shortcuts away from both ends; confirm.
+               if dpos == 1 and index < len - 1 then
+                       var its = _items
+                       var c = its[b]
+                       # High bit clear: the cached char takes a single byte
+                       if c & 0x80u8 == 0x00u8 then
+                               # We want the next, and current is easy.
+                               # So next is easy to find!
+                               b += 1
+                               _position = index
+                               _bytepos = b
+                               # The rest will be done by `dpos==0` below.
+                               dpos = 0
+                       end
+               else if dpos == -1 and index > 1 then
+                       var its = _items
+                       var c = its[b-1]
+                       # High bit clear: the previous char takes a single byte
+                       if c & 0x80u8 == 0x00u8 then
+                               # We want the previous, and it is easy.
+                               b -= 1
+                               dpos = 0
+                               _position = index
+                               _bytepos = b
+                               return c.ascii
+                       end
+               end
+               if dpos == 0 then
+                       # We know what we want (+0 or +1) just get it now!
+                       var its = _items
+                       var c = its[b]
+                       if c & 0x80u8 == 0x00u8 then return c.ascii
+                       return items.char_at(b)
+               end
+
+               # No shortcut applies: check the bounds, then take the generic path
+               assert index >= 0 and index < len
+               return fetch_char_at(index)
+       end
+
+       # Returns the character located at char position `index` of `self`.
+       #
+       # WARNING: no bound-checking is done here, use at your own risk.
+       fun fetch_char_at(index: Int): Char do
+               var byte_index = char_to_byte_index(index)
+               var native = _items
+               var lead = native[byte_index]
+               # A lead byte with its high bit set is decoded by `char_at`
+               if lead & 0x80u8 != 0x00u8 then return native.char_at(byte_index)
+               return lead.ascii
+       end
+
+       # Reads `ln` hexadecimal digits of `self`, starting at char position `pos`, as an integer.
+       #
+       # `pos` defaults to 0 and `ln` to the length remaining after `pos`.
+       #
+       #     assert "ff".to_hex == 255
+       redef fun to_hex(pos, ln) do
+               if pos == null then pos = 0
+               if ln == null then ln = length - pos
+               var cur = char_to_byte_index(pos)
+               var native = _items
+               var stop = cur + ln
+               var acc = 0
+               while cur < stop do
+                       # Each digit contributes four more bits to the result
+                       acc = (acc << 4) + native[cur].ascii.from_hex
+                       cur += 1
+               end
+               return acc
        end
 
-       redef fun [](index) do return _items.char_at(char_to_byte_index(index))
+       # Delegates to `_items.copy_to`, shifting `src_off` by `first_byte`
+       # so that the source offset is relative to the start of `self`.
+       redef fun copy_to_native(dst, n, src_off, dst_off) do
+               _items.copy_to(dst, n, first_byte + src_off, dst_off)
+       end
 end
 
 # Immutable strings of characters.
-class FlatString
+abstract class FlatString
        super FlatText
        super String
 
        # Index at which `self` begins in `_items`, inclusively
        redef var first_byte is noinit
 
-       # Index at which `self` ends in `_items`, inclusively
-       redef var last_byte is noinit
-
        redef var chars = new FlatStringCharView(self) is lazy
 
        redef var bytes = new FlatStringByteView(self) is lazy
 
-       redef var length is lazy do
-               if _bytelen == 0 then return 0
-               return _items.utf8_length(_first_byte, _last_byte)
-       end
-
        redef var to_cstring is lazy do
                var blen = _bytelen
                var new_items = new NativeString(blen + 1)
@@ -306,14 +432,15 @@ class FlatString
                return new_items
        end
 
-       redef fun reversed
-       do
+       redef fun reversed do
                var b = new FlatBuffer.with_capacity(_bytelen + 1)
-               for i in [length - 1 .. 0].step(-1) do
-                       b.add self[i]
+               var i = _length - 1
+               while i >= 0 do
+                       b.add self.fetch_char_at(i)
+                       i -= 1
                end
                var s = b.to_s.as(FlatString)
-               s.length = self.length
+               s._length = self._length
                return s
        end
 
@@ -321,24 +448,40 @@ class FlatString
 
        redef fun substring(from, count)
        do
-               assert count >= 0
+               if count <= 0 then return ""
 
                if from < 0 then
                        count += from
-                       if count < 0 then count = 0
+                       if count <= 0 then return ""
                        from = 0
                end
 
-               if (count + from) > length then count = length - from
+               var ln = _length
+               if (count + from) > ln then count = ln - from
                if count <= 0 then return ""
                var end_index = from + count - 1
+               return substring_impl(from, count, end_index)
+       end
+
+       # Builds the substring of `count` chars going from char `from` to char `end_index` (inclusive).
+       #
+       # No bound-checking is performed here on `from` and `end_index`.
+       private fun substring_impl(from, count, end_index: Int): String do
+               var cache = _position
+               var dfrom = (cache - from).abs
+               # Distance between the cached position and the last char.
+               # It used to be measured from `from`, which gave the span length.
+               var dend = (cache - end_index).abs
+
+               # Resolve first the bound that is the closest to the cached position,
+               # the second lookup then starts from the freshly updated cache.
+               var bytefrom: Int
+               var byteto: Int
+               if dfrom < dend then
+                       bytefrom = char_to_byte_index(from)
+                       byteto = char_to_byte_index(end_index)
+               else
+                       byteto = char_to_byte_index(end_index)
+                       bytefrom = char_to_byte_index(from)
+               end
 
-               var bytefrom = char_to_byte_index(from)
-               var byteto = char_to_byte_index(end_index)
                var its = _items
+               # Move `byteto` from the first to the last byte of the last char
                byteto += its.length_of_char_at(byteto) - 1
 
-               var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, byteto, count)
+               var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
                return s
        end
 
@@ -348,7 +491,7 @@ class FlatString
        do
                var outstr = new FlatBuffer.with_capacity(self._bytelen + 1)
 
-               var mylen = length
+               var mylen = _length
                var pos = 0
 
                while pos < mylen do
@@ -363,7 +506,7 @@ class FlatString
        do
                var outstr = new FlatBuffer.with_capacity(self._bytelen + 1)
 
-               var mylen = length
+               var mylen = _length
                var pos = 0
 
                while pos < mylen do
@@ -387,32 +530,26 @@ class FlatString
        #
        # `_items` will be used as is, without copy, to retrieve the characters of the string.
        # Aliasing issues are the responsibility of the caller.
-       private init with_infos(items: NativeString, bytelen, from, to: Int)
+       private new with_infos(items: NativeString, bytelen, from: Int)
        do
-               self._items = items
-               self._bytelen = bytelen
-               _first_byte = from
-               _last_byte = to
-               _bytepos = from
+               var len = items.utf8_length(from, bytelen)
+               if bytelen == len then return new ASCIIFlatString.full_data(items, bytelen, from, len)
+               return new UnicodeFlatString.full_data(items, bytelen, from, len)
        end
 
        # Low-level creation of a new string with all the data.
        #
        # `_items` will be used as is, without copy, to retrieve the characters of the string.
        # Aliasing issues are the responsibility of the caller.
-       private init full(items: NativeString, bytelen, from, to, length: Int)
+       private new full(items: NativeString, bytelen, from, length: Int)
        do
-               self._items = items
-               self.length = length
-               self._bytelen = bytelen
-               _first_byte = from
-               _last_byte = to
-               _bytepos = from
+               if bytelen == length then return new ASCIIFlatString.full_data(items, bytelen, from, length)
+               return new UnicodeFlatString.full_data(items, bytelen, from, length)
        end
 
        redef fun ==(other)
        do
-               if not other isa FlatString then return super
+               if not other isa FlatText then return super
 
                if self.object_id == other.object_id then return true
 
@@ -421,7 +558,7 @@ class FlatString
                if other._bytelen != my_length then return false
 
                var my_index = _first_byte
-               var its_index = other._first_byte
+               var its_index = other.first_byte
 
                var last_iteration = my_index + my_length
 
@@ -439,29 +576,32 @@ class FlatString
 
        redef fun <(other)
        do
-               if not other isa FlatString then return super
+               if not other isa FlatText then return super
 
                if self.object_id == other.object_id then return false
 
-               var my_length = self._bytelen
-               var its_length = other._bytelen
+               var myits = _items
+               var itsits = other._items
 
-               var max = if my_length < its_length then my_length else its_length
+               var mbt = _bytelen
+               var obt = other.bytelen
 
-               var myits = self.bytes
-               var itsits = other.bytes
+               var minln = if mbt < obt then mbt else obt
+               var mst = _first_byte
+               var ost = other.first_byte
 
-               for i in [0 .. max[ do
-                       var my_curr_char = myits[i]
-                       var its_curr_char = itsits[i]
+               for i in [0 .. minln[ do
+                       var my_curr_char = myits[mst]
+                       var its_curr_char = itsits[ost]
 
-                       if my_curr_char != its_curr_char then
-                               if my_curr_char < its_curr_char then return true
-                               return false
-                       end
+                       if my_curr_char > its_curr_char then return false
+                       if my_curr_char < its_curr_char then return true
+
+                       mst += 1
+                       ost += 1
                end
 
-               return my_length < its_length
+               return mbt < obt
        end
 
        redef fun +(o) do
@@ -477,7 +617,7 @@ class FlatString
                        var ns = new NativeString(nlen + 1)
                        mits.copy_to(ns, mlen, mifrom, 0)
                        sits.copy_to(ns, slen, sifrom, mlen)
-                       return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
+                       return new FlatString.full(ns, nlen, 0, _length + o.length)
                else
                        abort
                end
@@ -486,7 +626,7 @@ class FlatString
        redef fun *(i) do
                var mybtlen = _bytelen
                var new_bytelen = mybtlen * i
-               var mylen = length
+               var mylen = _length
                var newlen = mylen * i
                var its = _items
                var fb = _first_byte
@@ -498,10 +638,9 @@ class FlatString
                        offset += mybtlen
                        i -= 1
                end
-               return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
+               return new FlatString.full(ns, new_bytelen, 0, newlen)
        end
 
-
        redef fun hash
        do
                if hash_cache == null then
@@ -510,7 +649,7 @@ class FlatString
                        var i = _first_byte
 
                        var my_items = _items
-                       var max = _last_byte
+                       var max = last_byte
 
                        while i <= max do
                                h = (h << 5) + h + my_items[i].to_i
@@ -526,6 +665,80 @@ class FlatString
        redef fun substrings do return new FlatSubstringsIter(self)
 end
 
+# Regular UTF-8 flat strings
+private class UnicodeFlatString
+       super FlatString
+
+       # Builds the string over `items`, without copying it.
+       #
+       # `from` becomes the first byte of the string and the initial cached byte position.
+       init full_data(items: NativeString, bytelen, from, length: Int) do
+               _items = items
+               _bytelen = bytelen
+               _length = length
+               _first_byte = from
+               _bytepos = from
+       end
+
+       redef fun substring_from(from) do
+               var total = _length
+               if from >= total then return empty
+               if from <= 0 then return self
+               var start = char_to_byte_index(from)
+               # Bytes left once those located before `start` are skipped
+               var remaining = bytelen - (start - _first_byte)
+               return new FlatString.full(items, remaining, start, total - from)
+       end
+end
+
+# Flat strings whose characters are all ASCII
+#
+# A char index is turned into a byte index by adding `_first_byte`,
+# so access operations do not have to scan the bytes.
+private class ASCIIFlatString
+       super FlatString
+
+       # Builds the string over `items`, without copying it.
+       #
+       # `from` becomes the first byte of the string and the initial cached byte position.
+       init full_data(items: NativeString, bytelen, from, length: Int) do
+               _items = items
+               _bytelen = bytelen
+               _length = length
+               _first_byte = from
+               _bytepos = from
+       end
+
+       redef fun [](idx) do
+               assert idx >= 0 and idx < _bytelen
+               var byte = _items[_first_byte + idx]
+               return byte.ascii
+       end
+
+       redef fun substring(from, count) do
+               if count <= 0 then return ""
+               var total = _length
+               # Clamp the end of the requested range to the end of `self`
+               if (from + count) > total then count = total - from
+               if count <= 0 then return ""
+               if from < 0 then
+                       # Drop the part of the range located before the first char
+                       count += from
+                       if count <= 0 then return ""
+                       from = 0
+               end
+               var start = _first_byte + from
+               return new ASCIIFlatString.full_data(_items, count, start, count)
+       end
+
+       redef fun reversed do
+               var out = new FlatBuffer.with_capacity(_bytelen + 1)
+               var remaining = _length
+               # Walk from the last char down to the first one
+               while remaining > 0 do
+                       remaining -= 1
+                       out.add self[remaining]
+               end
+               return out.to_s.as(FlatString)
+       end
+
+       redef fun char_to_byte_index(index) do
+               return _first_byte + index
+       end
+
+       redef fun substring_impl(from, count, end_index) do
+               var start = _first_byte + from
+               return new ASCIIFlatString.full_data(_items, count, start, count)
+       end
+
+       redef fun fetch_char_at(i) do
+               return _items[_first_byte + i].ascii
+       end
+end
+
 private class FlatStringCharReverseIterator
        super IndexedIterator[Char]
 
@@ -533,11 +746,6 @@ private class FlatStringCharReverseIterator
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
-       do
-               init(tgt, pos)
-       end
-
        redef fun is_ok do return curr_pos >= 0
 
        redef fun item do return target[curr_pos]
@@ -553,14 +761,11 @@ private class FlatStringCharIterator
 
        var target: FlatString
 
-       var max: Int
+       var max: Int is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
-       do
-               init(tgt, tgt.length - 1, pos)
-       end
+       init do max = target._length - 1
 
        redef fun is_ok do return curr_pos <= max
 
@@ -579,9 +784,9 @@ private class FlatStringCharView
 
        redef fun [](index) do return target[index]
 
-       redef fun iterator_from(start) do return new FlatStringCharIterator.with_pos(target, start)
+       redef fun iterator_from(start) do return new FlatStringCharIterator(target, start)
 
-       redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator.with_pos(target, start)
+       redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator(target, start)
 
 end
 
@@ -590,13 +795,15 @@ private class FlatStringByteReverseIterator
 
        var target: FlatString
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
+       init
        do
-               init(tgt, tgt._items, pos + tgt._first_byte)
+               var tgt = target
+               target_items = tgt._items
+               curr_pos += tgt._first_byte
        end
 
        redef fun is_ok do return curr_pos >= target._first_byte
@@ -614,16 +821,18 @@ private class FlatStringByteIterator
 
        var target: FlatString
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatString, pos: Int)
+       init
        do
-               init(tgt, tgt._items, pos + tgt._first_byte)
+               var tgt = target
+               target_items = tgt._items
+               curr_pos += tgt._first_byte
        end
 
-       redef fun is_ok do return curr_pos <= target._last_byte
+       redef fun is_ok do return curr_pos <= target.last_byte
 
        redef fun item do return target_items[curr_pos]
 
@@ -640,18 +849,17 @@ private class FlatStringByteView
 
        redef fun [](index)
        do
-               # Check that the index (+ _first_byte) is not larger than _last_byte
+               # Check that the index (+ _first_byte) is not larger than last_byte
                # In other terms, if the index is valid
-               assert index >= 0
-               var target = self.target
+               var target = _target
+               assert index >= 0 and index < target._bytelen
                var ind = index + target._first_byte
-               assert ind <= target._last_byte
                return target._items[ind]
        end
 
-       redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
+       redef fun iterator_from(start) do return new FlatStringByteIterator(target, start)
 
-       redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator.with_pos(target, start)
+       redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator(target, start)
 
 end
 
@@ -670,8 +878,6 @@ class FlatBuffer
 
        redef var bytes = new FlatBufferByteView(self) is lazy
 
-       redef var length = 0
-
        private var char_cache: Int = -1
 
        private var byte_cache: Int = -1
@@ -721,10 +927,10 @@ class FlatBuffer
 
        redef fun []=(index, item)
        do
-               assert index >= 0 and index <= length
+               assert index >= 0 and index <= _length
                if written then reset
                is_dirty = true
-               if index == length then
+               if index == _length then
                        add item
                        return
                end
@@ -740,7 +946,6 @@ class FlatBuffer
                        lshift_bytes(ip + clen, -size_diff)
                end
                _bytelen += size_diff
-               bytepos += size_diff
                it.set_char_at(ip, item)
        end
 
@@ -753,14 +958,17 @@ class FlatBuffer
                enlarge(bt + clen)
                _items.set_char_at(bt, c)
                _bytelen += clen
-               length += 1
+               _length += 1
        end
 
        redef fun clear do
                is_dirty = true
-               if written then reset
                _bytelen = 0
-               length = 0
+               _length = 0
+               if written then
+                       _capacity = 16
+                       reset
+               end
        end
 
        redef fun empty do return new Buffer
@@ -769,12 +977,13 @@ class FlatBuffer
        do
                var c = capacity
                if cap <= c then return
-               while c <= cap do c = c * 2 + 2
+               if c <= 16 then c = 16
+               while c <= cap do c = c * 2
                # The COW flag can be set at false here, since
                # it does a copy of the current `Buffer`
                written = false
                var bln = _bytelen
-               var a = new NativeString(c+1)
+               var a = new NativeString(c)
                if bln > 0 then
                        var it = _items
                        if bln > 0 then it.copy_to(a, bln, 0, 0)
@@ -788,7 +997,7 @@ class FlatBuffer
                written = true
                var bln = _bytelen
                if bln == 0 then _items = new NativeString(1)
-               return new FlatString.full(_items, bln, 0, bln - 1, length)
+               return new FlatString.full(_items, bln, 0, _length)
        end
 
        redef fun to_cstring
@@ -797,7 +1006,7 @@ class FlatBuffer
                        var bln = _bytelen
                        var new_native = new NativeString(bln + 1)
                        new_native[bln] = 0u8
-                       if length > 0 then _items.copy_to(new_native, bln, 0, 0)
+                       if _length > 0 then _items.copy_to(new_native, bln, 0, 0)
                        real_items = new_native
                        is_dirty = false
                end
@@ -819,29 +1028,24 @@ class FlatBuffer
                self._items = items
                self.capacity = capacity
                self._bytelen = bytelen
-               self.length = length
+               self._length = length
        end
 
        # Create a new string copied from `s`.
        init from(s: Text)
        do
                _items = new NativeString(s.bytelen)
-               if s isa FlatText then
-                       _items = s._items
-               else
-                       for i in substrings do i.as(FlatString)._items.copy_to(_items, i._bytelen, 0, 0)
-               end
+               for i in s.substrings do i._items.copy_to(_items, i._bytelen, first_byte, 0)
                _bytelen = s.bytelen
-               length = s.length
+               _length = s.length
                _capacity = _bytelen
-               written = true
        end
 
        # Create a new empty string with a given capacity.
        init with_capacity(cap: Int)
        do
                assert cap >= 0
-               _items = new NativeString(cap + 1)
+               _items = new NativeString(cap)
                capacity = cap
                _bytelen = 0
        end
@@ -860,7 +1064,7 @@ class FlatBuffer
                        return
                end
                _bytelen = nln
-               length += s.length
+               _length += s.length
        end
 
        # Copies the content of self in `dest`
@@ -877,19 +1081,31 @@ class FlatBuffer
        do
                assert count >= 0
                if from < 0 then from = 0
-               if (from + count) > length then count = length - from
-               if count != 0 then
-                       var its = _items
-                       var bytefrom = its.char_to_byte_index(from)
-                       var byteto = its.char_to_byte_index(count + from - 1)
-                       byteto += its.char_at(byteto).u8char_len - 1
-                       var byte_length = byteto - bytefrom + 1
-                       var r_items = new NativeString(byte_length)
-                       its.copy_to(r_items, byte_length, bytefrom, 0)
-                       return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
-               else
-                       return new Buffer
+               if (from + count) > _length then count = _length - from
+               if count <= 0 then return new Buffer
+               var its = _items
+               var bytefrom = its.char_to_byte_index(from)
+               var byteto = its.char_to_byte_index(count + from - 1)
+               byteto += its.char_at(byteto).u8char_len - 1
+               var byte_length = byteto - bytefrom + 1
+               var r_items = new NativeString(byte_length)
+               its.copy_to(r_items, byte_length, bytefrom, 0)
+               return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
+       end
+
+       # Appends to `self` the `length` chars of `s` that start at char index `from`.
+       #
+       # A `FlatText` is copied byte-wise straight from its `_items`;
+       # any other kind of text is left to the inherited implementation.
+       redef fun append_substring_impl(s, from, length) do
+               if length <= 0 then return
+               if not s isa FlatText then
+                       super
+                       return
                end
+               var bytest = s.char_to_byte_index(from)
+               var bytend = s.char_to_byte_index(from + length - 1)
+               # `bytend` is the first byte of the last char, which may span several
+               # bytes: count all of them so that this char is not truncated.
+               var btln = bytend - bytest + s._items.length_of_char_at(bytend)
+               enlarge(btln + _bytelen)
+               s._items.copy_to(_items, btln, bytest, _bytelen)
+               _bytelen += btln
+               _length += length
        end
 
        redef fun reverse
@@ -903,7 +1119,7 @@ class FlatBuffer
        redef fun times(repeats)
        do
                var bln = _bytelen
-               var x = new FlatString.full(_items, bln, 0, bln - 1, length)
+               var x = new FlatString.full(_items, bln, 0, _length)
                for i in [1 .. repeats[ do
                        append(x)
                end
@@ -912,13 +1128,13 @@ class FlatBuffer
        redef fun upper
        do
                if written then reset
-               for i in [0 .. length[ do self[i] = self[i].to_upper
+               for i in [0 .. _length[ do self[i] = self[i].to_upper
        end
 
        redef fun lower
        do
                if written then reset
-               for i in [0 .. length[ do self[i] = self[i].to_lower
+               for i in [0 .. _length[ do self[i] = self[i].to_lower
        end
 end
 
@@ -927,14 +1143,11 @@ private class FlatBufferByteReverseIterator
 
        var target: FlatBuffer
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt._items, pos)
-       end
+       init do target_items = target._items
 
        redef fun index do return curr_pos
 
@@ -953,9 +1166,9 @@ private class FlatBufferByteView
 
        redef fun [](index) do return target._items[index]
 
-       redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
+       redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)
 
-       redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
+       redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)
 
 end
 
@@ -964,14 +1177,11 @@ private class FlatBufferByteIterator
 
        var target: FlatBuffer
 
-       var target_items: NativeString
+       var target_items: NativeString is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt._items, pos)
-       end
+       init do target_items = target._items
 
        redef fun index do return curr_pos
 
@@ -990,11 +1200,6 @@ private class FlatBufferCharReverseIterator
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, pos)
-       end
-
        redef fun index do return curr_pos
 
        redef fun is_ok do return curr_pos >= 0
@@ -1040,13 +1245,13 @@ private class FlatBufferCharView
        redef fun append(s)
        do
                var s_length = s.length
-               if target.capacity < s.length then enlarge(s_length + target.length)
+               if target.capacity < s.length then enlarge(s_length + target._length)
                for i in s do target.add i
        end
 
-       redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)
+       redef fun iterator_from(pos) do return new FlatBufferCharIterator(target, pos)
 
-       redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
+       redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator(target, pos)
 
 end
 
@@ -1055,14 +1260,11 @@ private class FlatBufferCharIterator
 
        var target: FlatBuffer
 
-       var max: Int
+       var max: Int is noautoinit
 
        var curr_pos: Int
 
-       init with_pos(tgt: FlatBuffer, pos: Int)
-       do
-               init(tgt, tgt.length - 1, pos)
-       end
+       init do max = target._length - 1
 
        redef fun index do return curr_pos
 
@@ -1080,26 +1282,31 @@ redef class NativeString
                return to_s_with_length(cstring_length)
        end
 
-       # Returns `self` as a String of `length`.
-       redef fun to_s_with_length(length): FlatString
+       redef fun to_s_with_length(length)
        do
                assert length >= 0
                return clean_utf8(length)
        end
 
        redef fun to_s_full(bytelen, unilen) do
-               return new FlatString.full(self, bytelen, 0, bytelen - 1, unilen)
+               return new FlatString.full(self, bytelen, 0, unilen)
        end
 
-       # Returns `self` as a new String.
-       redef fun to_s_with_copy: FlatString
+       redef fun to_s_unsafe(len) do
+               if len == null then len = cstring_length
+               return new FlatString.with_infos(self, len, 0)
+       end
+
+       redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length)
+
+       # Get a `String` from `length` bytes at `self` copied into Nit memory
+       fun to_s_with_copy_and_length(length: Int): String
        do
-               var length = cstring_length
                var r = clean_utf8(length)
                if r.items != self then return r
                var new_self = new NativeString(length + 1)
                copy_to(new_self, length, 0, 0)
-               var str = new FlatString.with_infos(new_self, length, 0, length - 1)
+               var str = new FlatString.with_infos(new_self, length, 0)
                new_self[length] = 0u8
                str.to_cstring = new_self
                return str
@@ -1111,8 +1318,23 @@ redef class NativeString
                var end_length = len
                var pos = 0
                var chr_ln = 0
-               while pos < len do
+               var rem = len
+               while rem > 0 do
+                       while rem >= 4 do
+                               var i = fetch_4_chars(pos)
+                               if i & 0x80808080 != 0 then break
+                               pos += 4
+                               chr_ln += 4
+                               rem -= 4
+                       end
+                       if rem == 0 then break
                        var b = self[pos]
+                       if b & 0x80u8 == 0x00u8 then
+                               pos += 1
+                               chr_ln += 1
+                               rem -= 1
+                               continue
+                       end
                        var nxst = length_of_char_at(pos)
                        var ok_st: Bool
                        if nxst == 1 then
@@ -1129,6 +1351,7 @@ redef class NativeString
                                replacements.add pos
                                end_length += 2
                                pos += 1
+                               rem -= 1
                                chr_ln += 1
                                continue
                        end
@@ -1151,9 +1374,12 @@ redef class NativeString
                                end_length += 2
                                pos += 1
                                chr_ln += 1
+                               rem -= 1
                                continue
                        end
-                       pos += c.u8char_len
+                       var clen = c.u8char_len
+                       pos += clen
+                       rem -= clen
                        chr_ln += 1
                end
                var ret = self
@@ -1177,51 +1403,36 @@ redef class NativeString
                        end
                        copy_to(ret, len - old_repl, old_repl, off)
                end
-               return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
+               return new FlatString.full(ret, end_length, 0, chr_ln)
        end
 
        # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
        #
        # Very unsafe, make sure to have room for this char prior to calling this function.
        private fun set_char_at(pos: Int, c: Char) do
+               var cp = c.code_point
+               if cp < 128 then
+                       self[pos] = cp.to_b
+                       return
+               end
                var ln = c.u8char_len
-               native_set_char(pos, c, ln)
-       end
-
-       private fun native_set_char(pos: Int, c: Char, ln: Int) `{
-               char* dst = self + pos;
-               switch(ln){
-                       case 1:
-                               dst[0] = c;
-                               break;
-                       case 2:
-                               dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
-                               dst[1] = 0x80 | (c & 0x3F);
-                               break;
-                       case 3:
-                               dst[0] = 0xE0 | ((c & 0xF000) >> 12);
-                               dst[1] = 0x80 | ((c & 0xFC0) >> 6);
-                               dst[2] = 0x80 | (c & 0x3F);
-                               break;
-                       case 4:
-                               dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
-                               dst[1] = 0x80 | ((c & 0x3F000) >> 12);
-                               dst[2] = 0x80 | ((c & 0xFC0) >> 6);
-                               dst[3] = 0x80 | (c & 0x3F);
-                               break;
-               }
-       `}
+               if ln == 2 then
+                       self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
+                       self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
+               else if ln == 3 then
+                       self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
+                       self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+                       self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
+               else if ln == 4 then
+                       self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
+                       self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
+                       self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+                       self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
+               end
+       end
 end
 
 redef class Int
-       redef fun to_base(base, signed)
-       do
-               var l = digit_count(base)
-               var s = new FlatBuffer.from(" " * l)
-               fill_buffer(s, base, signed)
-               return s.to_s
-       end
-
        # return displayable int in base 10 and signed
        #
        #     assert 1.to_s            == "1"
@@ -1235,7 +1446,7 @@ redef class Int
                var ns = new NativeString(nslen + 1)
                ns[nslen] = 0u8
                native_int_to_s(ns, nslen + 1)
-               return new FlatString.full(ns, nslen, 0, nslen - 1, nslen)
+               return new FlatString.full(ns, nslen, 0, nslen)
        end
 end
 
@@ -1244,7 +1455,7 @@ redef class Array[E]
        # Fast implementation
        redef fun plain_to_s
        do
-               var l = length
+               var l = _length
                if l == 0 then return ""
                var its = _items.as(not null)
                var first = its[0]
@@ -1285,7 +1496,7 @@ redef class Array[E]
                        end
                        i += 1
                end
-               return new FlatString.with_infos(ns, sl, 0, sl - 1)
+               return new FlatString.with_infos(ns, sl, 0)
        end
 end
 
@@ -1322,7 +1533,7 @@ redef class NativeArray[E]
                        end
                        i += 1
                end
-               return new FlatString.with_infos(ns, sl, 0, sl - 1)
+               return new FlatString.with_infos(ns, sl, 0)
        end
 end