lib/standard/text: Mutualized cache mechanism to FlatText
[nit.git] / lib / standard / text / flat.nit
index cec70f6..917b0e5 100644 (file)
@@ -12,6 +12,7 @@
 module flat
 
 intrude import abstract_text
+intrude import native
 
 `{
 #include <stdio.h>
@@ -33,48 +34,99 @@ private class FlatSubstringsIter
        redef fun next do tgt = null
 end
 
+redef class FlatText
+
+       private fun first_byte: Int do return 0
+
+       private fun last_byte: Int do return bytelen - 1
+
+       # Cache of the latest position (char) explored in the string
+       var position: Int = 0
+
+       # Cached position (bytes) in the NativeString underlying the String
+       var bytepos: Int = first_byte is lateinit
+
+       # Index of the character `index` in `items`
+       private fun char_to_byte_index(index: Int): Int do
+               var ln = length
+               assert index >= 0
+               assert index < ln
+
+               # Find best insertion point
+               var delta_begin = index
+               var delta_end = (ln - 1) - index
+               var delta_cache = (position - index).abs
+               var min = delta_begin
+               var its = items
+
+               if delta_cache < min then min = delta_cache
+               if delta_end < min then min = delta_end
+
+               var ns_i: Int
+               var my_i: Int
+
+               if min == delta_begin then
+                       ns_i = first_byte
+                       my_i = 0
+               else if min == delta_cache then
+                       ns_i = bytepos
+                       my_i = position
+               else
+                       ns_i = its.find_beginning_of_char_at(last_byte)
+                       my_i = length - 1
+               end
+
+               ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
+
+               position = index
+               bytepos = ns_i
+
+               return ns_i
+       end
+
+       redef fun [](index) do return items.char_at(char_to_byte_index(index))
+end
+
 # Immutable strings of characters.
 class FlatString
        super FlatText
        super String
 
-       # Index in _items of the start of the string
-       private var index_from: Int is noinit
+       # Index at which `self` begins in `items`, inclusively
+       redef var first_byte is noinit
 
-       # Indes in _items of the last item of the string
-       private var index_to: Int is noinit
+       # Index at which `self` ends in `items`, inclusively
+       redef var last_byte is noinit
 
        redef var chars = new FlatStringCharView(self) is lazy
 
-       redef fun [](index)
-       do
-               # Check that the index (+ index_from) is not larger than indexTo
-               # In other terms, if the index is valid
-               assert index >= 0
-               assert (index + index_from) <= index_to
-               return items[index + index_from]
+       redef var bytes = new FlatStringByteView(self) is lazy
+
+       redef var length is lazy do
+               if bytelen == 0 then return 0
+               var st = first_byte
+               var its = items
+               var ln = 0
+               var lst = last_byte
+               while st <= lst do
+                       st += its.length_of_char_at(st)
+                       ln += 1
+               end
+               return ln
        end
 
-       ################################################
-       #       AbstractString specific methods        #
-       ################################################
-
        redef fun reversed
        do
-               var native = new NativeString(self.length + 1)
-               var length = self.length
-               var items = self.items
-               var pos = 0
-               var ipos = length-1
-               while pos < length do
-                       native[pos] = items[ipos]
-                       pos += 1
-                       ipos -= 1
+               var b = new FlatBuffer.with_capacity(bytelen + 1)
+               for i in [length - 1 .. 0].step(-1) do
+                       b.add self[i]
                end
-               return native.to_s_with_length(self.length)
+               var s = b.to_s.as(FlatString)
+               s.length = self.length
+               return s
        end
 
-       redef fun fast_cstring do return items.fast_cstring(index_from)
+       redef fun fast_cstring do return items.fast_cstring(first_byte)
 
        redef fun substring(from, count)
        do
@@ -86,100 +138,91 @@ class FlatString
                        from = 0
                end
 
-               var new_from = index_from + from
-
-               if (new_from + count) > index_to then
-                       var new_len = index_to - new_from + 1
-                       if new_len <= 0 then return empty
-                       return new FlatString.with_infos(items, new_len, new_from, index_to)
-               end
-
-               if count <= 0 then return empty
+               if (count + from) > length then count = length - from
+               if count <= 0 then return ""
+               var end_index = from + count - 1
 
-               var to = new_from + count - 1
+               var bytefrom = char_to_byte_index(from)
+               var byteto = char_to_byte_index(end_index)
+               byteto += items.length_of_char_at(byteto) - 1
 
-               return new FlatString.with_infos(items, to - new_from + 1, new_from, to)
+               var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count)
+               return s
        end
 
        redef fun empty do return "".as(FlatString)
 
        redef fun to_upper
        do
-               var outstr = new NativeString(self.length + 1)
-               var out_index = 0
+               var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)
 
-               var myitems = self.items
-               var index_from = self.index_from
-               var max = self.index_to
+               var mylen = length
+               var pos = 0
 
-               while index_from <= max do
-                       outstr[out_index] = myitems[index_from].to_upper
-                       out_index += 1
-                       index_from += 1
+               while pos < mylen do
+                       outstr.add(chars[pos].to_upper)
+                       pos += 1
                end
 
-               outstr[self.length] = '\0'
-
-               return outstr.to_s_with_length(self.length)
+               return outstr.to_s
        end
 
        redef fun to_lower
        do
-               var outstr = new NativeString(self.length + 1)
-               var out_index = 0
+               var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)
 
-               var myitems = self.items
-               var index_from = self.index_from
-               var max = self.index_to
+               var mylen = length
+               var pos = 0
 
-               while index_from <= max do
-                       outstr[out_index] = myitems[index_from].to_lower
-                       out_index += 1
-                       index_from += 1
+               while pos < mylen do
+                       outstr.add(chars[pos].to_lower)
+                       pos += 1
                end
 
-               outstr[self.length] = '\0'
-
-               return outstr.to_s_with_length(self.length)
+               return outstr.to_s
        end
 
        redef fun output
        do
-               var i = self.index_from
-               var imax = self.index_to
-               while i <= imax do
-                       items[i].output
-                       i += 1
-               end
+               for i in chars do i.output
        end
 
        ##################################################
        #              String Specific Methods           #
        ##################################################
 
-       # Low-level creation of a new string with given data.
+       # Low-level creation of a new string with minimal data.
        #
        # `items` will be used as is, without copy, to retrieve the characters of the string.
        # Aliasing issues is the responsibility of the caller.
-       private init with_infos(items: NativeString, length: Int, from: Int, to: Int)
+       private init with_infos(items: NativeString, bytelen, from, to: Int)
        do
                self.items = items
-               self.length = length
-               index_from = from
-               index_to = to
+               self.bytelen = bytelen
+               first_byte = from
+               last_byte = to
        end
 
-       redef fun to_cstring
+       # Low-level creation of a new string with all the data.
+       #
+       # `items` will be used as is, without copy, to retrieve the characters of the string.
+       # Aliasing issues is the responsibility of the caller.
+       private init full(items: NativeString, bytelen, from, to, length: Int)
        do
-               if real_items != null then
-                       return real_items.as(not null)
-               else
-                       var newItems = new NativeString(length + 1)
-                       self.items.copy_to(newItems, length, index_from, 0)
-                       newItems[length] = '\0'
-                       self.real_items = newItems
-                       return newItems
-               end
+               self.items = items
+               self.length = length
+               self.bytelen = bytelen
+               first_byte = from
+               last_byte = to
+       end
+
+       redef fun to_cstring do
+               if real_items != null then return real_items.as(not null)
+               var new_items = new NativeString(bytelen + 1)
+               self.items.copy_to(new_items, bytelen, first_byte, 0)
+               new_items[bytelen] = 0u8
+               real_items = new_items
+               return new_items
        end
 
        redef fun ==(other)
@@ -188,12 +231,12 @@ class FlatString
 
                if self.object_id == other.object_id then return true
 
-               var my_length = length
+               var my_length = bytelen
 
-               if other.length != my_length then return false
+               if other.bytelen != my_length then return false
 
-               var my_index = index_from
-               var its_index = other.index_from
+               var my_index = first_byte
+               var its_index = other.first_byte
 
                var last_iteration = my_index + my_length
 
@@ -215,99 +258,74 @@ class FlatString
 
                if self.object_id == other.object_id then return false
 
-               var my_curr_char : Char
-               var its_curr_char : Char
-
-               var curr_id_self = self.index_from
-               var curr_id_other = other.index_from
+               var my_length = self.bytelen
+               var its_length = other.bytelen
 
-               var my_items = self.items
-               var its_items = other.items
+               var max = if my_length < its_length then my_length else its_length
 
-               var my_length = self.length
-               var its_length = other.length
+               var myits = self.bytes
+               var itsits = other.bytes
 
-               var max_iterations = curr_id_self + my_length
-
-               while curr_id_self < max_iterations do
-                       my_curr_char = my_items[curr_id_self]
-                       its_curr_char = its_items[curr_id_other]
+               for i in [0 .. max[ do
+                       var my_curr_char = myits[i]
+                       var its_curr_char = itsits[i]
 
                        if my_curr_char != its_curr_char then
                                if my_curr_char < its_curr_char then return true
                                return false
                        end
-
-                       curr_id_self += 1
-                       curr_id_other += 1
                end
 
                return my_length < its_length
        end
 
-       redef fun +(s)
-       do
-               var my_length = self.length
-               var its_length = s.length
-
-               var total_length = my_length + its_length
-
-               var target_string = new NativeString(my_length + its_length + 1)
-
-               self.items.copy_to(target_string, my_length, index_from, 0)
-               if s isa FlatString then
-                       s.items.copy_to(target_string, its_length, s.index_from, my_length)
-               else if s isa FlatBuffer then
-                       s.items.copy_to(target_string, its_length, 0, my_length)
+       redef fun +(o) do
+               var s = o.to_s
+               var slen = s.bytelen
+               var mlen = bytelen
+               var nlen = mlen + slen
+               var mits = items
+               var mifrom = first_byte
+               if s isa FlatText then
+                       var sits = s.items
+                       var sifrom = s.first_byte
+                       var ns = new NativeString(nlen + 1)
+                       mits.copy_to(ns, mlen, mifrom, 0)
+                       sits.copy_to(ns, slen, sifrom, mlen)
+                       return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
                else
-                       var curr_pos = my_length
-                       for i in [0..s.length[ do
-                               var c = s.chars[i]
-                               target_string[curr_pos] = c
-                               curr_pos += 1
-                       end
+                       abort
                end
-
-               target_string[total_length] = '\0'
-
-               return target_string.to_s_with_length(total_length)
        end
 
-       redef fun *(i)
-       do
-               assert i >= 0
-
-               var my_length = self.length
-
-               var final_length = my_length * i
-
-               var my_items = self.items
-
-               var target_string = new NativeString(final_length + 1)
-
-               target_string[final_length] = '\0'
-
-               var current_last = 0
-
-               for iteration in [1 .. i] do
-                       my_items.copy_to(target_string, my_length, 0, current_last)
-                       current_last += my_length
+       redef fun *(i) do
+               var mybtlen = bytelen
+               var new_bytelen = mybtlen * i
+               var mylen = length
+               var newlen = mylen * i
+               var ns = new NativeString(new_bytelen + 1)
+               ns[new_bytelen] = 0u8
+               var offset = 0
+               while i > 0 do
+                       items.copy_to(ns, bytelen, first_byte, offset)
+                       offset += mybtlen
+                       i -= 1
                end
-
-               return target_string.to_s_with_length(final_length)
+               return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
        end
 
+
        redef fun hash
        do
                if hash_cache == null then
                        # djb2 hash algorithm
                        var h = 5381
-                       var i = index_from
+                       var i = first_byte
 
                        var myitems = items
 
-                       while i <= index_to do
-                               h = h.lshift(5) + h + myitems[i].ascii
+                       while i <= last_byte do
+                               h = (h << 5) + h + myitems[i].to_i
                                i += 1
                        end
 
@@ -320,34 +338,91 @@ class FlatString
        redef fun substrings do return new FlatSubstringsIter(self)
 end
 
-private class FlatStringReverseIterator
+private class FlatStringCharReverseIterator
        super IndexedIterator[Char]
 
        var target: FlatString
 
+       var curr_pos: Int
+
+       init with_pos(tgt: FlatString, pos: Int)
+       do
+               init(tgt, pos)
+       end
+
+       redef fun is_ok do return curr_pos >= 0
+
+       redef fun item do return target[curr_pos]
+
+       redef fun next do curr_pos -= 1
+
+       redef fun index do return curr_pos
+
+end
+
+private class FlatStringCharIterator
+       super IndexedIterator[Char]
+
+       var target: FlatString
+
+       var max: Int
+
+       var curr_pos: Int
+
+       init with_pos(tgt: FlatString, pos: Int)
+       do
+               init(tgt, tgt.length - 1, pos)
+       end
+
+       redef fun is_ok do return curr_pos <= max
+
+       redef fun item do return target[curr_pos]
+
+       redef fun next do curr_pos += 1
+
+       redef fun index do return curr_pos
+
+end
+
+private class FlatStringCharView
+       super StringCharView
+
+       redef type SELFTYPE: FlatString
+
+       redef fun [](index) do return target[index]
+
+       redef fun iterator_from(start) do return new FlatStringCharIterator.with_pos(target, start)
+
+       redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator.with_pos(target, start)
+
+end
+
+private class FlatStringByteReverseIterator
+       super IndexedIterator[Byte]
+
+       var target: FlatString
+
        var target_items: NativeString
 
        var curr_pos: Int
 
        init with_pos(tgt: FlatString, pos: Int)
        do
-               target = tgt
-               target_items = tgt.items
-               curr_pos = pos + tgt.index_from
+               init(tgt, tgt.items, pos + tgt.first_byte)
        end
 
-       redef fun is_ok do return curr_pos >= target.index_from
+       redef fun is_ok do return curr_pos >= target.first_byte
 
        redef fun item do return target_items[curr_pos]
 
        redef fun next do curr_pos -= 1
 
-       redef fun index do return curr_pos - target.index_from
+       redef fun index do return curr_pos - target.first_byte
 
 end
 
-private class FlatStringIterator
-       super IndexedIterator[Char]
+private class FlatStringByteIterator
+       super IndexedIterator[Byte]
 
        var target: FlatString
 
@@ -357,39 +432,37 @@ private class FlatStringIterator
 
        init with_pos(tgt: FlatString, pos: Int)
        do
-               target = tgt
-               target_items = tgt.items
-               curr_pos = pos + target.index_from
+               init(tgt, tgt.items, pos + tgt.first_byte)
        end
 
-       redef fun is_ok do return curr_pos <= target.index_to
+       redef fun is_ok do return curr_pos <= target.last_byte
 
        redef fun item do return target_items[curr_pos]
 
        redef fun next do curr_pos += 1
 
-       redef fun index do return curr_pos - target.index_from
+       redef fun index do return curr_pos - target.first_byte
 
 end
 
-private class FlatStringCharView
-       super StringCharView
+private class FlatStringByteView
+       super StringByteView
 
        redef type SELFTYPE: FlatString
 
        redef fun [](index)
        do
-               # Check that the index (+ index_from) is not larger than indexTo
+               # Check that the index (+ first_byte) is not larger than last_byte
                # In other terms, if the index is valid
                assert index >= 0
                var target = self.target
-               assert (index + target.index_from) <= target.index_to
-               return target.items[index + target.index_from]
+               assert (index + target.first_byte) <= target.last_byte
+               return target.items[index + target.first_byte]
        end
 
-       redef fun iterator_from(start) do return new FlatStringIterator.with_pos(target, start)
+       redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
 
-       redef fun reverse_iterator_from(start) do return new FlatStringReverseIterator.with_pos(target, start)
+       redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator.with_pos(target, start)
 
 end
 
@@ -406,7 +479,17 @@ class FlatBuffer
 
        redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
 
-       private var capacity: Int = 0
+       redef var bytes = new FlatBufferByteView(self) is lazy
+
+       redef var bytelen = 0
+
+       redef var length = 0
+
+       private var char_cache: Int = -1
+
+       private var byte_cache: Int = -1
+
+       private var capacity = 0
 
        redef fun fast_cstring do return items.fast_cstring(0)
 
@@ -418,41 +501,71 @@ class FlatBuffer
        # the Copy-On-Write flag `written` is set at true.
        private fun reset do
                var nns = new NativeString(capacity)
-               items.copy_to(nns, length, 0, 0)
+               items.copy_to(nns, bytelen, 0, 0)
                items = nns
                written = false
        end
 
-       redef fun [](index)
-       do
-               assert index >= 0
-               assert index  < length
-               return items[index]
+       # Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
+       #
+       # Internal only, does not modify bytelen or length, this is the caller's responsability
+       private fun rshift_bytes(from: Int, len: Int) do
+               var oit = items
+               var nit = items
+               if bytelen + len > capacity then
+                       capacity = capacity * 2 + 2
+                       nit = new NativeString(capacity)
+                       oit.copy_to(nit, 0, 0, from)
+               end
+               oit.copy_to(nit, bytelen - from, from, from + len)
+       end
+
+       # Shifts the content of the buffer by `len` bytes to the left, starting at `from`
+       #
+       # Internal only, does not modify bytelen or length, this is the caller's responsability
+       private fun lshift_bytes(from: Int, len: Int) do
+               items.copy_to(items, bytelen - from, from, from - len)
        end
 
        redef fun []=(index, item)
        do
+               assert index >= 0 and index <= length
+               if written then reset
                is_dirty = true
                if index == length then
-                       add(item)
+                       add item
                        return
                end
-               if written then reset
-               assert index >= 0 and index < length
-               items[index] = item
+               var ip = items.char_to_byte_index(index)
+               var c = items.char_at(ip)
+               var clen = c.u8char_len
+               var itemlen = item.u8char_len
+               var size_diff = itemlen - clen
+               if size_diff > 0 then
+                       rshift_bytes(ip + clen, size_diff)
+               else if size_diff < 0 then
+                       lshift_bytes(ip + clen, -size_diff)
+               end
+               bytelen += size_diff
+               bytepos += size_diff
+               items.set_char_at(ip, item)
        end
 
        redef fun add(c)
        do
+               if written then reset
                is_dirty = true
-               if capacity <= length then enlarge(length + 5)
-               items[length] = c
+               var clen = c.u8char_len
+               enlarge(bytelen + clen)
+               items.set_char_at(bytelen, c)
+               bytelen += clen
                length += 1
        end
 
        redef fun clear do
                is_dirty = true
                if written then reset
+               bytelen = 0
                length = 0
        end
 
@@ -467,7 +580,7 @@ class FlatBuffer
                # it does a copy of the current `Buffer`
                written = false
                var a = new NativeString(c+1)
-               if length > 0 then items.copy_to(a, length, 0, 0)
+               if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
                items = a
                capacity = c
        end
@@ -475,16 +588,16 @@ class FlatBuffer
        redef fun to_s
        do
                written = true
-               if length == 0 then items = new NativeString(1)
-               return new FlatString.with_infos(items, length, 0, length - 1)
+               if bytelen == 0 then items = new NativeString(1)
+               return new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
        end
 
        redef fun to_cstring
        do
                if is_dirty then
-                       var new_native = new NativeString(length + 1)
-                       new_native[length] = '\0'
-                       if length > 0 then items.copy_to(new_native, length, 0, 0)
+                       var new_native = new NativeString(bytelen + 1)
+                       new_native[bytelen] = 0u8
+                       if length > 0 then items.copy_to(new_native, bytelen, 0, 0)
                        real_items = new_native
                        is_dirty = false
                end
@@ -501,61 +614,52 @@ class FlatBuffer
        #
        # If `items` is shared, `written` should be set to true after the creation
        # so that a modification will do a copy-on-write.
-       private init with_infos(items: NativeString, capacity, length: Int)
+       private init with_infos(items: NativeString, capacity, bytelen, length: Int)
        do
                self.items = items
-               self.length = length
                self.capacity = capacity
+               self.bytelen = bytelen
+               self.length = length
        end
 
        # Create a new string copied from `s`.
        init from(s: Text)
        do
-               capacity = s.length + 1
-               length = s.length
-               items = new NativeString(capacity)
-               if s isa FlatString then
-                       s.items.copy_to(items, length, s.index_from, 0)
-               else if s isa FlatBuffer then
-                       s.items.copy_to(items, length, 0, 0)
+               items = new NativeString(s.bytelen)
+               if s isa FlatText then
+                       items = s.items
                else
-                       var curr_pos = 0
-                       for i in [0..s.length[ do
-                               var c = s.chars[i]
-                               items[curr_pos] = c
-                               curr_pos += 1
-                       end
+                       for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
                end
+               bytelen = s.bytelen
+               length = s.length
+               capacity = s.bytelen
+               written = true
        end
 
        # Create a new empty string with a given capacity.
        init with_capacity(cap: Int)
        do
                assert cap >= 0
-               items = new NativeString(cap+1)
+               items = new NativeString(cap + 1)
                capacity = cap
-               length = 0
+               bytelen = 0
        end
 
        redef fun append(s)
        do
                if s.is_empty then return
                is_dirty = true
-               var sl = s.length
-               if capacity < length + sl then enlarge(length + sl)
-               if s isa FlatString then
-                       s.items.copy_to(items, sl, s.index_from, length)
-               else if s isa FlatBuffer then
-                       s.items.copy_to(items, sl, 0, length)
+               var sl = s.bytelen
+               enlarge(bytelen + sl)
+               if s isa FlatText then
+                       s.items.copy_to(items, sl, s.first_byte, bytelen)
                else
-                       var curr_pos = self.length
-                       for i in [0..s.length[ do
-                               var c = s.chars[i]
-                               items[curr_pos] = c
-                               curr_pos += 1
-                       end
+                       for i in s.substrings do append i
+                       return
                end
-               length += sl
+               bytelen += sl
+               length += s.length
        end
 
        # Copies the content of self in `dest`
@@ -571,15 +675,16 @@ class FlatBuffer
        redef fun substring(from, count)
        do
                assert count >= 0
-               count += from
                if from < 0 then from = 0
-               if count > length then count = length
-               if from < count then
-                       var len = count - from
-                       var r_items = new NativeString(len)
-                       items.copy_to(r_items, len, from, 0)
-                       var r = new FlatBuffer.with_infos(r_items, len, len)
-                       return r
+               if (from + count) > length then count = length - from
+               if count != 0 then
+                       var bytefrom = items.char_to_byte_index(from)
+                       var byteto = items.char_to_byte_index(count + from - 1)
+                       byteto += items.char_at(byteto).u8char_len - 1
+                       var byte_length = byteto - bytefrom + 1
+                       var r_items = new NativeString(byte_length)
+                       items.copy_to(r_items, byte_length, bytefrom, 0)
+                       return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
                else
                        return new Buffer
                end
@@ -588,22 +693,15 @@ class FlatBuffer
        redef fun reverse
        do
                written = false
-               var ns = new NativeString(capacity)
-               var si = length - 1
-               var ni = 0
-               var it = items
-               while si >= 0 do
-                       ns[ni] = it[si]
-                       ni += 1
-                       si -= 1
-               end
-               items = ns
+               var ns = new FlatBuffer.with_capacity(capacity)
+               for i in chars.reverse_iterator do ns.add i
+               items = ns.items
        end
 
        redef fun times(repeats)
        do
-               var x = new FlatString.with_infos(items, length, 0, length - 1)
-               for i in [1..repeats[ do
+               var x = new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
+               for i in [1 .. repeats[ do
                        append(x)
                end
        end
@@ -611,28 +709,18 @@ class FlatBuffer
        redef fun upper
        do
                if written then reset
-               var it = items
-               var id = length - 1
-               while id >= 0 do
-                       it[id] = it[id].to_upper
-                       id -= 1
-               end
+               for i in [0 .. length[ do self[i] = self[i].to_upper
        end
 
        redef fun lower
        do
                if written then reset
-               var it = items
-               var id = length - 1
-               while id >= 0 do
-                       it[id] = it[id].to_lower
-                       id -= 1
-               end
+               for i in [0 .. length[ do self[i] = self[i].to_lower
        end
 end
 
-private class FlatBufferReverseIterator
-       super IndexedIterator[Char]
+private class FlatBufferByteReverseIterator
+       super IndexedIterator[Byte]
 
        var target: FlatBuffer
 
@@ -642,9 +730,7 @@ private class FlatBufferReverseIterator
 
        init with_pos(tgt: FlatBuffer, pos: Int)
        do
-               target = tgt
-               if tgt.length > 0 then target_items = tgt.items
-               curr_pos = pos
+               init(tgt, tgt.items, pos)
        end
 
        redef fun index do return curr_pos
@@ -657,12 +743,71 @@ private class FlatBufferReverseIterator
 
 end
 
+private class FlatBufferByteView
+       super BufferByteView
+
+       redef type SELFTYPE: FlatBuffer
+
+       redef fun [](index) do return target.items[index]
+
+       redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
+
+       redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
+
+end
+
+private class FlatBufferByteIterator
+       super IndexedIterator[Byte]
+
+       var target: FlatBuffer
+
+       var target_items: NativeString
+
+       var curr_pos: Int
+
+       init with_pos(tgt: FlatBuffer, pos: Int)
+       do
+               init(tgt, tgt.items, pos)
+       end
+
+       redef fun index do return curr_pos
+
+       redef fun is_ok do return curr_pos < target.bytelen
+
+       redef fun item do return target_items[curr_pos]
+
+       redef fun next do curr_pos += 1
+
+end
+
+private class FlatBufferCharReverseIterator
+       super IndexedIterator[Char]
+
+       var target: FlatBuffer
+
+       var curr_pos: Int
+
+       init with_pos(tgt: FlatBuffer, pos: Int)
+       do
+               init(tgt, pos)
+       end
+
+       redef fun index do return curr_pos
+
+       redef fun is_ok do return curr_pos >= 0
+
+       redef fun item do return target[curr_pos]
+
+       redef fun next do curr_pos -= 1
+
+end
+
 private class FlatBufferCharView
        super BufferCharView
 
        redef type SELFTYPE: FlatBuffer
 
-       redef fun [](index) do return target.items[index]
+       redef fun [](index) do return target[index]
 
        redef fun []=(index, item)
        do
@@ -671,7 +816,7 @@ private class FlatBufferCharView
                        add(item)
                        return
                end
-               target.items[index] = item
+               target[index] = item
        end
 
        redef fun push(c)
@@ -693,35 +838,34 @@ private class FlatBufferCharView
        do
                var s_length = s.length
                if target.capacity < s.length then enlarge(s_length + target.length)
+               for i in s do target.add i
        end
 
-       redef fun iterator_from(pos) do return new FlatBufferIterator.with_pos(target, pos)
+       redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)
 
-       redef fun reverse_iterator_from(pos) do return new FlatBufferReverseIterator.with_pos(target, pos)
+       redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
 
 end
 
-private class FlatBufferIterator
+private class FlatBufferCharIterator
        super IndexedIterator[Char]
 
        var target: FlatBuffer
 
-       var target_items: NativeString
+       var max: Int
 
        var curr_pos: Int
 
        init with_pos(tgt: FlatBuffer, pos: Int)
        do
-               target = tgt
-               if tgt.length > 0 then target_items = tgt.items
-               curr_pos = pos
+               init(tgt, tgt.length - 1, pos)
        end
 
        redef fun index do return curr_pos
 
-       redef fun is_ok do return curr_pos < target.length
+       redef fun is_ok do return curr_pos <= max
 
-       redef fun item do return target_items[curr_pos]
+       redef fun item do return target[curr_pos]
 
        redef fun next do curr_pos += 1
 
@@ -748,10 +892,42 @@ redef class NativeString
                var new_self = new NativeString(length + 1)
                copy_to(new_self, length, 0, 0)
                var str = new FlatString.with_infos(new_self, length, 0, length - 1)
-               new_self[length] = '\0'
+               new_self[length] = 0u8
                str.real_items = new_self
                return str
        end
+
+       # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
+       #
+       # Very unsafe, make sure to have room for this char prior to calling this function.
+       private fun set_char_at(pos: Int, c: Char) do
+               var ln = c.u8char_len
+               native_set_char(pos, c, ln)
+       end
+
+       private fun native_set_char(pos: Int, c: Char, ln: Int) `{
+               char* dst = self + pos;
+               switch(ln){
+                       case 1:
+                               dst[0] = c;
+                               break;
+                       case 2:
+                               dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
+                               dst[1] = 0x80 | (c & 0x3F);
+                               break;
+                       case 3:
+                               dst[0] = 0xE0 | ((c & 0xF000) >> 12);
+                               dst[1] = 0x80 | ((c & 0xFC0) >> 6);
+                               dst[2] = 0x80 | (c & 0x3F);
+                               break;
+                       case 4:
+                               dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
+                               dst[1] = 0x80 | ((c & 0x3F000) >> 12);
+                               dst[2] = 0x80 | ((c & 0xFC0) >> 6);
+                               dst[3] = 0x80 | (c & 0x3F);
+                               break;
+               }
+       `}
 end
 
 redef class Int
@@ -774,9 +950,9 @@ redef class Int
 
                var nslen = int_to_s_len
                var ns = new NativeString(nslen + 1)
-               ns[nslen] = '\0'
+               ns[nslen] = 0u8
                native_int_to_s(ns, nslen + 1)
-               return ns.to_s_with_length(nslen)
+               return new FlatString.full(ns, nslen, 0, nslen - 1, nslen)
        end
 end
 
@@ -800,26 +976,26 @@ redef class Array[E]
                                continue
                        end
                        var tmp = itsi.to_s
-                       sl += tmp.length
+                       sl += tmp.bytelen
                        na[mypos] = tmp
                        i += 1
                        mypos += 1
                end
                var ns = new NativeString(sl + 1)
-               ns[sl] = '\0'
+               ns[sl] = 0u8
                i = 0
                var off = 0
                while i < mypos do
                        var tmp = na[i]
-                       var tpl = tmp.length
                        if tmp isa FlatString then
-                               tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+                               var tpl = tmp.bytelen
+                               tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
                                off += tpl
                        else
                                for j in tmp.substrings do
                                        var s = j.as(FlatString)
-                                       var slen = s.length
-                                       s.items.copy_to(ns, slen, s.index_from, off)
+                                       var slen = s.bytelen
+                                       s.items.copy_to(ns, slen, s.first_byte, off)
                                        off += slen
                                end
                        end
@@ -838,25 +1014,25 @@ redef class NativeArray[E]
                var sl = 0
                var mypos = 0
                while i < l do
-                       sl += na[i].length
+                       sl += na[i].bytelen
                        i += 1
                        mypos += 1
                end
                var ns = new NativeString(sl + 1)
-               ns[sl] = '\0'
+               ns[sl] = 0u8
                i = 0
                var off = 0
                while i < mypos do
                        var tmp = na[i]
-                       var tpl = tmp.length
                        if tmp isa FlatString then
-                               tmp.items.copy_to(ns, tpl, tmp.index_from, off)
+                               var tpl = tmp.bytelen
+                               tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
                                off += tpl
                        else
                                for j in tmp.substrings do
                                        var s = j.as(FlatString)
-                                       var slen = s.length
-                                       s.items.copy_to(ns, slen, s.index_from, off)
+                                       var slen = s.bytelen
+                                       s.items.copy_to(ns, slen, s.first_byte, off)
                                        off += slen
                                end
                        end