lib/standard/text: Added methods for bytes to char position translation
author Lucas Bajolet <r4pass@hotmail.com>
Thu, 27 Aug 2015 18:09:33 +0000 (14:09 -0400)
committer Lucas Bajolet <r4pass@hotmail.com>
Thu, 27 Aug 2015 18:09:33 +0000 (14:09 -0400)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

lib/standard/text/flat.nit
lib/standard/text/native.nit

index 917b0e5..75cf729 100644 (file)
@@ -84,6 +84,43 @@ redef class FlatText
                return ns_i
        end
 
+       # Translates the byte position `index` into the index of the char containing it
+       #
+       # The seek starts from whichever known (char, byte) pair is nearest to `index`:
+       # the start of the text, the cached `position`/`bytepos` pair, or the last char.
+       # The cache is then updated to the char that was located.
+       #
+       # REQUIRE: `index >= 0 and index < bytelen`
+       #
+       # NOTE(review): `index` is bounded by `bytelen` while the seek starts from
+       # `first_byte`/`last_byte`; confirm both use the same origin when `first_byte != 0`.
+       private fun byte_to_char_index(index: Int): Int do
+               var ln = bytelen
+               assert index >= 0
+               assert index < ln
+
+               # Find best insertion point
+               var delta_begin = index
+               var delta_end = (ln - 1) - index
+               var delta_cache = (bytepos - index).abs
+               var min = delta_begin
+               var its = items
+
+               if delta_cache < min then min = delta_cache
+               if delta_end < min then min = delta_end
+
+               var ns_i: Int
+               var my_i: Int
+
+               if min == delta_begin then
+                       ns_i = first_byte
+                       my_i = 0
+               else if min == delta_cache then
+                       ns_i = bytepos
+                       my_i = position
+               else
+                       ns_i = its.find_beginning_of_char_at(last_byte)
+                       my_i = length - 1
+               end
+
+               my_i = its.byte_to_char_index_cached(index, my_i, ns_i)
+
+               # `index` may point inside a multi-byte char: cache the byte at which
+               # that char begins, so that `bytepos` stays paired with `position`
+               # for the next seek that resumes from the cache.
+               position = my_i
+               bytepos = its.find_beginning_of_char_at(index)
+
+               return my_i
+       end
+
        redef fun [](index) do return items.char_at(char_to_byte_index(index))
 end
 
index 170b196..11c8d34 100644 (file)
@@ -130,6 +130,29 @@ extern class NativeString `{ char* `}
                return ns_i
        end
 
+       # Gets the index of the char containing the byte at position `n` in UTF-8 String
+       #
+       # `char_from` and `byte_from` form a known (char index, byte index) pair
+       # from which the seek starts, forwards or backwards as needed.
+       #
+       # NOTE: `char_from` and `byte_from` are not checked for validity.
+       # It is up to the client to ensure that they are consistent.
+       fun byte_to_char_index_cached(n, char_from, byte_from: Int): Int do
+               var byte_pos = byte_from
+               var char_pos = char_from
+
+               # Seek forward, one char at a time, until `n` is reached or passed
+               while byte_pos < n do
+                       char_pos += 1
+                       byte_pos += length_of_char_at(byte_pos)
+               end
+
+               # Seek backward to the char beginning at or before `n`
+               while byte_pos > n do
+                       char_pos -= 1
+                       byte_pos = find_beginning_of_char_at(byte_pos - 1)
+               end
+
+               return char_pos
+       end
+
        # Returns the beginning position of the char at position `pos`
        #
        # If the char is invalid UTF-8, `pos` is returned as-is