return ns_i
end
+ # Gets the char index of the byte at position `n` in a UTF-8 String
+ #
+ # Instead of scanning from the start, the search seeks from a known
+ # position: `char_from` is a char index and `byte_from` is the byte
+ # position cached alongside it. The scan walks forward when the cached
+ # byte position is before `n`, and backward when it is after `n`.
+ #
+ # NOTE: `char_from` and `byte_from` are not validated here.
+ # It is up to the client to ensure that they form a valid cache entry.
+ fun byte_to_char_index_cached(n, char_from, byte_from: Int): Int do
+ 	var byte_pos = byte_from
+ 	var char_idx = char_from
+
+ 	# Cached position is before the target: skip one whole char per step
+ 	while byte_pos < n do
+ 		byte_pos += length_of_char_at(byte_pos)
+ 		char_idx += 1
+ 	end
+
+ 	# Cached position (or a forward overshoot) is after the target:
+ 	# step back to the start of the previous char, one char per step
+ 	while byte_pos > n do
+ 		byte_pos = find_beginning_of_char_at(byte_pos - 1)
+ 		char_idx -= 1
+ 	end
+
+ 	return char_idx
+ end
+
# Returns the beginning position of the char at position `pos`
#
# If the char is invalid UTF-8, `pos` is returned as-is
if length_of_char_at(stpos) >= (endpos - stpos + 1) then return pos
return endpos
end
+
+ # Number of UTF-8 characters in `self` between positions `from` and `to`
+ #
+ # `from` and `to` are byte positions. `to` is inclusive: a char that
+ # starts exactly at `to` is counted. Returns 0 when `from > to`.
+ fun utf8_length(from, to: Int): Int do
+ 	var count = 0
+ 	var pos = from
+ 	# Each iteration accounts for one char and jumps over all of its bytes
+ 	while pos <= to do
+ 		count += 1
+ 		pos += length_of_char_at(pos)
+ 	end
+ 	return count
+ end
end