redef class Byte
# Gives the length of the UTF-8 char starting with `self`
- private fun u8len: Int do
+ fun u8len: Int do
if self & 0b1000_0000u8 == 0u8 then
return 1
else if self & 0b1110_0000u8 == 0b1100_0000u8 then
end
end
+redef class Int
+	# Decodes the UTF-16 surrogate pair packed in `self` into a code point
+	#
+	# The high surrogate is read from bits 16 to 31 of `self` and the low
+	# surrogate from bits 0 to 15. No check is made that either half lies
+	# in the surrogate range.
+	#
+	#     assert 0xD83DDE02.from_utf16_surr == 0x1F602
+	fun from_utf16_surr: Int do
+		var high = (self & 0xFFFF0000) >> 16
+		var low = self & 0xFFFF
+		# Rebase each half on its surrogate base, shift the high part
+		# left by 10 bits, then add the 0x10000 offset.
+		return ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
+	end
+end
+
# Native strings are simple C char *
extern class NativeString `{ char* `}
# Creates a new NativeString with a capacity of `length`
return ns_i
end
- # Gets the byte index of char at position `n` in UTF-8 String
+ # Gets the char index of byte at position `n` in a UTF-8 String
#
# `char_from` and `byte_from` are cached values to seek from.
#
if length_of_char_at(stpos) >= (endpos - stpos + 1) then return pos
return endpos
end
+
+	# Counts the UTF-8 characters of `self` between positions `from` and `to`
+	#
+	# Starting at `from`, the cursor is advanced by `length_of_char_at` at
+	# each step and one character is counted per step, for as long as the
+	# cursor has not moved past `to` (`to` is inclusive).
+	fun utf8_length(from, to: Int): Int do
+		var count = 0
+		var cursor = from
+		loop
+			if cursor > to then break
+			cursor += length_of_char_at(cursor)
+			count += 1
+		end
+		return count
+	end
end