lib/core: Renamed `Text::bytelen` to `Text::byte_length`
[nit.git] / lib / core / text / native.nit
index 1c9e262..ebb5661 100644 (file)
@@ -47,6 +47,22 @@ redef class Byte
                        return 1
                end
        end
+
+       # Is `self` a valid first byte of a UTF-8 sequence (1 to 4 bytes long)?
+       #
+       # ~~~nit
+       # assert 0u8.is_valid_utf8_start
+       # assert 0xC0u8.is_valid_utf8_start
+       # assert 0xE0u8.is_valid_utf8_start
+       # assert 0xF0u8.is_valid_utf8_start
+       # ~~~
+       fun is_valid_utf8_start: Bool do
+               if self & 0x80u8 == 0u8 then return true
+               if self & 0b1110_0000u8 == 0b1100_0000u8 then return true
+               if self & 0b1111_0000u8 == 0b1110_0000u8 then return true
+               if self & 0b1111_1000u8 == 0b1111_0000u8 then return true
+               return false
+       end
 end
 
 redef class Int
@@ -252,34 +268,41 @@ extern class NativeString `{ char* `}
                return endpos
        end
 
-       # Number of UTF-8 characters in `self` starting at `from`, for a length of `bytelen`
-       fun utf8_length(from, bytelen: Int): Int do
+       # Number of UTF-8 characters in `self`, starting at byte `from` and spanning `byte_length` bytes
+       fun utf8_length(from, byte_length: Int): Int is intern do
                var st = from
                var ln = 0
-               while bytelen > 0 do
-                       while bytelen >= 4 do
+               while byte_length > 0 do
+                       while byte_length >= 4 do
                                var i = fetch_4_chars(st)
                                if i & 0x80808080 != 0 then break
-                               bytelen -= 4
+                               byte_length -= 4
                                st += 4
                                ln += 4
                        end
-                       if bytelen == 0 then break
+                       if byte_length == 0 then break
                        var cln = length_of_char_at(st)
                        st += cln
                        ln += 1
-                       bytelen -= cln
+                       byte_length -= cln
                end
                return ln
        end
 
        # Fetch 4 chars in `self` at `pos`
-       fun fetch_4_chars(pos: Int): Int is intern do return fetch_4_ffi(pos)
+       fun fetch_4_chars(pos: Int): Int is intern `{ return (long)*((uint32_t*)(self+pos)); `}
 
        # Fetch 4 chars in `self` at `pos`
-       fun fetch_4_hchars(pos: Int): Int is intern do return fetch_4h_ffi(pos)
+       fun fetch_4_hchars(pos: Int): Int is intern `{ return (long)be32toh(*((uint32_t*)(self+pos))); `}
 
-       # FIXME: To remove when bootstrap supports PR #1898
-       private fun fetch_4_ffi(pos: Int): Int `{ return (long)*((uint32_t*)(self+pos)); `}
-       private fun fetch_4h_ffi(pos: Int): Int `{ return (long)be32toh(*((uint32_t*)(self+pos))); `}
+
+       # Shifts the `len` bytes of `self` starting at position `pos` to the right by `sh` bytes
+       fun rshift(sh, len, pos: Int) do
+               copy_to(self, len, pos, pos + sh)
+       end
+
+       # Shifts the `len` bytes of `self` starting at position `pos` to the left by `sh` bytes
+       fun lshift(sh, len, pos: Int) do
+               copy_to(self, len, pos, pos - sh)
+       end
 end