#include <libkern/OSByteOrder.h>
#define be32toh(x) OSSwapBigToHostInt32(x)
#endif
+/* NOTE(review): maps be32toh to an unconditional 32-bit byte swap, which presumably assumes a little-endian Windows host - confirm */
+#ifdef _WIN32
+ #define be32toh(val) _byteswap_ulong(val)
+#endif
#ifdef __pnacl__
#define be16toh(val) (((val) >> 8) | ((val) << 8))
return 1
end
end
+
+ # Can `self` be the first byte of a UTF-8 sequence?
+ #
+ # True when the high bit is clear (single-byte character), or when `self`
+ # matches one of the lead-byte patterns `110xxxxx`, `1110xxxx` or `11110xxx`.
+ #
+ # ~~~nit
+ # assert 0u8.is_valid_utf8_start
+ # assert 0xC0u8.is_valid_utf8_start
+ # assert 0xE0u8.is_valid_utf8_start
+ # assert 0xF0u8.is_valid_utf8_start
+ # ~~~
+ fun is_valid_utf8_start: Bool do
+ 	# One predicate per accepted bit pattern
+ 	var single_byte = self & 0x80u8 == 0u8
+ 	var lead_of_two = self & 0b1110_0000u8 == 0b1100_0000u8
+ 	var lead_of_three = self & 0b1111_0000u8 == 0b1110_0000u8
+ 	var lead_of_four = self & 0b1111_1000u8 == 0b1111_0000u8
+ 	return single_byte or lead_of_two or lead_of_three or lead_of_four
+ end
end
redef class Int
end
# Native strings are simple C char *
-extern class NativeString `{ char* `}
- # Creates a new NativeString with a capacity of `length`
+extern class CString `{ char* `}
+ # Creates a new CString with a capacity of `length`
new(length: Int) is intern
# Returns a char* starting at `index`.
#
# WARNING: Unsafe for extern code, use only for temporary
# pointer manipulation purposes (e.g. write to file or such)
- fun fast_cstring(index: Int): NativeString is intern
+ fun fast_cstring(index: Int): CString is intern
# Get char at `index`.
fun [](index: Int): Byte is intern
fun []=(index: Int, item: Byte) is intern
# Copy `self` to `dest`.
- fun copy_to(dest: NativeString, length: Int, from: Int, to: Int) is intern
+ fun copy_to(dest: CString, length: Int, from: Int, to: Int) is intern
+
+ # Two `CString` are equal only when they are the same instance; contents are not compared here
+ redef fun ==(o) is intern do return is_same_instance(o)
+
+ # Negation of `==`: true unless `o` is the same instance as `self`
+ redef fun !=(o) is intern do return not is_same_instance(o)
# Position of the first nul character.
fun cstring_length: Int
fun find_beginning_of_char_at(pos: Int): Int do
var endpos = pos
var c = self[pos]
+ if c & 0x80u8 == 0x00u8 then return pos
while c & 0xC0u8 == 0x80u8 do
pos -= 1
c = self[pos]
return endpos
end
- # Number of UTF-8 characters in `self` between positions `from` and `to`
- fun utf8_length(from, to: Int): Int do
+ # Number of UTF-8 characters in `self` starting at `from`, for a length of `byte_length`
+ #
+ # `byte_length` is consumed in bytes; the returned count is in characters.
+ fun utf8_length(from, byte_length: Int): Int is intern do
var st = from
- var lst = to
var ln = 0
- while st <= lst do
- st += length_of_char_at(st)
+ while byte_length > 0 do
+ # Fast path: while at least 4 bytes remain, read them as one word.
+ # If no byte of the word has its high bit set, count 4 characters at once.
+ while byte_length >= 4 do
+ var i = fetch_4_chars(st)
+ # A high bit is set in one of the 4 bytes: fall back to the per-character step below
+ if i & 0x80808080 != 0 then break
+ byte_length -= 4
+ st += 4
+ ln += 4
+ end
+ # The fast path consumed everything
+ if byte_length == 0 then break
+ # Per-character step: `cln` is presumably the byte length of the character at `st` (see `length_of_char_at`)
+ var cln = length_of_char_at(st)
+ st += cln
ln += 1
+ byte_length -= cln
end
return ln
end
# Fetch 4 chars in `self` at `pos`
+ #
+ # The 4 bytes at `self + pos` are read as one unsigned 32-bit word, without byte-order conversion.
+ # NOTE(review): the C body dereferences a `uint32_t*` built from `self+pos`; this presumably relies on the platform tolerating unaligned loads - confirm.
- fun fetch_4_chars(pos: Int): Int is intern do return fetch_4_ffi(pos)
+ fun fetch_4_chars(pos: Int): Int is intern `{ return (long)*((uint32_t*)(self+pos)); `}
# Fetch 4 chars in `self` at `pos`
+ #
+ # Unlike `fetch_4_chars`, the 32-bit word is passed through `be32toh`,
+ # i.e. it is interpreted as big-endian and converted to host byte order.
- fun fetch_4_hchars(pos: Int): Int is intern do return fetch_4h_ffi(pos)
+ fun fetch_4_hchars(pos: Int): Int is intern `{ return (long)be32toh(*((uint32_t*)(self+pos))); `}
+
- # FIXME: To remove when bootstrap supports PR #1898
- private fun fetch_4_ffi(pos: Int): Int `{ return (long)*((uint32_t*)(self+pos)); `}
- private fun fetch_4h_ffi(pos: Int): Int `{ return (long)be32toh(*((uint32_t*)(self+pos))); `}
+ # Moves the `len` bytes of `self` that begin at `pos` to the right by `sh` bytes
+ #
+ # The bytes are copied within `self`, from offset `pos` to offset `pos + sh`, using `copy_to`.
+ fun rshift(sh, len, pos: Int) do
+ 	var target = pos + sh
+ 	copy_to(self, len, pos, target)
+ end
+
+ # Moves the `len` bytes of `self` that begin at `pos` to the left by `sh` bytes
+ #
+ # The bytes are copied within `self`, from offset `pos` to offset `pos - sh`, using `copy_to`.
+ fun lshift(sh, len, pos: Int) do
+ 	var target = pos - sh
+ 	copy_to(self, len, pos, target)
+ end
end