# Byte index in `_items` of the character at position `index`
fun char_to_byte_index(index: Int): Int do
- var ln = length
- assert index >= 0
- assert index < ln
+ var dpos = index - _position
+ var b = _bytepos
+
+ if dpos == 0 then return b
+ if dpos == 1 then
+ b += _items.length_of_char_at(b)
+ _bytepos = b
+ _position = index
+ return b
+ end
+ if dpos == -1 then
+ b = _items.find_beginning_of_char_at(b - 1)
+ _bytepos = b
+ _position = index
+ return b
+ end
+ var ln = _length
var pos = _position
# Find best insertion point
var delta_begin = index
var ns_i: Int
var my_i: Int
- if min == delta_begin then
- ns_i = first_byte
- my_i = 0
- else if min == delta_cache then
+ if min == delta_cache then
ns_i = _bytepos
my_i = pos
+ else if min == delta_begin then
+ ns_i = first_byte
+ my_i = 0
else
ns_i = its.find_beginning_of_char_at(last_byte)
- my_i = length - 1
+ my_i = _length - 1
end
ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
return nns.to_s_unsafe(nlen)
end
- redef fun [](index) do return _items.char_at(char_to_byte_index(index))
+ # Bounds-checked character access: asserts `0 <= index < _length`,
+ # then delegates the actual lookup to `fetch_char_at`.
+ redef fun [](index) do
+ assert index >= 0 and index < _length
+ return fetch_char_at(index)
+ end
+
+ # Gets the `Char` at character position `index` in `self`
+ #
+ # WARNING: Use at your own risk, as no bounds-checking is done
+ fun fetch_char_at(index: Int): Char do
+ # Translate the character position into a byte offset in `_items`
+ var i = char_to_byte_index(index)
+ var items = _items
+ var b = items[i]
+ # Fast path: a clear high bit means a single-byte (ASCII) character
+ if b & 0x80u8 == 0x00u8 then return b.ascii
+ # Otherwise let `char_at` decode the multi-byte sequence starting at `i`
+ return items.char_at(i)
+ end
# If `self` contains only digits and alpha <= 'f', return the corresponding integer.
#
return new_items
end
- redef fun reversed
- do
+ redef fun reversed do
var b = new FlatBuffer.with_capacity(_bytelen + 1)
- for i in [0 .. _length[.step(-1) do
- b.add self[i]
+ var i = _length - 1
+ while i >= 0 do
+ b.add self.fetch_char_at(i)
+ i -= 1
end
var s = b.to_s.as(FlatString)
s._length = self._length
do
# Check that the index (+ _first_byte) is not larger than last_byte
# In other words, check that the index is valid
- assert index >= 0
- var target = self.target
+ var target = _target
+ assert index >= 0 and index < target._bytelen
var ind = index + target._first_byte
- assert ind <= target.last_byte
return target._items[ind]
end
lshift_bytes(ip + clen, -size_diff)
end
_bytelen += size_diff
- bytepos += size_diff
it.set_char_at(ip, item)
end
# ~~~raw
# assert "かきく".as(FlatString).items.char_at(1) == '�'
# ~~~
- fun char_at(pos: Int): Char `{
- char c = self[pos];
- if((c & 0x80) == 0x00) return (uint32_t)c;
- if(((c & 0xE0) == 0xC0) && ((self[pos + 1] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0x1F) << 6) + ((((uint32_t)self[pos + 1] & 0x3F)));
- if(((c & 0xF0) == 0xE0) && ((self[pos + 1] & 0xC0) == 0x80) && ((self[pos + 2] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0xF) << 12) + ((((uint32_t)self[pos + 1]) & 0x3F) << 6) + ((((uint32_t)self[pos + 2] & 0x3F)));
- if(((c & 0xF8) == 0xF0) && ((self[pos + 1] & 0xC0) == 0x80) && ((self[pos + 2] & 0xC0) == 0x80) && ((self[pos + 3] & 0xC0) == 0x80)) return ((((uint32_t)c) & 0x7) << 18) + ((((uint32_t)self[pos + 1]) & 0x3F) << 12) + ((((uint32_t)self[pos + 2]) & 0x3F) << 6) + ((((uint32_t)self[pos + 3] & 0x3F)));
- return 0xFFFD;
- `}
+ fun char_at(pos: Int): Char do
+ var c = self[pos]
+ # Fast path: a clear high bit means a one-byte (ASCII) character
+ if c & 0x80u8 == 0u8 then return c.ascii
+ # NOTE(review): `fetch_4_hchars` is not visible here; the masks below assume it packs
+ # the bytes at `pos .. pos + 3` into one Int with the byte at `pos` in bits 24-31 — confirm
+ var b = fetch_4_hchars(pos)
+ var ret = 0
+ # Under that layout: the second byte must be a continuation byte (10xxxxxx), else U+FFFD
+ if b & 0xC00000 != 0x800000 then return 0xFFFD.code_point
+ # Two-byte sequence (lead 110xxxxx): 5 + 6 payload bits
+ if b & 0xE0000000 == 0xC0000000 then
+ ret |= (b & 0x1F000000) >> 18
+ ret |= (b & 0x3F0000) >> 16
+ return ret.code_point
+ end
+ # The third byte must also be a continuation byte, else U+FFFD
+ if not b & 0xC000 == 0x8000 then return 0xFFFD.code_point
+ # Three-byte sequence (lead 1110xxxx): 4 + 6 + 6 payload bits
+ if b & 0xF0000000 == 0xE0000000 then
+ ret |= (b & 0xF000000) >> 12
+ ret |= (b & 0x3F0000) >> 10
+ ret |= (b & 0x3F00) >> 8
+ return ret.code_point
+ end
+ # The fourth byte must also be a continuation byte, else U+FFFD
+ if not b & 0xC0 == 0x80 then return 0xFFFD.code_point
+ # Four-byte sequence (lead 11110xxx): 3 + 6 + 6 + 6 payload bits
+ if b & 0xF8000000 == 0xF0000000 then
+ ret |= (b.to_i & 0x7000000) >> 6
+ ret |= (b.to_i & 0x3F0000) >> 4
+ ret |= (b.to_i & 0x3F00) >> 2
+ ret |= b.to_i & 0x3F
+ return ret.code_point
+ end
+ # The lead byte matches none of the patterns above: return the replacement character U+FFFD
+ return 0xFFFD.code_point
+ end
# Gets the byte index of the char at position `n` in a UTF-8 String
#
# Delegates to `char_to_byte_index_cached` with both starting offsets set to 0.
fun char_to_byte_index(n: Int): Int do return char_to_byte_index_cached(n, 0, 0)
var ns_i = byte_from
var my_i = char_from
- while my_i < n do
+ var dist = n - my_i
+
+ while dist > 0 do
+ while dist >= 4 do
+ var i = fetch_4_chars(ns_i)
+ if i & 0x80808080 != 0 then break
+ ns_i += 4
+ my_i += 4
+ dist -= 4
+ end
+ if dist == 0 then break
ns_i += length_of_char_at(ns_i)
my_i += 1
+ dist -= 1
end
- while my_i > n do
+ while dist < 0 do
+ while dist <= -4 do
+ var i = fetch_4_chars(ns_i - 4)
+ if i & 0x80808080 != 0 then break
+ ns_i -= 4
+ my_i -= 4
+ dist += 4
+ end
+ if dist == 0 then break
ns_i = find_beginning_of_char_at(ns_i - 1)
my_i -= 1
+ dist += 1
end
return ns_i