redef class FlatText
- private fun first_byte: Int do return 0
+ # First byte of the CString
+ protected fun first_byte: Int do return 0
- private fun last_byte: Int do return bytelen - 1
+ # Last byte of the CString
+ protected fun last_byte: Int do return first_byte + _byte_length - 1
# Cache of the latest position (char) explored in the string
var position: Int = 0
- # Cached position (bytes) in the NativeString underlying the String
- var bytepos: Int = first_byte is lateinit
+ # Cached position (bytes) in the CString underlying the String
+ var bytepos: Int = 0
- # Index of the character `index` in `items`
- private fun char_to_byte_index(index: Int): Int do
- var ln = length
- assert index >= 0
- assert index < ln
+ # Index of the character `index` in `_items`
+ fun char_to_byte_index(index: Int): Int do
+ var dpos = index - _position
+ var b = _bytepos
+ var its = _items
+
+ if dpos == 1 then
+ if its[b] & 0x80u8 == 0x00u8 then
+ b += 1
+ else
+ b += its.length_of_char_at(b)
+ end
+ _bytepos = b
+ _position = index
+ return b
+ end
+ if dpos == -1 then
+ b = its.find_beginning_of_char_at(b - 1)
+ _bytepos = b
+ _position = index
+ return b
+ end
+ if dpos == 0 then return b
+ var ln = _length
+ var pos = _position
# Find best insertion point
var delta_begin = index
var delta_end = (ln - 1) - index
- var delta_cache = (position - index).abs
+ var delta_cache = (pos - index).abs
var min = delta_begin
- var its = items
if delta_cache < min then min = delta_cache
if delta_end < min then min = delta_end
var ns_i: Int
var my_i: Int
- if min == delta_begin then
+ if min == delta_cache then
+ ns_i = _bytepos
+ my_i = pos
+ else if min == delta_begin then
ns_i = first_byte
my_i = 0
- else if min == delta_cache then
- ns_i = bytepos
- my_i = position
else
ns_i = its.find_beginning_of_char_at(last_byte)
- my_i = length - 1
+ my_i = _length - 1
end
ns_i = its.char_to_byte_index_cached(index, my_i, ns_i)
- position = index
- bytepos = ns_i
+ _position = index
+ _bytepos = ns_i
return ns_i
end
- private fun byte_to_char_index(index: Int): Int do
- var ln = bytelen
- assert index >= 0
- assert index < bytelen
+ # Number of additional bytes required to store the HTML-escaped form of `self`.
+ #
+ # `<` and `>` each account for 3 extra bytes; `&`, `"`, `'` and `/` each
+ # account for 4. A result of 0 means that no byte of `self` is escaped.
+ fun chars_to_html_escape: Int do
+ 	var bytes = _items
+ 	var stop = last_byte
+ 	var extra = 0
+ 	var cursor = first_byte
+ 	while cursor <= stop do
+ 		var b = bytes[cursor]
+ 		cursor += 1
+ 		if b == 0x3Cu8 or b == 0x3Eu8 then
+ 			# `<` and `>`
+ 			extra += 3
+ 		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+ 			# `&`, `"`, `'` and `/`
+ 			extra += 4
+ 		end
+ 	end
+ 	return extra
+ end
- # Find best insertion point
- var delta_begin = index
- var delta_end = (ln - 1) - index
- var delta_cache = (bytepos - index).abs
- var min = delta_begin
- var its = items
+ # Returns `self` with the bytes `<`, `>`, `&`, `"`, `'` and `/` replaced
+ # by HTML entities.
+ #
+ # When `chars_to_html_escape` reports 0, nothing is copied and `to_s`
+ # is returned directly.
+ redef fun html_escape
+ do
+ var extra = chars_to_html_escape
+ if extra == 0 then return to_s
+ var its = _items
+ var max = last_byte
+ var pos = first_byte
+ # The output holds the original bytes plus the extra bytes counted above
+ var nlen = extra + _byte_length
+ var nits = new CString(nlen)
+ var outpos = 0
+ while pos <= max do
+ var c = its[pos]
+ # Special codes:
+ # Some HTML characters are used as meta-data, they need
+ # to be replaced by an HTML-Escaped equivalent
+ #
+ # * 0x3C (<) => &lt;
+ # * 0x3E (>) => &gt;
+ # * 0x26 (&) => &amp;
+ # * 0x22 (") => &#34;
+ # * 0x27 (') => &#39;
+ # * 0x2F (/) => &#47;
+ if c == 0x3Cu8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x6Cu8
+ nits[outpos + 2] = 0x74u8
+ nits[outpos + 3] = 0x3Bu8
+ outpos += 4
+ else if c == 0x3Eu8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x67u8
+ nits[outpos + 2] = 0x74u8
+ nits[outpos + 3] = 0x3Bu8
+ outpos += 4
+ else if c == 0x26u8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x61u8
+ nits[outpos + 2] = 0x6Du8
+ nits[outpos + 3] = 0x70u8
+ nits[outpos + 4] = 0x3Bu8
+ outpos += 5
+ else if c == 0x22u8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x23u8
+ nits[outpos + 2] = 0x33u8
+ nits[outpos + 3] = 0x34u8
+ nits[outpos + 4] = 0x3Bu8
+ outpos += 5
+ else if c == 0x27u8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x23u8
+ nits[outpos + 2] = 0x33u8
+ nits[outpos + 3] = 0x39u8
+ nits[outpos + 4] = 0x3Bu8
+ outpos += 5
+ else if c == 0x2Fu8 then
+ nits[outpos] = 0x26u8
+ nits[outpos + 1] = 0x23u8
+ nits[outpos + 2] = 0x34u8
+ nits[outpos + 3] = 0x37u8
+ nits[outpos + 4] = 0x3Bu8
+ outpos += 5
+ else
+ # Any other byte is copied unchanged
+ nits[outpos] = c
+ outpos += 1
+ end
+ pos += 1
+ end
+ var s = new FlatString.with_infos(nits, nlen, 0)
+ return s
+ end
- if delta_cache < min then min = delta_cache
- if delta_end < min then min = delta_end
+ # By escaping `self` to C, how many more bytes will be needed ?
+ #
+ # This enables a double-optimization in `escape_to_c` since if this
+ # method returns 0, then `self` does not need escaping and can be
+ # returned as-is
+ #
+ # The rules applied here must stay identical to the ones of `escape_to_c`,
+ # which sizes its output buffer from this count.
+ fun chars_to_escape_to_c: Int do
+ 	var its = _items
+ 	var max = last_byte
+ 	var pos = first_byte
+ 	var req_esc = 0
+ 	while pos <= max do
+ 		var c = its[pos]
+ 		if c == 0x0Au8 then
+ 			req_esc += 1
+ 		else if c == 0x09u8 then
+ 			req_esc += 1
+ 		else if c == 0x22u8 then
+ 			req_esc += 1
+ 		else if c == 0x27u8 then
+ 			req_esc += 1
+ 		else if c == 0x5Cu8 then
+ 			req_esc += 1
+ 		else if c == 0x3Fu8 then
+ 			var j = pos + 1
+ 			# `j` is a byte offset in `_items`: it is bounded by `last_byte`,
+ 			# not by the number of characters, which is smaller than the
+ 			# offset as soon as `first_byte > 0` or a character is multi-byte.
+ 			if j <= max then
+ 				var next = its[j]
+ 				# We ignore `??'` because it will be escaped as `??\'`.
+ 				if
+ 					next == 0x21u8 or
+ 					next == 0x28u8 or
+ 					next == 0x29u8 or
+ 					next == 0x2Du8 or
+ 					next == 0x2Fu8 or
+ 					next == 0x3Cu8 or
+ 					next == 0x3Du8 or
+ 					next == 0x3Eu8
+ 				then req_esc += 1
+ 			end
+ 		else if c < 32u8 then
+ 			# Other control bytes become a backslash and three octal digits
+ 			req_esc += 3
+ 		end
+ 		pos += 1
+ 	end
+ 	return req_esc
+ end
- var ns_i: Int
- var my_i: Int
+ # Returns `self` escaped so that it can be embedded in a C string literal.
+ #
+ # When `chars_to_escape_to_c` reports 0, nothing is copied and `to_s` is
+ # returned directly. Otherwise a new buffer of `_byte_length` plus the
+ # reported count is filled byte by byte.
+ redef fun escape_to_c do
+ 	var ln_extra = chars_to_escape_to_c
+ 	if ln_extra == 0 then return self.to_s
+ 	var its = _items
+ 	var max = last_byte
+ 	var nlen = _byte_length + ln_extra
+ 	var nns = new CString(nlen)
+ 	var pos = first_byte
+ 	var opos = 0
+ 	while pos <= max do
+ 		var c = its[pos]
+ 		# Special codes:
+ 		#
+ 		# Any byte with value < 32 is a control character
+ 		# All their uses will be replaced by their octal
+ 		# value in C.
+ 		#
+ 		# There are two exceptions however:
+ 		#
+ 		# * 0x09 => \t
+ 		# * 0x0A => \n
+ 		#
+ 		# Aside from the code points above, the following are
+ 		# prefixed with a backslash:
+ 		#
+ 		# * 0x22 => \"
+ 		# * 0x27 => \'
+ 		# * 0x5C => \\
+ 		#
+ 		# A `?` is also prefixed with a backslash when the byte that
+ 		# follows it is one of `!()-/<=>`.
+ 		if c == 0x09u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x74u8
+ 			opos += 2
+ 		else if c == 0x0Au8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x6Eu8
+ 			opos += 2
+ 		else if c == 0x22u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x22u8
+ 			opos += 2
+ 		else if c == 0x27u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x27u8
+ 			opos += 2
+ 		else if c == 0x5Cu8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x5Cu8
+ 			opos += 2
+ 		else if c == 0x3Fu8 then
+ 			var j = pos + 1
+ 			# Same byte-based bound as in `chars_to_escape_to_c`: both
+ 			# methods must take the same decision for every `?`.
+ 			if j <= max then
+ 				var next = its[j]
+ 				# We ignore `??'` because it will be escaped as `??\'`.
+ 				if
+ 					next == 0x21u8 or
+ 					next == 0x28u8 or
+ 					next == 0x29u8 or
+ 					next == 0x2Du8 or
+ 					next == 0x2Fu8 or
+ 					next == 0x3Cu8 or
+ 					next == 0x3Du8 or
+ 					next == 0x3Eu8
+ 				then
+ 					nns[opos] = 0x5Cu8
+ 					opos += 1
+ 				end
+ 			end
+ 			nns[opos] = 0x3Fu8
+ 			opos += 1
+ 		else if c < 32u8 then
+ 			# Backslash, a leading `0`, then the two low octal digits of `c`
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x30u8
+ 			nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
+ 			nns[opos + 3] = (c & 0x07u8) + 0x30u8
+ 			opos += 4
+ 		else
+ 			nns[opos] = c
+ 			opos += 1
+ 		end
+ 		pos += 1
+ 	end
+ 	return nns.to_s_unsafe(nlen)
+ end
- if min == delta_begin then
- ns_i = first_byte
- my_i = 0
- else if min == delta_cache then
- ns_i = bytepos
- my_i = position
- else
- ns_i = its.find_beginning_of_char_at(last_byte)
- my_i = length - 1
+ redef fun [](index) do
+ var len = _length
+
+ # Statistically:
+ # * ~70% want the next char
+ # * ~23% want the previous
+ # * ~7% want the same char
+ #
+ # So it makes sense to shortcut early. And early is here.
+ var dpos = index - _position
+ var b = _bytepos
+ if dpos == 1 and index < len - 1 then
+ var its = _items
+ var c = its[b]
+ if c & 0x80u8 == 0x00u8 then
+ # We want the next, and current is easy.
+ # So next is easy to find!
+ b += 1
+ _position = index
+ _bytepos = b
+ # The rest will be done by `dpos==0` below.
+ dpos = 0
+ end
+ else if dpos == -1 and index > 1 then
+ var its = _items
+ var c = its[b-1]
+ if c & 0x80u8 == 0x00u8 then
+ # We want the previous, and it is easy.
+ b -= 1
+ dpos = 0
+ _position = index
+ _bytepos = b
+ return c.ascii
+ end
+ end
+ if dpos == 0 then
+ # We know what we want (+0 or +1) just get it now!
+ var its = _items
+ var c = its[b]
+ if c & 0x80u8 == 0x00u8 then return c.ascii
+ return items.char_at(b)
end
- my_i = its.byte_to_char_index_cached(index, my_i, ns_i)
+ assert index >= 0 and index < len
+ return fetch_char_at(index)
+ end
- position = my_i
- bytepos = index
+ # Returns the `Char` located at character position `index` of `self`.
+ #
+ # WARNING: `index` is not bound-checked here, use at your own risk.
+ fun fetch_char_at(index: Int): Char do
+ 	var offset = char_to_byte_index(index)
+ 	var bytes = _items
+ 	var lead = bytes[offset]
+ 	# A byte with its high bit set is handed to `char_at` for decoding
+ 	if lead & 0x80u8 != 0x00u8 then return bytes.char_at(offset)
+ 	return lead.ascii
+ end
- return my_i
+ # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
+ #
+ #     assert "ff".to_hex == 255
+ #
+ # `pos` defaults to 0 and `ln` to the number of characters left after `pos`.
+ redef fun to_hex(pos, ln) do
+ 	if pos == null then pos = 0
+ 	if ln == null then ln = length - pos
+ 	var bytes = _items
+ 	var cursor = char_to_byte_index(pos)
+ 	var stop = cursor + ln
+ 	var value = 0
+ 	while cursor < stop do
+ 		# Each byte contributes one hexadecimal digit (4 bits)
+ 		value = (value << 4) + bytes[cursor].ascii.from_hex
+ 		cursor += 1
+ 	end
+ 	return value
end
- redef fun [](index) do return items.char_at(char_to_byte_index(index))
+ # Forwards to `_items.copy_to` with `dst`, `n` and `dst_off` unchanged;
+ # `src_off` is shifted by `first_byte` so that it is relative to the
+ # beginning of `self` rather than to the beginning of `_items`.
+ redef fun copy_to_native(dst, n, src_off, dst_off) do
+ _items.copy_to(dst, n, first_byte + src_off, dst_off)
+ end
end
# Immutable strings of characters.
-class FlatString
+abstract class FlatString
super FlatText
super String
- # Index at which `self` begins in `items`, inclusively
+ # Index at which `self` begins in `_items`, inclusively
redef var first_byte is noinit
- # Index at which `self` ends in `items`, inclusively
- redef var last_byte is noinit
-
- redef var chars = new FlatStringCharView(self) is lazy
+ redef fun chars do return new FlatStringCharView(self)
- redef var bytes = new FlatStringByteView(self) is lazy
+ redef fun bytes do return new FlatStringByteView(self)
- redef var length is lazy do
- if bytelen == 0 then return 0
- var st = first_byte
- var its = items
- var ln = 0
- var lst = last_byte
- while st <= lst do
- st += its.length_of_char_at(st)
- ln += 1
- end
- return ln
+ # Returns a newly allocated `CString` holding the `_byte_length` bytes of
+ # `self` (read from `_first_byte`) followed by a terminating 0 byte.
+ redef fun to_cstring do
+ var blen = _byte_length
+ # One extra byte is reserved for the terminator
+ var new_items = new CString(blen + 1)
+ _items.copy_to(new_items, blen, _first_byte, 0)
+ new_items[blen] = 0u8
+ return new_items
end
- redef fun reversed
- do
- var b = new FlatBuffer.with_capacity(bytelen + 1)
- for i in [length - 1 .. 0].step(-1) do
- b.add self[i]
+ redef fun reversed do
+ var b = new FlatBuffer.with_capacity(_byte_length + 1)
+ var i = _length - 1
+ while i >= 0 do
+ b.add self.fetch_char_at(i)
+ i -= 1
end
var s = b.to_s.as(FlatString)
- s.length = self.length
+ s._length = self._length
return s
end
- redef fun fast_cstring do return items.fast_cstring(first_byte)
+ redef fun fast_cstring do return _items.fast_cstring(_first_byte)
redef fun substring(from, count)
do
- assert count >= 0
+ if count <= 0 then return ""
if from < 0 then
count += from
- if count < 0 then count = 0
+ if count <= 0 then return ""
from = 0
end
- if (count + from) > length then count = length - from
+ var ln = _length
+ if (count + from) > ln then count = ln - from
if count <= 0 then return ""
var end_index = from + count - 1
+ return substring_impl(from, count, end_index)
+ end
+
+ private fun substring_impl(from, count, end_index: Int): String do
+ var cache = _position
+ var dfrom = (cache - from).abs
+ var dend = (end_index - from).abs
- var bytefrom = char_to_byte_index(from)
- var byteto = char_to_byte_index(end_index)
- byteto += items.length_of_char_at(byteto) - 1
+ var bytefrom: Int
+ var byteto: Int
+ if dfrom < dend then
+ bytefrom = char_to_byte_index(from)
+ byteto = char_to_byte_index(end_index)
+ else
+ byteto = char_to_byte_index(end_index)
+ bytefrom = char_to_byte_index(from)
+ end
+
+ var its = _items
+ byteto += its.length_of_char_at(byteto) - 1
- var s = new FlatString.full(items, byteto - bytefrom + 1, bytefrom, byteto, count)
+ var s = new FlatString.full(its, byteto - bytefrom + 1, bytefrom, count)
return s
end
redef fun to_upper
do
- var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)
+ var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)
- var mylen = length
+ var mylen = _length
var pos = 0
while pos < mylen do
redef fun to_lower
do
- var outstr = new FlatBuffer.with_capacity(self.bytelen + 1)
+ var outstr = new FlatBuffer.with_capacity(self._byte_length + 1)
- var mylen = length
+ var mylen = _length
var pos = 0
while pos < mylen do
# Low-level creation of a new string with minimal data.
#
- # `items` will be used as is, without copy, to retrieve the characters of the string.
+ # `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
- private init with_infos(items: NativeString, bytelen, from, to: Int)
+ private new with_infos(items: CString, byte_length, from: Int)
do
- self.items = items
- self.bytelen = bytelen
- first_byte = from
- last_byte = to
+ var len = items.utf8_length(from, byte_length)
+ if byte_length == len then return new ASCIIFlatString.full_data(items, byte_length, from, len)
+ return new UnicodeFlatString.full_data(items, byte_length, from, len)
end
# Low-level creation of a new string with all the data.
#
- # `items` will be used as is, without copy, to retrieve the characters of the string.
+ # `_items` will be used as is, without copy, to retrieve the characters of the string.
# Aliasing issues is the responsibility of the caller.
- private init full(items: NativeString, bytelen, from, to, length: Int)
+ private new full(items: CString, byte_length, from, length: Int)
do
- self.items = items
- self.length = length
- self.bytelen = bytelen
- first_byte = from
- last_byte = to
- end
-
- redef fun to_cstring do
- if real_items != null then return real_items.as(not null)
- var new_items = new NativeString(bytelen + 1)
- self.items.copy_to(new_items, bytelen, first_byte, 0)
- new_items[bytelen] = 0u8
- real_items = new_items
- return new_items
+ if byte_length == length then return new ASCIIFlatString.full_data(items, byte_length, from, length)
+ return new UnicodeFlatString.full_data(items, byte_length, from, length)
end
redef fun ==(other)
do
- if not other isa FlatString then return super
+ if not other isa FlatText then return super
if self.object_id == other.object_id then return true
- var my_length = bytelen
+ var my_length = _byte_length
- if other.bytelen != my_length then return false
+ if other._byte_length != my_length then return false
- var my_index = first_byte
+ var my_index = _first_byte
var its_index = other.first_byte
var last_iteration = my_index + my_length
- var itsitems = other.items
- var myitems = self.items
+ var its_items = other._items
+ var my_items = self._items
while my_index < last_iteration do
- if myitems[my_index] != itsitems[its_index] then return false
+ if my_items[my_index] != its_items[its_index] then return false
my_index += 1
its_index += 1
end
redef fun <(other)
do
- if not other isa FlatString then return super
+ if not other isa FlatText then return super
if self.object_id == other.object_id then return false
- var my_length = self.bytelen
- var its_length = other.bytelen
+ var myits = _items
+ var itsits = other._items
- var max = if my_length < its_length then my_length else its_length
+ var mbt = _byte_length
+ var obt = other.byte_length
- var myits = self.bytes
- var itsits = other.bytes
+ var minln = if mbt < obt then mbt else obt
+ var mst = _first_byte
+ var ost = other.first_byte
- for i in [0 .. max[ do
- var my_curr_char = myits[i]
- var its_curr_char = itsits[i]
+ for i in [0 .. minln[ do
+ var my_curr_char = myits[mst]
+ var its_curr_char = itsits[ost]
- if my_curr_char != its_curr_char then
- if my_curr_char < its_curr_char then return true
- return false
- end
+ if my_curr_char > its_curr_char then return false
+ if my_curr_char < its_curr_char then return true
+
+ mst += 1
+ ost += 1
end
- return my_length < its_length
+ return mbt < obt
end
redef fun +(o) do
var s = o.to_s
- var slen = s.bytelen
- var mlen = bytelen
+ var slen = s.byte_length
+ var mlen = _byte_length
var nlen = mlen + slen
- var mits = items
- var mifrom = first_byte
+ var mits = _items
+ var mifrom = _first_byte
if s isa FlatText then
- var sits = s.items
+ var sits = s._items
var sifrom = s.first_byte
- var ns = new NativeString(nlen + 1)
+ var ns = new CString(nlen + 1)
mits.copy_to(ns, mlen, mifrom, 0)
sits.copy_to(ns, slen, sifrom, mlen)
- return new FlatString.full(ns, nlen, 0, nlen - 1, length + o.length)
+ return new FlatString.full(ns, nlen, 0, _length + o.length)
else
abort
end
end
redef fun *(i) do
- var mybtlen = bytelen
- var new_bytelen = mybtlen * i
- var mylen = length
+ var mybtlen = _byte_length
+ var new_byte_length = mybtlen * i
+ var mylen = _length
var newlen = mylen * i
- var ns = new NativeString(new_bytelen + 1)
- ns[new_bytelen] = 0u8
+ var its = _items
+ var fb = _first_byte
+ var ns = new CString(new_byte_length + 1)
+ ns[new_byte_length] = 0u8
var offset = 0
while i > 0 do
- items.copy_to(ns, bytelen, first_byte, offset)
+ its.copy_to(ns, mybtlen, fb, offset)
offset += mybtlen
i -= 1
end
- return new FlatString.full(ns, new_bytelen, 0, new_bytelen - 1, newlen)
+ return new FlatString.full(ns, new_byte_length, 0, newlen)
end
-
redef fun hash
do
if hash_cache == null then
# djb2 hash algorithm
var h = 5381
- var i = first_byte
+ var i = _first_byte
- var myitems = items
+ var my_items = _items
+ var max = last_byte
- while i <= last_byte do
- h = (h << 5) + h + myitems[i].to_i
+ while i <= max do
+ h = (h << 5) + h + my_items[i].to_i
i += 1
end
redef fun substrings do return new FlatSubstringsIter(self)
end
+# Flat string whose byte length differs from its character length
+#
+# This is the variant built by the `FlatString` factories when the content
+# is not made of single-byte characters only.
+private class UnicodeFlatString
+ super FlatString
+
+ # Builds the string over `items` from fully known data: `byte_length` bytes
+ # starting at byte `from`, holding `length` characters.
+ init full_data(items: CString, byte_length, from, length: Int) do
+ 	_items = items
+ 	_first_byte = from
+ 	# The position cache starts on the first character
+ 	_bytepos = from
+ 	_byte_length = byte_length
+ 	_length = length
+ end
+
+ redef fun substring_from(from) do
+ 	if from >= _length then return empty
+ 	if from <= 0 then return self
+ 	var start = char_to_byte_index(from)
+ 	# Bytes left once the ones located before `start` are skipped
+ 	var remaining = byte_length - (start - _first_byte)
+ 	return new FlatString.full(items, remaining, start, _length - from)
+ end
+end
+
+# Flat string in which every character is stored on a single byte
+#
+# A character index maps to a byte offset by adding `_first_byte`, so the
+# accessors below never scan the underlying bytes.
+private class ASCIIFlatString
+ super FlatString
+
+ # Builds the string over `items` from fully known data: `byte_length` bytes
+ # starting at byte `from`, holding `length` characters.
+ init full_data(items: CString, byte_length, from, length: Int) do
+ 	_items = items
+ 	_first_byte = from
+ 	_bytepos = from
+ 	_byte_length = byte_length
+ 	_length = length
+ end
+
+ redef fun [](idx) do
+ 	assert idx >= 0 and idx < _byte_length
+ 	return _items[_first_byte + idx].ascii
+ end
+
+ redef fun substring(from, count) do
+ 	if count <= 0 then return ""
+ 	var total = _length
+ 	# Clamp the request to the end of the string
+ 	if (count + from) > total then count = total - from
+ 	if count <= 0 then return ""
+ 	if from < 0 then
+ 		# A negative start shortens the requested span
+ 		count += from
+ 		if count <= 0 then return ""
+ 		from = 0
+ 	end
+ 	return new ASCIIFlatString.full_data(_items, count, _first_byte + from, count)
+ end
+
+ redef fun reversed do
+ 	var buf = new FlatBuffer.with_capacity(_byte_length + 1)
+ 	var idx = _length
+ 	while idx > 0 do
+ 		idx -= 1
+ 		buf.add self[idx]
+ 	end
+ 	return buf.to_s.as(FlatString)
+ end
+
+ redef fun char_to_byte_index(index) do return _first_byte + index
+
+ redef fun substring_impl(from, count, end_index) do
+ 	# Byte length and character length are both `count`
+ 	return new ASCIIFlatString.full_data(_items, count, _first_byte + from, count)
+ end
+
+ redef fun fetch_char_at(i) do return _items[_first_byte + i].ascii
+end
+
private class FlatStringCharReverseIterator
super IndexedIterator[Char]
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
- do
- init(tgt, pos)
- end
-
redef fun is_ok do return curr_pos >= 0
redef fun item do return target[curr_pos]
var target: FlatString
- var max: Int
+ var max: Int is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
- do
- init(tgt, tgt.length - 1, pos)
- end
+ init do max = target._length - 1
redef fun is_ok do return curr_pos <= max
redef fun [](index) do return target[index]
- redef fun iterator_from(start) do return new FlatStringCharIterator.with_pos(target, start)
+ redef fun iterator_from(start) do return new FlatStringCharIterator(target, start)
- redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator.with_pos(target, start)
+ redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator(target, start)
end
var target: FlatString
- var target_items: NativeString
+ var target_items: CString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
+ init
do
- init(tgt, tgt.items, pos + tgt.first_byte)
+ var tgt = target
+ target_items = tgt._items
+ curr_pos += tgt._first_byte
end
- redef fun is_ok do return curr_pos >= target.first_byte
+ redef fun is_ok do return curr_pos >= target._first_byte
redef fun item do return target_items[curr_pos]
redef fun next do curr_pos -= 1
- redef fun index do return curr_pos - target.first_byte
+ redef fun index do return curr_pos - target._first_byte
end
var target: FlatString
- var target_items: NativeString
+ var target_items: CString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
+ init
do
- init(tgt, tgt.items, pos + tgt.first_byte)
+ var tgt = target
+ target_items = tgt._items
+ curr_pos += tgt._first_byte
end
redef fun is_ok do return curr_pos <= target.last_byte
redef fun next do curr_pos += 1
- redef fun index do return curr_pos - target.first_byte
+ redef fun index do return curr_pos - target._first_byte
end
redef fun [](index)
do
- # Check that the index (+ first_byte) is not larger than last_byte
+ # Check that the index (+ _first_byte) is not larger than last_byte
# In other terms, if the index is valid
- assert index >= 0
- var target = self.target
- assert (index + target.first_byte) <= target.last_byte
- return target.items[index + target.first_byte]
+ var target = _target
+ assert index >= 0 and index < target._byte_length
+ var ind = index + target._first_byte
+ return target._items[ind]
end
- redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
+ redef fun iterator_from(start) do return new FlatStringByteIterator(target, start)
- redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator.with_pos(target, start)
+ redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator(target, start)
end
super FlatText
super Buffer
- redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
+ redef fun chars do return new FlatBufferCharView(self)
- redef var bytes = new FlatBufferByteView(self) is lazy
-
- redef var bytelen = 0
-
- redef var length = 0
-
- private var char_cache: Int = -1
-
- private var byte_cache: Int = -1
+ redef fun bytes do return new FlatBufferByteView(self)
private var capacity = 0
- redef fun fast_cstring do return items.fast_cstring(0)
+ redef fun fast_cstring do return _items.fast_cstring(0)
redef fun substrings do return new FlatSubstringsIter(self)
- # Re-copies the `NativeString` into a new one and sets it as the new `Buffer`
+ # Re-copies the `CString` into a new one and sets it as the new `Buffer`
#
# This happens when an operation modifies the current `Buffer` and
# the Copy-On-Write flag `written` is set at true.
private fun reset do
- var nns = new NativeString(capacity)
- items.copy_to(nns, bytelen, 0, 0)
- items = nns
+ var nns = new CString(capacity)
+ if _byte_length != 0 then _items.copy_to(nns, _byte_length, 0, 0)
+ _items = nns
written = false
end
# Shifts the content of the buffer by `len` bytes to the right, starting at byte `from`
#
- # Internal only, does not modify bytelen or length, this is the caller's responsability
+ # Internal only, does not modify _byte_length or length, this is the caller's responsibility
+ #
+ # NOTE(review): in the reallocation branch below, `copy_to` is asked to copy
+ # 0 bytes and the new `nit` is never stored in `_items`. The callers visible
+ # in this file call `enlarge` first, so the branch is presumably never taken;
+ # confirm before relying on it.
private fun rshift_bytes(from: Int, len: Int) do
- var oit = items
- var nit = items
- if bytelen + len > capacity then
+ var oit = _items
+ var nit = _items
+ var bt = _byte_length
+ if bt + len > capacity then
capacity = capacity * 2 + 2
- nit = new NativeString(capacity)
+ nit = new CString(capacity)
oit.copy_to(nit, 0, 0, from)
end
- oit.copy_to(nit, bytelen - from, from, from + len)
+ # Moves the `bt - from` bytes found at `from` to `from + len`
+ oit.copy_to(nit, bt - from, from, from + len)
end
# Shifts the content of the buffer by `len` bytes to the left, starting at `from`
#
- # Internal only, does not modify bytelen or length, this is the caller's responsability
+ # Internal only, does not modify _byte_length or length, this is the caller's responsibility
private fun lshift_bytes(from: Int, len: Int) do
- items.copy_to(items, bytelen - from, from, from - len)
+ var it = _items
+ it.copy_to(it, _byte_length - from, from, from - len)
end
redef fun []=(index, item)
do
- assert index >= 0 and index <= length
+ assert index >= 0 and index <= _length
if written then reset
- is_dirty = true
- if index == length then
+ if index == _length then
add item
return
end
- var ip = items.char_to_byte_index(index)
- var c = items.char_at(ip)
+ var it = _items
+ var ip = it.char_to_byte_index(index)
+ var c = it.char_at(ip)
var clen = c.u8char_len
var itemlen = item.u8char_len
var size_diff = itemlen - clen
else if size_diff < 0 then
lshift_bytes(ip + clen, -size_diff)
end
- bytelen += size_diff
- bytepos += size_diff
- items.set_char_at(ip, item)
+ _byte_length += size_diff
+ it.set_char_at(ip, item)
+ end
+
+ # Inserts the content of `s` before the character at position `pos`.
+ #
+ # `pos` must be in `[0, length]`; when it equals `length`, `s` is appended.
+ redef fun insert(s, pos) do
+ 	assert pos >= 0 and pos <= length
+ 	if pos == length then
+ 		append s
+ 		return
+ 	end
+ 	# Existing bytes are about to be moved in place: as in `[]=` and `add`,
+ 	# detach from `_items` first when it is shared (`written` is set by `to_s`),
+ 	# otherwise the string previously returned by `to_s` would be altered.
+ 	if written then reset
+ 	var slen = s.byte_length
+ 	enlarge(byte_length + slen)
+ 	var it = _items
+ 	var shpos = it.char_to_byte_index(pos)
+ 	# Open a gap of `slen` bytes at `shpos`, then fill it with the bytes of `s`
+ 	rshift_bytes(shpos, slen)
+ 	s.copy_to_native(it, slen, 0, shpos)
+ 	length += s.length
+ 	byte_length += slen
+ end
+
+ # Inserts the character `c` before the character at position `pos`.
+ #
+ # `pos` must be in `[0, length]`; when it equals `length`, `c` is added at the end.
+ redef fun insert_char(c, pos) do
+ 	assert pos >= 0 and pos <= length
+ 	if pos == length then
+ 		add c
+ 		return
+ 	end
+ 	# Existing bytes are about to be moved in place: as in `[]=` and `add`,
+ 	# detach from `_items` first when it is shared (`written` is set by `to_s`),
+ 	# otherwise the string previously returned by `to_s` would be altered.
+ 	if written then reset
+ 	var clen = c.u8char_len
+ 	enlarge(byte_length + clen)
+ 	var it = _items
+ 	var shpos = it.char_to_byte_index(pos)
+ 	# Open a gap of `clen` bytes at `shpos`, then encode `c` into it
+ 	rshift_bytes(shpos, clen)
+ 	it.set_char_at(shpos, c)
+ 	length += 1
+ 	byte_length += clen
+ end
redef fun add(c)
do
if written then reset
- is_dirty = true
var clen = c.u8char_len
- enlarge(bytelen + clen)
- items.set_char_at(bytelen, c)
- bytelen += clen
- length += 1
+ var bt = _byte_length
+ enlarge(bt + clen)
+ _items.set_char_at(bt, c)
+ _byte_length += clen
+ _length += 1
end
redef fun clear do
- is_dirty = true
- if written then reset
- bytelen = 0
- length = 0
+ _byte_length = 0
+ _length = 0
+ if written then
+ _capacity = 16
+ reset
+ end
end
redef fun empty do return new Buffer
do
var c = capacity
if cap <= c then return
- while c <= cap do c = c * 2 + 2
+ if c <= 16 then c = 16
+ while c <= cap do c = c * 2
# The COW flag can be set at false here, since
# it does a copy of the current `Buffer`
written = false
- var a = new NativeString(c+1)
- if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
- items = a
+ var bln = _byte_length
+ var a = new CString(c)
+ if bln > 0 then
+ var it = _items
+ if bln > 0 then it.copy_to(a, bln, 0, 0)
+ end
+ _items = a
capacity = c
end
redef fun to_s
do
written = true
- if bytelen == 0 then items = new NativeString(1)
- return new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
+ var bln = _byte_length
+ if bln == 0 then _items = new CString(1)
+ return new FlatString.full(_items, bln, 0, _length)
end
redef fun to_cstring
do
- if is_dirty then
- var new_native = new NativeString(bytelen + 1)
- new_native[bytelen] = 0u8
- if length > 0 then items.copy_to(new_native, bytelen, 0, 0)
- real_items = new_native
- is_dirty = false
- end
- return real_items.as(not null)
+ var bln = _byte_length
+ var new_native = new CString(bln + 1)
+ new_native[bln] = 0u8
+ if _length > 0 then _items.copy_to(new_native, bln, 0, 0)
+ return new_native
end
# Create a new empty string.
# Low-level creation a new buffer with given data.
#
- # `items` will be used as is, without copy, to store the characters of the buffer.
+ # `_items` will be used as is, without copy, to store the characters of the buffer.
# Aliasing issues is the responsibility of the caller.
#
- # If `items` is shared, `written` should be set to true after the creation
+ # If `_items` is shared, `written` should be set to true after the creation
# so that a modification will do a copy-on-write.
- private init with_infos(items: NativeString, capacity, bytelen, length: Int)
+ private init with_infos(items: CString, capacity, byte_length, length: Int)
do
- self.items = items
+ self._items = items
self.capacity = capacity
- self.bytelen = bytelen
- self.length = length
+ self._byte_length = byte_length
+ self._length = length
end
# Create a new string copied from `s`.
+ #
+ # The chunks returned by `s.substrings` are copied one after the other:
+ # each one is read from its own `first_byte` and written right after the
+ # previous one.
init from(s: Text)
do
- items = new NativeString(s.bytelen)
- if s isa FlatText then
- items = s.items
- else
- for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
- end
- bytelen = s.bytelen
- length = s.length
- capacity = s.bytelen
- written = true
+ 	_items = new CString(s.byte_length)
+ 	# Write offset in the new `_items`
+ 	var offset = 0
+ 	for i in s.substrings do
+ 		var chunk_len = i._byte_length
+ 		# The source offset is the one of the chunk, not the one of `self`
+ 		i._items.copy_to(_items, chunk_len, i.first_byte, offset)
+ 		offset += chunk_len
+ 	end
+ 	_byte_length = s.byte_length
+ 	_length = s.length
+ 	_capacity = _byte_length
end
# Create a new empty string with a given capacity.
init with_capacity(cap: Int)
do
assert cap >= 0
- items = new NativeString(cap + 1)
+ _items = new CString(cap)
capacity = cap
- bytelen = 0
+ _byte_length = 0
end
redef fun append(s)
do
if s.is_empty then return
- is_dirty = true
- var sl = s.bytelen
- enlarge(bytelen + sl)
+ var sl = s.byte_length
+ var nln = _byte_length + sl
+ enlarge(nln)
if s isa FlatText then
- s.items.copy_to(items, sl, s.first_byte, bytelen)
+ s._items.copy_to(_items, sl, s.first_byte, _byte_length)
else
for i in s.substrings do append i
return
end
- bytelen += sl
- length += s.length
+ _byte_length = nln
+ _length += s.length
end
# Copies the content of self in `dest`
do
assert count >= 0
if from < 0 then from = 0
- if (from + count) > length then count = length - from
- if count != 0 then
- var bytefrom = items.char_to_byte_index(from)
- var byteto = items.char_to_byte_index(count + from - 1)
- byteto += items.char_at(byteto).u8char_len - 1
- var byte_length = byteto - bytefrom + 1
- var r_items = new NativeString(byte_length)
- items.copy_to(r_items, byte_length, bytefrom, 0)
- return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
- else
- return new Buffer
+ if (from + count) > _length then count = _length - from
+ if count <= 0 then return new Buffer
+ var its = _items
+ var bytefrom = its.char_to_byte_index(from)
+ var byteto = its.char_to_byte_index(count + from - 1)
+ byteto += its.char_at(byteto).u8char_len - 1
+ var byte_length = byteto - bytefrom + 1
+ var r_items = new CString(byte_length)
+ its.copy_to(r_items, byte_length, bytefrom, 0)
+ return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
+ end
+
+ # Appends the `length` characters of `s` starting at character `from`.
+ #
+ # Texts that are not `FlatText` are delegated to the inherited implementation.
+ redef fun append_substring_impl(s, from, length) do
+ if length <= 0 then return
+ if not s isa FlatText then
+ super
+ return
end
+ var sits = s._items
+ # Byte offsets of the first and of the last character to copy
+ var bytest = s.char_to_byte_index(from)
+ var bytend = s.char_to_byte_index(from + length - 1)
+ # `bytend` is the start of the last character: add its encoded length
+ var btln = bytend - bytest + sits.char_at(bytend).u8char_len
+ enlarge(btln + _byte_length)
+ sits.copy_to(_items, btln, bytest, _byte_length)
+ _byte_length += btln
+ _length += length
+ end
+
+ # Removes `len` characters (1 when `len` is null) starting at character `p`.
+ #
+ # No bound-checking is done on `p` and `len`.
+ redef fun remove_at(p, len) do
+ 	if len == null then len = 1
+ 	if len == 0 then return
+ 	# Bytes are moved in place: as in `[]=` and `add`, detach from `_items`
+ 	# first when it is shared with a string returned by `to_s`.
+ 	if written then reset
+ 	var its = _items
+ 	# Offsets are looked up on the `CString`, as the other mutators of this
+ 	# class do, so that the position cache of `self` is not filled with
+ 	# offsets that the removal is about to invalidate.
+ 	var bst = its.char_to_byte_index(p)
+ 	var bend = its.char_to_byte_index(p + len - 1)
+ 	# `bend` becomes the offset of the first byte kept after the removed span
+ 	bend += its.char_at(bend).u8char_len
+ 	var blen = bend - bst
+ 	lshift_bytes(bend, blen)
+ 	byte_length -= blen
+ 	length -= len
+ end
redef fun reverse
written = false
var ns = new FlatBuffer.with_capacity(capacity)
for i in chars.reverse_iterator do ns.add i
- items = ns.items
+ _items = ns._items
end
redef fun times(repeats)
do
- var x = new FlatString.with_infos(items, bytelen, 0, bytelen - 1)
+ var bln = _byte_length
+ var x = new FlatString.full(_items, bln, 0, _length)
for i in [1 .. repeats[ do
append(x)
end
# Replaces, in place, each char of `self` by its `to_upper` counterpart
redef fun upper
do
# NOTE(review): `reset` is not visible here; presumably it detaches the bytes from a string already handed out, confirm
if written then reset
- for i in [0 .. length[ do self[i] = self[i].to_upper
+ for i in [0 .. _length[ do self[i] = self[i].to_upper
end
# Replaces, in place, each char of `self` by its `to_lower` counterpart
redef fun lower
do
# NOTE(review): `reset` is not visible here; presumably it detaches the bytes from a string already handed out, confirm
if written then reset
- for i in [0 .. length[ do self[i] = self[i].to_lower
+ for i in [0 .. _length[ do self[i] = self[i].to_lower
end
end
var target: FlatBuffer
- var target_items: NativeString
+ var target_items: CString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt.items, pos)
- end
+ init do target_items = target._items
redef fun index do return curr_pos
redef type SELFTYPE: FlatBuffer
- redef fun [](index) do return target.items[index]
+ redef fun [](index) do return target._items[index]
- redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
+ redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)
- redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
+ redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)
end
var target: FlatBuffer
- var target_items: NativeString
+ var target_items: CString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt.items, pos)
- end
+ init do target_items = target._items
redef fun index do return curr_pos
- redef fun is_ok do return curr_pos < target.bytelen
+ redef fun is_ok do return curr_pos < target._byte_length
redef fun item do return target_items[curr_pos]
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, pos)
- end
-
redef fun index do return curr_pos
redef fun is_ok do return curr_pos >= 0
# Adds every char of `s` at the end of `target`
redef fun append(s)
do
var s_length = s.length
- if target.capacity < s.length then enlarge(s_length + target.length)
+ # Size the reservation on what `target` already holds plus `s`, so the
+ # guard and the requested capacity agree (the guard used to look at `s` alone)
+ var needed = s_length + target._length
+ if target.capacity < needed then enlarge(needed)
for i in s do target.add i
end
- redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)
+ redef fun iterator_from(pos) do return new FlatBufferCharIterator(target, pos)
- redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
+ redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator(target, pos)
end
var target: FlatBuffer
- var max: Int
+ var max: Int is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt.length - 1, pos)
- end
+ init do max = target._length - 1
redef fun index do return curr_pos
end
-redef class NativeString
+redef class CString
# Builds a String from the first `cstring_length` bytes of `self`
redef fun to_s do return to_s_with_length(cstring_length)
- # Returns `self` as a String of `length`.
- redef fun to_s_with_length(length): FlatString
+ # Returns the first `length` bytes of `self` as a String, after a pass through `clean_utf8`
+ #
+ # `length` must not be negative.
+ redef fun to_s_with_length(length)
do
assert length >= 0
- var str = new FlatString.with_infos(self, length, 0, length - 1)
- return str
+ return clean_utf8(length)
+ end
+
+ # Wraps `self` in a `FlatString`, forwarding `byte_length` and `unilen` unchanged
+ redef fun to_s_full(byte_length, unilen) do return new FlatString.full(self, byte_length, 0, unilen)
+
+ # Wraps `self` in a `FlatString` without going through `clean_utf8`
+ #
+ # A null `len` falls back to `cstring_length`.
+ redef fun to_s_unsafe(len) do
+ var nbytes = len or else cstring_length
+ return new FlatString.with_infos(self, nbytes, 0)
+ end
- # Returns `self` as a new String.
- redef fun to_s_with_copy: FlatString
+ # Same as `to_s_with_copy_and_length`, using `cstring_length` as the length
+ redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length)
+
+ # Get a `String` from `length` bytes at `self` copied into Nit memory
+ #
+ # The bytes first go through `clean_utf8`. If that call already produced a
+ # new byte array, its result is returned as is; otherwise the `length` bytes
+ # are copied into a fresh `CString` followed by a 0 byte.
+ fun to_s_with_copy_and_length(length: Int): String
do
- var length = cstring_length
- var new_self = new NativeString(length + 1)
+ var r = clean_utf8(length)
+ # `clean_utf8` builds its result on `self` unless it had to insert replacements
+ if r.items != self then return r
+ var new_self = new CString(length + 1)
copy_to(new_self, length, 0, 0)
- var str = new FlatString.with_infos(new_self, length, 0, length - 1)
+ var str = new FlatString.with_infos(new_self, length, 0)
new_self[length] = 0u8
- str.real_items = new_self
return str
end
+ # Cleans a CString if necessary
+ #
+ # Scans the first `len` bytes of `self` and returns them as a `FlatString`.
+ # Each byte that fails the lead-byte or the code-point checks below is
+ # replaced by the 3 bytes `EF BF BD` (U+FFFD in UTF-8) and counts as one char.
+ # When nothing is replaced, the result is built directly on `self`, without copy.
+ fun clean_utf8(len: Int): FlatString do
+ # Byte positions to replace, allocated only when a first one is found
+ var replacements: nullable Array[Int] = null
+ # Byte length of the result: grows by 2 for each replaced byte (1 byte becomes 3)
+ var end_length = len
+ var pos = 0
+ # Number of chars seen so far
+ var chr_ln = 0
+ # Bytes left to scan
+ var rem = len
+ while rem > 0 do
+ # Fast path: skip 4 bytes at once as long as none has its high bit set
+ while rem >= 4 do
+ var i = fetch_4_chars(pos)
+ if i & 0x80808080 != 0 then break
+ pos += 4
+ chr_ln += 4
+ rem -= 4
+ end
+ if rem == 0 then break
+ var b = self[pos]
+ # Single byte with the high bit clear: a 1-byte char
+ if b & 0x80u8 == 0x00u8 then
+ pos += 1
+ chr_ln += 1
+ rem -= 1
+ continue
+ end
+ # Check that the lead byte has the bit pattern matching the announced length
+ var nxst = length_of_char_at(pos)
+ var ok_st: Bool
+ if nxst == 1 then
+ ok_st = b & 0x80u8 == 0u8
+ else if nxst == 2 then
+ ok_st = b & 0xE0u8 == 0xC0u8
+ else if nxst == 3 then
+ ok_st = b & 0xF0u8 == 0xE0u8
+ else
+ ok_st = b & 0xF8u8 == 0xF0u8
+ end
+ if not ok_st then
+ # Bad lead byte: mark this single byte for replacement and move on
+ if replacements == null then replacements = new Array[Int]
+ replacements.add pos
+ end_length += 2
+ pos += 1
+ rem -= 1
+ chr_ln += 1
+ continue
+ end
+ # Check that the decoded code point belongs to the range of its length:
+ # this rejects overlong forms, surrogates (U+D800..U+DFFF), U+FFFE and U+FFFF
+ # NOTE(review): `char_at(pos)` is called without checking `nxst <= rem`;
+ # confirm what happens for a sequence truncated at the end of the `len` bytes
+ var ok_c: Bool
+ var c = char_at(pos)
+ var cp = c.code_point
+ if nxst == 1 then
+ ok_c = cp >= 0 and cp <= 0x7F
+ else if nxst == 2 then
+ ok_c = cp >= 0x80 and cp <= 0x7FF
+ else if nxst == 3 then
+ ok_c = cp >= 0x800 and cp <= 0xFFFF
+ ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
+ else
+ ok_c = cp >= 0x10000 and cp <= 0x10FFFF
+ end
+ if not ok_c then
+ # Bad code point: only the lead byte is replaced, the scan resumes right after it
+ if replacements == null then replacements = new Array[Int]
+ replacements.add pos
+ end_length += 2
+ pos += 1
+ chr_ln += 1
+ rem -= 1
+ continue
+ end
+ # Valid char: skip all of its bytes
+ var clen = c.u8char_len
+ pos += clen
+ rem -= clen
+ chr_ln += 1
+ end
+ var ret = self
+ if end_length != len then
+ # At least one replacement: rebuild the bytes in a new array, copying the
+ # untouched chunks and writing `EF BF BD` in place of each marked byte
+ ret = new CString(end_length)
+ var old_repl = 0
+ var off = 0
+ var repls = replacements.as(not null)
+ var r = repls.items.as(not null)
+ var imax = repls.length
+ for i in [0 .. imax[ do
+ var repl_pos = r[i]
+ var chkln = repl_pos - old_repl
+ copy_to(ret, chkln, old_repl, off)
+ off += chkln
+ ret[off] = 0xEFu8
+ ret[off + 1] = 0xBFu8
+ ret[off + 2] = 0xBDu8
+ old_repl = repl_pos + 1
+ off += 3
+ end
+ # Copy what follows the last replaced byte
+ copy_to(ret, len - old_repl, old_repl, off)
+ end
+ return new FlatString.full(ret, end_length, 0, chr_ln)
+ end
+
# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
#
# Very unsafe, make sure to have room for this char prior to calling this function.
private fun set_char_at(pos: Int, c: Char) do
+ var cp = c.code_point
+ # Code points below 128 are stored as a single byte
+ if cp < 128 then
+ self[pos] = cp.to_b
+ return
+ end
var ln = c.u8char_len
- native_set_char(pos, c, ln)
- end
-
- private fun native_set_char(pos: Int, c: Char, ln: Int) `{
- char* dst = self + pos;
- switch(ln){
- case 1:
- dst[0] = c;
- break;
- case 2:
- dst[0] = 0xC0 | ((c & 0x7C0) >> 6);
- dst[1] = 0x80 | (c & 0x3F);
- break;
- case 3:
- dst[0] = 0xE0 | ((c & 0xF000) >> 12);
- dst[1] = 0x80 | ((c & 0xFC0) >> 6);
- dst[2] = 0x80 | (c & 0x3F);
- break;
- case 4:
- dst[0] = 0xF0 | ((c & 0x1C0000) >> 18);
- dst[1] = 0x80 | ((c & 0x3F000) >> 12);
- dst[2] = 0x80 | ((c & 0xFC0) >> 6);
- dst[3] = 0x80 | (c & 0x3F);
- break;
- }
- `}
+ # Multi-byte forms: a lead byte tagged 0xC0, 0xE0 or 0xF0 according to `ln`,
+ # then bytes tagged 0x80 that each carry 6 bits of the code point.
+ # Nothing is written when `ln` is not 2, 3 or 4.
+ if ln == 2 then
+ self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
+ self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
+ else if ln == 3 then
+ self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
+ self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+ self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
+ else if ln == 4 then
+ self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
+ self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
+ self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
+ self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
+ end
+ end
end
redef class Int
- redef fun to_base(base, signed)
- do
- var l = digit_count(base)
- var s = new FlatBuffer.from(" " * l)
- fill_buffer(s, base, signed)
- return s.to_s
- end
-
# return displayable int in base 10 and signed
#
# assert 1.to_s == "1"
if self == 1 then return "1"
var nslen = int_to_s_len
- var ns = new NativeString(nslen + 1)
+ var ns = new CString(nslen + 1)
ns[nslen] = 0u8
native_int_to_s(ns, nslen + 1)
- return new FlatString.full(ns, nslen, 0, nslen - 1, nslen)
+ return new FlatString.full(ns, nslen, 0, nslen)
end
end
# Fast implementation
redef fun plain_to_s
do
- var l = length
+ var l = _length
if l == 0 then return ""
- if l == 1 then if self[0] == null then return "" else return self[0].to_s
- var its = _items
+ var its = _items.as(not null)
+ var first = its[0]
+ if l == 1 then if first == null then return "" else return first.to_s
var na = new NativeArray[String](l)
var i = 0
var sl = 0
continue
end
var tmp = itsi.to_s
- sl += tmp.bytelen
+ sl += tmp.byte_length
na[mypos] = tmp
i += 1
mypos += 1
end
- var ns = new NativeString(sl + 1)
+ var ns = new CString(sl + 1)
ns[sl] = 0u8
i = 0
var off = 0
while i < mypos do
var tmp = na[i]
if tmp isa FlatString then
- var tpl = tmp.bytelen
- tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
+ var tpl = tmp._byte_length
+ tmp._items.copy_to(ns, tpl, tmp._first_byte, off)
off += tpl
else
for j in tmp.substrings do
var s = j.as(FlatString)
- var slen = s.bytelen
- s.items.copy_to(ns, slen, s.first_byte, off)
+ var slen = s._byte_length
+ s._items.copy_to(ns, slen, s._first_byte, off)
off += slen
end
end
i += 1
end
- return ns.to_s_with_length(sl)
+ return new FlatString.with_infos(ns, sl, 0)
end
end
var sl = 0
var mypos = 0
while i < l do
- sl += na[i].bytelen
+ sl += na[i].byte_length
i += 1
mypos += 1
end
- var ns = new NativeString(sl + 1)
+ var ns = new CString(sl + 1)
ns[sl] = 0u8
i = 0
var off = 0
while i < mypos do
var tmp = na[i]
if tmp isa FlatString then
- var tpl = tmp.bytelen
- tmp.items.copy_to(ns, tpl, tmp.first_byte, off)
+ var tpl = tmp._byte_length
+ tmp._items.copy_to(ns, tpl, tmp._first_byte, off)
off += tpl
else
for j in tmp.substrings do
var s = j.as(FlatString)
- var slen = s.bytelen
- s.items.copy_to(ns, slen, s.first_byte, off)
+ var slen = s._byte_length
+ s._items.copy_to(ns, slen, s._first_byte, off)
off += slen
end
end
i += 1
end
- return ns.to_s_with_length(sl)
+ return new FlatString.with_infos(ns, sl, 0)
end
end
var e = i.item
s.append("{k or else "<null>"}{couple_sep}{e or else "<null>"}")
- # Concat other items
+ # Concat other _items
i.next
while i.is_ok do
s.append(sep)