redef class FlatText
- private fun first_byte: Int do return 0
+ # First byte of the NativeString
+ protected fun first_byte: Int do return 0
- private fun last_byte: Int do return _bytelen - 1
+ # Last byte of the NativeString
+ protected fun last_byte: Int do return _bytelen - 1
# Cache of the latest position (char) explored in the string
- private var position: Int = 0
+ var position: Int = 0
# Cached position (bytes) in the NativeString underlying the String
- private var bytepos: Int = 0
+ var bytepos: Int = 0
# Index of the character `index` in `_items`
- private fun char_to_byte_index(index: Int): Int do
+ fun char_to_byte_index(index: Int): Int do
var ln = length
assert index >= 0
assert index < ln
return ns_i
end
+ # Number of extra bytes that HTML-escaping `self` requires.
+ #
+ # `<` and `>` each grow by 3 bytes; `&`, `"`, `'` and `/` each grow by 4.
+ # A result of 0 means that no byte of `self` needs escaping.
+ fun chars_to_html_escape: Int do
+ 	var src = _items
+ 	var extra = 0
+ 	for i in [first_byte .. last_byte] do
+ 		var b = src[i]
+ 		if b == 0x3Cu8 or b == 0x3Eu8 then
+ 			# `<` or `>`
+ 			extra += 3
+ 		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+ 			# `&`, `"`, `'` or `/`
+ 			extra += 4
+ 		end
+ 	end
+ 	return extra
+ end
+
+ redef fun html_escape
+ do
+ 	# Nothing to escape: no new buffer is built
+ 	var growth = chars_to_html_escape
+ 	if growth == 0 then return to_s
+ 	var src = _items
+ 	var out_len = growth + _bytelen
+ 	var dst = new NativeString(out_len)
+ 	var wr = 0
+ 	# Characters used as HTML meta-data are replaced by an entity,
+ 	# every other byte is copied unchanged:
+ 	#
+ 	# * 0x3C (<) => &lt;
+ 	# * 0x3E (>) => &gt;
+ 	# * 0x26 (&) => &amp;
+ 	# * 0x22 (") => &#34;
+ 	# * 0x27 (') => &#39;
+ 	# * 0x2F (/) => &#47;
+ 	for rd in [first_byte .. last_byte] do
+ 		var b = src[rd]
+ 		if b == 0x3Cu8 or b == 0x3Eu8 then
+ 			# `&lt;` and `&gt;` only differ by their second byte (`l` or `g`)
+ 			var letter = 0x67u8
+ 			if b == 0x3Cu8 then letter = 0x6Cu8
+ 			dst[wr] = 0x26u8
+ 			dst[wr + 1] = letter
+ 			dst[wr + 2] = 0x74u8
+ 			dst[wr + 3] = 0x3Bu8
+ 			wr += 4
+ 		else if b == 0x26u8 then
+ 			# `&amp;`
+ 			dst[wr] = 0x26u8
+ 			dst[wr + 1] = 0x61u8
+ 			dst[wr + 2] = 0x6Du8
+ 			dst[wr + 3] = 0x70u8
+ 			dst[wr + 4] = 0x3Bu8
+ 			wr += 5
+ 		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+ 			# Numeric entities `&#34;`, `&#39;` and `&#47;`:
+ 			# only the two digits change
+ 			var tens = 0x33u8
+ 			var units = 0x34u8
+ 			if b == 0x27u8 then
+ 				units = 0x39u8
+ 			else if b == 0x2Fu8 then
+ 				tens = 0x34u8
+ 				units = 0x37u8
+ 			end
+ 			dst[wr] = 0x26u8
+ 			dst[wr + 1] = 0x23u8
+ 			dst[wr + 2] = tens
+ 			dst[wr + 3] = units
+ 			dst[wr + 4] = 0x3Bu8
+ 			wr += 5
+ 		else
+ 			dst[wr] = b
+ 			wr += 1
+ 		end
+ 	end
+ 	return new FlatString.with_infos(dst, out_len, 0, out_len - 1)
+ end
+
# By escaping `self` to C, how many more bytes will be needed ?
#
# This enables a double-optimization in `escape_to_c` since if this
# method returns 0, then `self` does not need escaping and can be
# returned as-is
- protected fun chars_to_escape_to_c: Int do
+ fun chars_to_escape_to_c: Int do
var its = _items
var max = last_byte
var pos = first_byte
end
redef fun [](index) do return _items.char_at(char_to_byte_index(index))
+
+ # If `self` contains only digits and alpha <= 'f', return the corresponding integer.
+ #
+ # `pos` is the index of the first character to read (0 when null).
+ # `ln` is the number of hexadecimal digits to read (up to the end of
+ # `self` when null); each digit is read as a single byte.
+ # When there is nothing to read, 0 is returned.
+ #
+ #     assert "ff".to_hex == 255
+ redef fun to_hex(pos, ln) do
+ 	var res = 0
+ 	if pos == null then pos = 0
+ 	if ln == null then ln = length - pos
+ 	# Nothing to read (e.g. empty text): return before `char_to_byte_index`,
+ 	# which asserts that its argument is a valid character index.
+ 	if ln <= 0 then return res
+ 	pos = char_to_byte_index(pos)
+ 	var its = _items
+ 	var max = pos + ln
+ 	for i in [pos .. max[ do
+ 		# Each digit contributes 4 bits
+ 		res <<= 4
+ 		res += its[i].ascii.from_hex
+ 	end
+ 	return res
+ end
end
# Immutable strings of characters.
return _items.utf8_length(_first_byte, _last_byte)
end
+ # NUL-terminated copy of the bytes of `self`, built on first access (`is lazy`).
+ redef var to_cstring is lazy do
+ 	var size = _bytelen
+ 	# One extra byte for the terminating NUL
+ 	var cstr = new NativeString(size + 1)
+ 	_items.copy_to(cstr, size, _first_byte, 0)
+ 	cstr[size] = 0u8
+ 	return cstr
+ end
+
redef fun reversed
do
var b = new FlatBuffer.with_capacity(_bytelen + 1)
_bytepos = from
end
- redef fun to_cstring do
- if real_items != null then return real_items.as(not null)
- var blen = _bytelen
- var new_items = new NativeString(blen + 1)
- _items.copy_to(new_items, blen, _first_byte, 0)
- new_items[blen] = 0u8
- real_items = new_items
- return new_items
- end
-
redef fun ==(other)
do
- if not other isa FlatString then return super
+ if not other isa FlatText then return super
if self.object_id == other.object_id then return true
if other._bytelen != my_length then return false
var my_index = _first_byte
- var its_index = other._first_byte
+ var its_index = other.first_byte
var last_iteration = my_index + my_length
+ # Byte-wise "less than" comparison with any `FlatText`.
+ #
+ # Falls back to `super` when `other` is not a `FlatText`.
+ # Bytes are compared pairwise, each text starting at its own first byte,
+ # over the byte length of the shorter one; if that whole prefix is equal,
+ # `self` is lesser only if it has fewer bytes than `other`.
redef fun <(other)
do
- if not other isa FlatString then return super
+ if not other isa FlatText then return super
if self.object_id == other.object_id then return false
- var my_length = self._bytelen
- var its_length = other._bytelen
+ var myits = _items
+ var itsits = other._items
- var max = if my_length < its_length then my_length else its_length
+ var mbt = _bytelen
+ var obt = other.bytelen
- var myits = self.bytes
- var itsits = other.bytes
+ # Number of byte pairs to compare
+ var minln = if mbt < obt then mbt else obt
+ # Read positions in each buffer
+ var mst = _first_byte
+ var ost = other.first_byte
- for i in [0 .. max[ do
- var my_curr_char = myits[i]
- var its_curr_char = itsits[i]
+ for i in [0 .. minln[ do
+ var my_curr_char = myits[mst]
+ var its_curr_char = itsits[ost]
- if my_curr_char != its_curr_char then
- if my_curr_char < its_curr_char then return true
- return false
- end
+ # The first differing byte decides the order
+ if my_curr_char > its_curr_char then return false
+ if my_curr_char < its_curr_char then return true
+
+ mst += 1
+ ost += 1
end
- return my_length < its_length
+ return mbt < obt
end
redef fun +(o) do
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
- do
- init(tgt, pos)
- end
-
redef fun is_ok do return curr_pos >= 0
redef fun item do return target[curr_pos]
var target: FlatString
- var max: Int
+ var max: Int is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
- do
- init(tgt, tgt.length - 1, pos)
- end
+ init do max = target.length - 1
redef fun is_ok do return curr_pos <= max
redef fun [](index) do return target[index]
- redef fun iterator_from(start) do return new FlatStringCharIterator.with_pos(target, start)
+ redef fun iterator_from(start) do return new FlatStringCharIterator(target, start)
- redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator.with_pos(target, start)
+ redef fun reverse_iterator_from(start) do return new FlatStringCharReverseIterator(target, start)
end
var target: FlatString
- var target_items: NativeString
+ var target_items: NativeString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
+ init
do
- init(tgt, tgt._items, pos + tgt._first_byte)
+ var tgt = target
+ target_items = tgt._items
+ curr_pos += tgt._first_byte
end
redef fun is_ok do return curr_pos >= target._first_byte
var target: FlatString
- var target_items: NativeString
+ var target_items: NativeString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatString, pos: Int)
+ init
do
- init(tgt, tgt._items, pos + tgt._first_byte)
+ var tgt = target
+ target_items = tgt._items
+ curr_pos += tgt._first_byte
end
redef fun is_ok do return curr_pos <= target._last_byte
return target._items[ind]
end
- redef fun iterator_from(start) do return new FlatStringByteIterator.with_pos(target, start)
+ redef fun iterator_from(start) do return new FlatStringByteIterator(target, start)
- redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator.with_pos(target, start)
+ redef fun reverse_iterator_from(start) do return new FlatStringByteReverseIterator(target, start)
end
private var capacity = 0
+ # Real items, used as cache for when to_cstring is called
+ private var real_items: NativeString is noinit
+
redef fun fast_cstring do return _items.fast_cstring(0)
redef fun substrings do return new FlatSubstringsIter(self)
real_items = new_native
is_dirty = false
end
- return real_items.as(not null)
+ return real_items
end
# Create a new empty string.
assert count >= 0
if from < 0 then from = 0
if (from + count) > length then count = length - from
- if count != 0 then
- var its = _items
- var bytefrom = its.char_to_byte_index(from)
- var byteto = its.char_to_byte_index(count + from - 1)
- byteto += its.char_at(byteto).u8char_len - 1
- var byte_length = byteto - bytefrom + 1
- var r_items = new NativeString(byte_length)
- its.copy_to(r_items, byte_length, bytefrom, 0)
- return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
- else
- return new Buffer
- end
+ if count <= 0 then return new Buffer
+ var its = _items
+ var bytefrom = its.char_to_byte_index(from)
+ var byteto = its.char_to_byte_index(count + from - 1)
+ byteto += its.char_at(byteto).u8char_len - 1
+ var byte_length = byteto - bytefrom + 1
+ var r_items = new NativeString(byte_length)
+ its.copy_to(r_items, byte_length, bytefrom, 0)
+ return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
end
redef fun reverse
var target: FlatBuffer
- var target_items: NativeString
+ var target_items: NativeString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt._items, pos)
- end
+ init do target_items = target._items
redef fun index do return curr_pos
redef fun [](index) do return target._items[index]
- redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
+ redef fun iterator_from(pos) do return new FlatBufferByteIterator(target, pos)
- redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
+ redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator(target, pos)
end
var target: FlatBuffer
- var target_items: NativeString
+ var target_items: NativeString is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt._items, pos)
- end
+ init do target_items = target._items
redef fun index do return curr_pos
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, pos)
- end
-
redef fun index do return curr_pos
redef fun is_ok do return curr_pos >= 0
for i in s do target.add i
end
- redef fun iterator_from(pos) do return new FlatBufferCharIterator.with_pos(target, pos)
+ redef fun iterator_from(pos) do return new FlatBufferCharIterator(target, pos)
- redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator.with_pos(target, pos)
+ redef fun reverse_iterator_from(pos) do return new FlatBufferCharReverseIterator(target, pos)
end
var target: FlatBuffer
- var max: Int
+ var max: Int is noautoinit
var curr_pos: Int
- init with_pos(tgt: FlatBuffer, pos: Int)
- do
- init(tgt, tgt.length - 1, pos)
- end
+ init do max = target.length - 1
redef fun index do return curr_pos
+ # Builds a `FlatString` from the first `length` bytes of `self`.
+ #
+ # `length` must not be negative.
+ # The bytes go through `clean_utf8`, so the result is backed by a new
+ # buffer whenever some bytes had to be replaced.
redef fun to_s_with_length(length): FlatString
do
assert length >= 0
- var str = new FlatString.with_infos(self, length, 0, length - 1)
- return str
+ return clean_utf8(length)
end
redef fun to_s_full(bytelen, unilen) do
+ # Builds a `FlatString` whose buffer is not `self`.
+ #
+ # The length is taken from `cstring_length`.
redef fun to_s_with_copy: FlatString
do
var length = cstring_length
+ # If `clean_utf8` had to replace bytes, its result already uses a new buffer
+ var r = clean_utf8(length)
+ if r.items != self then return r
+ # Otherwise copy the bytes, with one extra byte for the trailing NUL
var new_self = new NativeString(length + 1)
copy_to(new_self, length, 0, 0)
var str = new FlatString.with_infos(new_self, length, 0, length - 1)
new_self[length] = 0u8
- str.real_items = new_self
+ # The NUL-terminated copy is also stored as the `to_cstring` value of the result
+ str.to_cstring = new_self
return str
end
+ # Cleans a NativeString if necessary
+ #
+ # Scans the first `len` bytes of `self` and returns them as a `FlatString`.
+ # Every byte position that fails one of the two checks below is replaced by
+ # the three bytes `0xEF 0xBF 0xBD` (the UTF-8 encoding of U+FFFD) and
+ # counted as one character.
+ # When nothing has to be replaced, the returned string uses `self` as its
+ # buffer; otherwise a new `NativeString` is allocated.
+ fun clean_utf8(len: Int): FlatString do
+ # Byte positions to replace, allocated on the first replacement
+ var replacements: nullable Array[Int] = null
+ # Byte length of the result: grows by 2 per replacement (1 byte becomes 3)
+ var end_length = len
+ var pos = 0
+ # Number of characters of the result
+ var chr_ln = 0
+ while pos < len do
+ var b = self[pos]
+ var nxst = length_of_char_at(pos)
+ # First check: the byte at `pos` must have the lead-byte bit pattern
+ # of a sequence of `nxst` bytes
+ var ok_st: Bool
+ if nxst == 1 then
+ ok_st = b & 0x80u8 == 0u8
+ else if nxst == 2 then
+ ok_st = b & 0xE0u8 == 0xC0u8
+ else if nxst == 3 then
+ ok_st = b & 0xF0u8 == 0xE0u8
+ else
+ ok_st = b & 0xF8u8 == 0xF0u8
+ end
+ if not ok_st then
+ # Replace this single byte and resume the scan at the next one
+ if replacements == null then replacements = new Array[Int]
+ replacements.add pos
+ end_length += 2
+ pos += 1
+ chr_ln += 1
+ continue
+ end
+ # Second check: the decoded code point must lie in the range
+ # accepted for a sequence of `nxst` bytes
+ # NOTE(review): nothing here checks that `pos + nxst` stays within `len`;
+ # presumably `char_at` copes with a sequence cut by the end of the buffer - confirm
+ var ok_c: Bool
+ var c = char_at(pos)
+ var cp = c.code_point
+ if nxst == 1 then
+ ok_c = cp >= 0 and cp <= 0x7F
+ else if nxst == 2 then
+ ok_c = cp >= 0x80 and cp <= 0x7FF
+ else if nxst == 3 then
+ ok_c = cp >= 0x800 and cp <= 0xFFFF
+ # 0xD800..0xDFFF, 0xFFFE and 0xFFFF are rejected as well
+ ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
+ else
+ ok_c = cp >= 0x10000 and cp <= 0x10FFFF
+ end
+ if not ok_c then
+ # Same recovery as above: only the byte at `pos` is replaced
+ if replacements == null then replacements = new Array[Int]
+ replacements.add pos
+ end_length += 2
+ pos += 1
+ chr_ln += 1
+ continue
+ end
+ # Accepted character: skip all of its bytes
+ pos += c.u8char_len
+ chr_ln += 1
+ end
+ var ret = self
+ if end_length != len then
+ # At least one replacement: rebuild the bytes in a new buffer
+ ret = new NativeString(end_length)
+ # Start in `self` of the next chunk to copy
+ var old_repl = 0
+ # Write offset in `ret`
+ var off = 0
+ var repls = replacements.as(not null)
+ var r = repls.items.as(not null)
+ var imax = repls.length
+ for i in [0 .. imax[ do
+ var repl_pos = r[i]
+ # Copy the untouched bytes located before the replaced one
+ var chkln = repl_pos - old_repl
+ copy_to(ret, chkln, old_repl, off)
+ off += chkln
+ # Write the 3-byte replacement in place of the rejected byte
+ ret[off] = 0xEFu8
+ ret[off + 1] = 0xBFu8
+ ret[off + 2] = 0xBDu8
+ old_repl = repl_pos + 1
+ off += 3
+ end
+ # Copy what remains after the last replacement
+ copy_to(ret, len - old_repl, old_repl, off)
+ end
+ return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
+ end
+
# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
#
# Very unsafe, make sure to have room for this char prior to calling this function.
do
var l = length
if l == 0 then return ""
- if l == 1 then if self[0] == null then return "" else return self[0].to_s
- var its = _items
+ var its = _items.as(not null)
+ var first = its[0]
+ if l == 1 then if first == null then return "" else return first.to_s
var na = new NativeArray[String](l)
var i = 0
var sl = 0
end
i += 1
end
- return ns.to_s_with_length(sl)
+ return new FlatString.with_infos(ns, sl, 0, sl - 1)
end
end
end
i += 1
end
- return ns.to_s_with_length(sl)
+ return new FlatString.with_infos(ns, sl, 0, sl - 1)
end
end