var its = _items
if dpos == 1 then
- if its[b] & 0x80u8 == 0x00u8 then
+ if its[b] & 0x80 == 0x00 then
b += 1
else
b += its.length_of_char_at(b)
var endlen = 0
while pos <= max do
var c = its[pos]
- if c == 0x3Cu8 then
+ if c == u'<' then
endlen += 3
- else if c == 0x3Eu8 then
+ else if c == u'>' then
endlen += 3
- else if c == 0x26u8 then
+ else if c == u'&' then
endlen += 4
- else if c == 0x22u8 then
+ else if c == u'"' then
endlen += 4
- else if c == 0x27u8 then
+ else if c == u'\'' then
endlen += 4
- else if c == 0x2Fu8 then
+ else if c == 0x2F then
endlen += 4
end
pos += 1
# Special codes:
# Some HTML characters are used as meta-data, they need
# to be replaced by an HTML-Escaped equivalent
- #
- # * 0x3C (<) => <
- # * 0x3E (>) => >
- # * 0x26 (&) => &
- # * 0x22 (") => "
- # * 0x27 (') => '
- # * 0x2F (/) => /
- if c == 0x3Cu8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x6Cu8
- nits[outpos + 2] = 0x74u8
- nits[outpos + 3] = 0x3Bu8
+ if c == u'<' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'l'
+ nits[outpos + 2] = u't'
+ nits[outpos + 3] = u';'
outpos += 4
- else if c == 0x3Eu8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x67u8
- nits[outpos + 2] = 0x74u8
- nits[outpos + 3] = 0x3Bu8
+ else if c == u'>' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'g'
+ nits[outpos + 2] = u't'
+ nits[outpos + 3] = u';'
outpos += 4
- else if c == 0x26u8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x61u8
- nits[outpos + 2] = 0x6Du8
- nits[outpos + 3] = 0x70u8
- nits[outpos + 4] = 0x3Bu8
+ else if c == u'&' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'a'
+ nits[outpos + 2] = u'm'
+ nits[outpos + 3] = u'p'
+ nits[outpos + 4] = u';'
outpos += 5
- else if c == 0x22u8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x23u8
- nits[outpos + 2] = 0x33u8
- nits[outpos + 3] = 0x34u8
- nits[outpos + 4] = 0x3Bu8
+ else if c == u'"' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'#'
+ nits[outpos + 2] = u'3'
+ nits[outpos + 3] = u'4'
+ nits[outpos + 4] = u';'
outpos += 5
- else if c == 0x27u8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x23u8
- nits[outpos + 2] = 0x33u8
- nits[outpos + 3] = 0x39u8
- nits[outpos + 4] = 0x3Bu8
+ else if c == u'\'' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'#'
+ nits[outpos + 2] = u'3'
+ nits[outpos + 3] = u'9'
+ nits[outpos + 4] = u';'
outpos += 5
- else if c == 0x2Fu8 then
- nits[outpos] = 0x26u8
- nits[outpos + 1] = 0x23u8
- nits[outpos + 2] = 0x34u8
- nits[outpos + 3] = 0x37u8
- nits[outpos + 4] = 0x3Bu8
+ else if c == u'/' then
+ nits[outpos] = u'&'
+ nits[outpos + 1] = u'#'
+ nits[outpos + 2] = u'4'
+ nits[outpos + 3] = u'7'
+ nits[outpos + 4] = u';'
outpos += 5
else
nits[outpos] = c
var req_esc = 0
while pos <= max do
var c = its[pos]
- if c == 0x0Au8 then
+ if c == u'\n' then
req_esc += 1
- else if c == 0x09u8 then
+ else if c == u'\t' then
req_esc += 1
- else if c == 0x22u8 then
+ else if c == u'"' then
req_esc += 1
- else if c == 0x27u8 then
+ else if c == u'\'' then
req_esc += 1
- else if c == 0x5Cu8 then
+ else if c == u'\\' then
req_esc += 1
- else if c == 0x3Fu8 then
+ else if c == u'?' then
var j = pos + 1
if j < length then
var next = its[j]
# We ignore `??'` because it will be escaped as `??\'`.
if
- next == 0x21u8 or
- next == 0x28u8 or
- next == 0x29u8 or
- next == 0x2Du8 or
- next == 0x2Fu8 or
- next == 0x3Cu8 or
- next == 0x3Du8 or
- next == 0x3Eu8
+ next == 0x21 or
+ next == 0x28 or
+ next == 0x29 or
+ next == 0x2D or
+ next == 0x2F or
+ next == 0x3C or
+ next == 0x3D or
+ next == 0x3E
then req_esc += 1
end
- else if c < 32u8 then
+ else if c < 32 then
req_esc += 3
end
pos += 1
# * 0x22 => \"
# * 0x27 => \'
# * 0x5C => \\
- if c == 0x09u8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x74u8
+ if c == u'\t' then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u't'
opos += 2
- else if c == 0x0Au8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x6Eu8
+ else if c == u'\n' then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u'n'
opos += 2
- else if c == 0x22u8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x22u8
+ else if c == u'"' then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u'"'
opos += 2
- else if c == 0x27u8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x27u8
+ else if c == u'\'' then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u'\''
opos += 2
- else if c == 0x5Cu8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x5Cu8
+ else if c == u'\\' then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u'\\'
opos += 2
- else if c == 0x3Fu8 then
+ else if c == u'?' then
var j = pos + 1
if j < length then
var next = its[j]
# We ignore `??'` because it will be escaped as `??\'`.
if
- next == 0x21u8 or
- next == 0x28u8 or
- next == 0x29u8 or
- next == 0x2Du8 or
- next == 0x2Fu8 or
- next == 0x3Cu8 or
- next == 0x3Du8 or
- next == 0x3Eu8
+ next == 0x21 or
+ next == 0x28 or
+ next == 0x29 or
+ next == 0x2D or
+ next == 0x2F or
+ next == 0x3C or
+ next == 0x3D or
+ next == 0x3E
then
- nns[opos] = 0x5Cu8
+ nns[opos] = 0x5C
opos += 1
end
end
- nns[opos] = 0x3Fu8
+ nns[opos] = 0x3F
opos += 1
- else if c < 32u8 then
- nns[opos] = 0x5Cu8
- nns[opos + 1] = 0x30u8
- nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
- nns[opos + 3] = (c & 0x07u8) + 0x30u8
+ else if c < 32 then
+ nns[opos] = u'\\'
+ nns[opos + 1] = u'0'
+ nns[opos + 2] = ((c & 0x38) >> 3) + u'0'
+ nns[opos + 3] = (c & 0x07) + u'0'
opos += 4
else
nns[opos] = c
end
pos += 1
end
- return nns.to_s_unsafe(nlen)
+ return nns.to_s_unsafe(nlen, copy=false, clean=false)
end
redef fun [](index) do
if dpos == 1 and index < len - 1 then
var its = _items
var c = its[b]
- if c & 0x80u8 == 0x00u8 then
+ if c & 0x80 == 0x00 then
# We want the next, and current is easy.
# So next is easy to find!
b += 1
else if dpos == -1 and index > 1 then
var its = _items
var c = its[b-1]
- if c & 0x80u8 == 0x00u8 then
+ if c & 0x80 == 0x00 then
# We want the previous, and it is easy.
b -= 1
dpos = 0
_position = index
_bytepos = b
- return c.ascii
+ return c.code_point
end
end
if dpos == 0 then
# We know what we want (+0 or +1) just get it now!
var its = _items
var c = its[b]
- if c & 0x80u8 == 0x00u8 then return c.ascii
+ if c & 0x80 == 0x00 then return c.code_point
return items.char_at(b)
end
var i = char_to_byte_index(index)
var items = _items
var b = items[i]
- if b & 0x80u8 == 0x00u8 then return b.ascii
+ if b & 0x80 == 0x00 then return b.code_point
return items.char_at(i)
end
var max = pos + ln
for i in [pos .. max[ do
res <<= 4
- res += its[i].ascii.from_hex
+ res += its[i].code_point.from_hex
end
return res
end
var blen = _byte_length
var new_items = new CString(blen + 1)
_items.copy_to(new_items, blen, _first_byte, 0)
- new_items[blen] = 0u8
+ new_items[blen] = 0
return new_items
end
var its = _items
var fb = _first_byte
var ns = new CString(new_byte_length + 1)
- ns[new_byte_length] = 0u8
+ ns[new_byte_length] = 0
var offset = 0
while i > 0 do
its.copy_to(ns, mybtlen, fb, offset)
# Indexed access: returns the element of `_items` located at `idx + _first_byte`.
#
# `idx` is asserted to satisfy `0 <= idx < _byte_length` before the read.
# The fetched item was converted with `ascii` before this change and is
# converted with `code_point` after it.
redef fun [](idx) do
assert idx < _byte_length and idx >= 0
- return _items[idx + _first_byte].ascii
+ return _items[idx + _first_byte].code_point
end
# Builds a new `ASCIIFlatString` over the same `_items`, starting at
# `from + _first_byte`.
#
# `count` is passed as both the second and the fourth argument of `full_data`
# (presumably the byte length and the char length — confirm against `full_data`).
# No bounds check on `from` or `count` is performed in this body.
redef fun substring(from, count) do
return new ASCIIFlatString.full_data(_items, count, from + _first_byte, count)
end
# Returns the element of `_items` at `i + _first_byte`, converted with `ascii`
# (removed line) or `code_point` (added line). No range check is done here.
- redef fun fetch_char_at(i) do return _items[i + _first_byte].ascii
+ redef fun fetch_char_at(i) do return _items[i + _first_byte].code_point
end
private class FlatStringCharReverseIterator
end
private class FlatStringByteReverseIterator
- super IndexedIterator[Byte]
+ super IndexedIterator[Int]
var target: FlatString
end
private class FlatStringByteIterator
- super IndexedIterator[Byte]
+ super IndexedIterator[Int]
var target: FlatString
do
var bln = _byte_length
var new_native = new CString(bln + 1)
- new_native[bln] = 0u8
+ new_native[bln] = 0
if _length > 0 then _items.copy_to(new_native, bln, 0, 0)
return new_native
end
end
private class FlatBufferByteReverseIterator
- super IndexedIterator[Byte]
+ super IndexedIterator[Int]
var target: FlatBuffer
end
private class FlatBufferByteIterator
- super IndexedIterator[Byte]
+ super IndexedIterator[Int]
var target: FlatBuffer
var curr_pos: Int
- init do target_items = target._items
+ init do if isset target._items then target_items = target._items
redef fun index do return curr_pos
end
redef class CString
# Removed side: `to_s` forwarded `cstring_length` to `to_s_with_length`, which
# asserted `length >= 0` and returned `clean_utf8(length)`.
- redef fun to_s
- do
- return to_s_with_length(cstring_length)
- end
- redef fun to_s_with_length(length)
- do
- assert length >= 0
- return clean_utf8(length)
- end
# Added side: `to_s` now forwards to `to_s_unsafe` with no arguments.
+ # Get a `String` from the data at `self` copied into Nit memory
+ #
+ # Require: `self` is a null-terminated string.
+ redef fun to_s do return to_s_unsafe
# Removed side: `to_s_full` wrapped `self` in `FlatString.full` with the given
# byte length, a start offset of 0 and `unilen`.
- redef fun to_s_full(byte_length, unilen) do
- return new FlatString.full(self, byte_length, 0, unilen)
- end
# Added side: `to_s_with_length` is declared with `fun` (not `redef fun`) and
# forwards `byte_length` to `to_s_unsafe`, leaving the other arguments unset.
# NOTE(review): the removed `to_s_with_length` asserted `length >= 0`; this
# one-liner has no such assertion — confirm `to_s_unsafe` tolerates a negative value.
+ # Get a `String` from `byte_length` bytes at `self` copied into Nit memory
+ #
+ # The string is cleaned.
+ fun to_s_with_length(byte_length: Int): String do return to_s_unsafe(byte_length)
# Removed side: `to_s_unsafe(len)` defaulted `len` to `cstring_length` and
# wrapped `self` with `FlatString.with_infos` without copying.
- redef fun to_s_unsafe(len) do
- if len == null then len = cstring_length
- return new FlatString.with_infos(self, len, 0)
- end
# Added side: `to_s_unsafe` takes four arguments.
# A null `byte_length` falls back to `cstring_length`; a null `clean` or `copy`
# falls back to `true`.
+ redef fun to_s_unsafe(byte_length, char_length, copy, clean)
+ do
+ byte_length = byte_length or else cstring_length
+ clean = clean or else true
+ copy = copy or else true
+
+ # Clean?
+ var str = null
+ if clean then
+ str = clean_utf8(byte_length)
# When cleaning, any `char_length` given by the caller is overwritten with
# the length of the cleaned string.
+ char_length = str.length
+ else
# Without cleaning, a missing `char_length` is computed with `utf8_length`.
+ char_length = char_length or else utf8_length(0, byte_length)
+ end
- redef fun to_s_with_copy do return to_s_with_copy_and_length(cstring_length)
# A copy is made only if nothing was cleaned (`str == null`) or the cleaned
# string still has `self` as its `items`.
+ # Copy? (if not already copied by `clean_utf8`)
+ if copy and (str == null or str.items == self) then
+ var new_cstr = new CString(byte_length + 1)
+ copy_to(new_cstr, byte_length, 0, 0)
# The extra byte allocated above receives a 0 at index `byte_length`.
+ new_cstr[byte_length] = 0
+ str = new FlatString.full(new_cstr, byte_length, 0, char_length)
+ end
+
# Neither cleaning nor copying produced a string: wrap `self` directly.
+ if str == null then
+ str = new FlatString.full(self, byte_length, 0, char_length)
+ end
# Removed side: `to_s_with_copy_and_length` returned the `clean_utf8` result
# when its `items` differed from `self`, otherwise copied `length` bytes into a
# new `CString` of `length + 1` bytes and wrote 0 at index `length`.
- # Get a `String` from `length` bytes at `self` copied into Nit memory
- fun to_s_with_copy_and_length(length: Int): String
- do
- var r = clean_utf8(length)
- if r.items != self then return r
- var new_self = new CString(length + 1)
- copy_to(new_self, length, 0, 0)
- var str = new FlatString.with_infos(new_self, length, 0)
- new_self[length] = 0u8
return str
end
while rem > 0 do
while rem >= 4 do
var i = fetch_4_chars(pos)
- if i & 0x80808080 != 0 then break
+ if i & 0x80808080u32 != 0u32 then break
pos += 4
chr_ln += 4
rem -= 4
end
if rem == 0 then break
var b = self[pos]
- if b & 0x80u8 == 0x00u8 then
+ if b & 0x80 == 0x00 then
pos += 1
chr_ln += 1
rem -= 1
var nxst = length_of_char_at(pos)
var ok_st: Bool
if nxst == 1 then
- ok_st = b & 0x80u8 == 0u8
+ ok_st = b & 0x80 == 0
else if nxst == 2 then
- ok_st = b & 0xE0u8 == 0xC0u8
+ ok_st = b & 0xE0 == 0xC0
else if nxst == 3 then
- ok_st = b & 0xF0u8 == 0xE0u8
+ ok_st = b & 0xF0 == 0xE0
else
- ok_st = b & 0xF8u8 == 0xF0u8
+ ok_st = b & 0xF8 == 0xF0
end
if not ok_st then
if replacements == null then replacements = new Array[Int]
var chkln = repl_pos - old_repl
copy_to(ret, chkln, old_repl, off)
off += chkln
- ret[off] = 0xEFu8
- ret[off + 1] = 0xBFu8
- ret[off + 2] = 0xBDu8
+ ret[off] = 0xEF
+ ret[off + 1] = 0xBF
+ ret[off + 2] = 0xBD
old_repl = repl_pos + 1
off += 3
end
# Writes the UTF-8 encoding of `c` into `self`, starting at index `pos`.
#
# Code points below 128 take one byte. Otherwise `c.u8char_len` selects the
# 2-, 3- or 4-byte form; any other length writes nothing.
# No check that `self` has room for the bytes is made in this body.
# This change drops the `.to_b` conversions and stores the `Int` values directly.
private fun set_char_at(pos: Int, c: Char) do
var cp = c.code_point
# Single-byte case: the code point is stored as is.
if cp < 128 then
- self[pos] = cp.to_b
+ self[pos] = cp
return
end
var ln = c.u8char_len
if ln == 2 then
# Lead byte 0xC0 | bits 6..10 of `cp`, then one continuation byte
# 0x80 | low 6 bits.
- self[pos] = (0xC0 | ((cp & 0x7C0) >> 6)).to_b
- self[pos + 1] = (0x80 | (cp & 0x3F)).to_b
+ self[pos] = 0xC0 | ((cp & 0x7C0) >> 6)
+ self[pos + 1] = 0x80 | (cp & 0x3F)
else if ln == 3 then
# Lead byte 0xE0 | bits 12..15, then two continuation bytes carrying
# bits 6..11 and bits 0..5.
- self[pos] = (0xE0 | ((cp & 0xF000) >> 12)).to_b
- self[pos + 1] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
- self[pos + 2] = (0x80 | (cp & 0x3F)).to_b
+ self[pos] = 0xE0 | ((cp & 0xF000) >> 12)
+ self[pos + 1] = 0x80 | ((cp & 0xFC0) >> 6)
+ self[pos + 2] = 0x80 | (cp & 0x3F)
else if ln == 4 then
# Lead byte 0xF0 | bits 18..20, then three continuation bytes carrying
# bits 12..17, bits 6..11 and bits 0..5.
- self[pos] = (0xF0 | ((cp & 0x1C0000) >> 18)).to_b
- self[pos + 1] = (0x80 | ((cp & 0x3F000) >> 12)).to_b
- self[pos + 2] = (0x80 | ((cp & 0xFC0) >> 6)).to_b
- self[pos + 3] = (0x80 | (cp & 0x3F)).to_b
+ self[pos] = 0xF0 | ((cp & 0x1C0000) >> 18)
+ self[pos + 1] = 0x80 | ((cp & 0x3F000) >> 12)
+ self[pos + 2] = 0x80 | ((cp & 0xFC0) >> 6)
+ self[pos + 3] = 0x80 | (cp & 0x3F)
end
end
end
var nslen = int_to_s_len
var ns = new CString(nslen + 1)
- ns[nslen] = 0u8
+ ns[nslen] = 0
native_int_to_s(ns, nslen + 1)
return new FlatString.full(ns, nslen, 0, nslen)
end
mypos += 1
end
var ns = new CString(sl + 1)
- ns[sl] = 0u8
+ ns[sl] = 0
i = 0
var off = 0
while i < mypos do
mypos += 1
end
var ns = new CString(sl + 1)
- ns[sl] = 0u8
+ ns[sl] = 0
i = 0
var off = 0
while i < mypos do