+ # Number of additional bytes required to store the HTML-escaped form of `self`.
+ #
+ # `<` and `>` each grow by 3 bytes, while `&`, `"`, `'` and `/` each
+ # grow by 4 bytes. A result of 0 means nothing has to be escaped.
+ fun chars_to_html_escape: Int do
+ 	var bytes = _items
+ 	var last = last_byte
+ 	var cur = first_byte
+ 	var extra = 0
+ 	while cur <= last do
+ 		var b = bytes[cur]
+ 		if b == 0x3Cu8 or b == 0x3Eu8 then
+ 			# `<` or `>`: one byte becomes a 4-byte entity
+ 			extra += 3
+ 		else if b == 0x26u8 or b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+ 			# `&`, `"`, `'` or `/`: one byte becomes a 5-byte entity
+ 			extra += 4
+ 		end
+ 		cur += 1
+ 	end
+ 	return extra
+ end
+
+ # Returns a copy of `self` where HTML meta-characters are replaced by entities.
+ #
+ # When `chars_to_html_escape` reports that nothing needs escaping,
+ # `to_s` is returned directly and no new buffer is allocated.
+ redef fun html_escape
+ do
+ 	var extra = chars_to_html_escape
+ 	if extra == 0 then return to_s
+ 	var src = _items
+ 	var last = last_byte
+ 	var rpos = first_byte
+ 	var out_len = extra + _byte_length
+ 	var out = new CString(out_len)
+ 	var wpos = 0
+ 	while rpos <= last do
+ 		var b = src[rpos]
+ 		# Some HTML characters are used as meta-data, they are
+ 		# replaced by an HTML-escaped equivalent:
+ 		#
+ 		# * 0x3C (<) => &lt;
+ 		# * 0x3E (>) => &gt;
+ 		# * 0x26 (&) => &amp;
+ 		# * 0x22 (") => &#34;
+ 		# * 0x27 (') => &#39;
+ 		# * 0x2F (/) => &#47;
+ 		if b == 0x3Cu8 or b == 0x3Eu8 then
+ 			# `&lt;` and `&gt;` only differ by their second byte
+ 			var letter = 0x6Cu8
+ 			if b == 0x3Eu8 then letter = 0x67u8
+ 			out[wpos] = 0x26u8
+ 			out[wpos + 1] = letter
+ 			out[wpos + 2] = 0x74u8
+ 			out[wpos + 3] = 0x3Bu8
+ 			wpos += 4
+ 		else if b == 0x26u8 then
+ 			out[wpos] = 0x26u8
+ 			out[wpos + 1] = 0x61u8
+ 			out[wpos + 2] = 0x6Du8
+ 			out[wpos + 3] = 0x70u8
+ 			out[wpos + 4] = 0x3Bu8
+ 			wpos += 5
+ 		else if b == 0x22u8 or b == 0x27u8 or b == 0x2Fu8 then
+ 			# Numeric entity `&#NN;`, the two digits defaulting to "34" (`"`)
+ 			var tens = 0x33u8
+ 			var ones = 0x34u8
+ 			if b == 0x27u8 then
+ 				ones = 0x39u8
+ 			else if b == 0x2Fu8 then
+ 				tens = 0x34u8
+ 				ones = 0x37u8
+ 			end
+ 			out[wpos] = 0x26u8
+ 			out[wpos + 1] = 0x23u8
+ 			out[wpos + 2] = tens
+ 			out[wpos + 3] = ones
+ 			out[wpos + 4] = 0x3Bu8
+ 			wpos += 5
+ 		else
+ 			# Regular byte, copied verbatim
+ 			out[wpos] = b
+ 			wpos += 1
+ 		end
+ 		rpos += 1
+ 	end
+ 	return new FlatString.with_infos(out, out_len, 0)
+ end
+
+ # By escaping `self` to C, how many more bytes will be needed ?
+ #
+ # Per byte of `self`:
+ #
+ # * `\n`, `\t`, `"`, `'` and `\` need 1 more byte (a leading backslash)
+ # * `?` needs 1 more byte when the following byte could complete a trigraph
+ # * any other byte below 32 needs 3 more bytes (octal form `\0oo`)
+ #
+ # This enables a double-optimization in `escape_to_c` since if this
+ # method returns 0, then `self` does not need escaping and can be
+ # returned as-is
+ #
+ # NOTE: the conditions tested here must stay strictly identical to the
+ # ones of `escape_to_c`, since the result is used to size its buffer.
+ fun chars_to_escape_to_c: Int do
+ 	var its = _items
+ 	var max = last_byte
+ 	var pos = first_byte
+ 	var req_esc = 0
+ 	while pos <= max do
+ 		var c = its[pos]
+ 		if c == 0x0Au8 then
+ 			req_esc += 1
+ 		else if c == 0x09u8 then
+ 			req_esc += 1
+ 		else if c == 0x22u8 then
+ 			req_esc += 1
+ 		else if c == 0x27u8 then
+ 			req_esc += 1
+ 		else if c == 0x5Cu8 then
+ 			req_esc += 1
+ 		else if c == 0x3Fu8 then
+ 			var j = pos + 1
+ 			# `j` is a byte index in `_items`: it is bounded by the last
+ 			# byte of `self`, not by the length in characters.
+ 			if j <= max then
+ 				var next = its[j]
+ 				# We ignore `??'` because it will be escaped as `??\'`.
+ 				if
+ 					next == 0x21u8 or
+ 					next == 0x28u8 or
+ 					next == 0x29u8 or
+ 					next == 0x2Du8 or
+ 					next == 0x2Fu8 or
+ 					next == 0x3Cu8 or
+ 					next == 0x3Du8 or
+ 					next == 0x3Eu8
+ 				then req_esc += 1
+ 			end
+ 		else if c < 32u8 then
+ 			req_esc += 3
+ 		end
+ 		pos += 1
+ 	end
+ 	return req_esc
+ end
+
+ # Returns `self` escaped so that it can be used inside a C string literal.
+ #
+ # The output buffer is sized with `chars_to_escape_to_c`; when that
+ # method returns 0, `self.to_s` is returned without any copy.
+ redef fun escape_to_c do
+ 	var ln_extra = chars_to_escape_to_c
+ 	if ln_extra == 0 then return self.to_s
+ 	var its = _items
+ 	var max = last_byte
+ 	var nlen = _byte_length + ln_extra
+ 	var nns = new CString(nlen)
+ 	var pos = first_byte
+ 	var opos = 0
+ 	while pos <= max do
+ 		var c = its[pos]
+ 		# Special codes:
+ 		#
+ 		# Any byte with value < 32 is a control character
+ 		# All their uses will be replaced by their octal
+ 		# value in C.
+ 		#
+ 		# There are two exceptions however:
+ 		#
+ 		# * 0x09 => \t
+ 		# * 0x0A => \n
+ 		#
+ 		# Aside from the code points above, the following are:
+ 		#
+ 		# * 0x22 => \"
+ 		# * 0x27 => \'
+ 		# * 0x5C => \\
+ 		# * 0x3F => \? (only when the next byte could complete a trigraph)
+ 		if c == 0x09u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x74u8
+ 			opos += 2
+ 		else if c == 0x0Au8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x6Eu8
+ 			opos += 2
+ 		else if c == 0x22u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x22u8
+ 			opos += 2
+ 		else if c == 0x27u8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x27u8
+ 			opos += 2
+ 		else if c == 0x5Cu8 then
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x5Cu8
+ 			opos += 2
+ 		else if c == 0x3Fu8 then
+ 			var j = pos + 1
+ 			# Same bound as in `chars_to_escape_to_c`: `j` is a byte
+ 			# index, so it is compared with the last byte of `self`.
+ 			if j <= max then
+ 				var next = its[j]
+ 				# We ignore `??'` because it will be escaped as `??\'`.
+ 				if
+ 					next == 0x21u8 or
+ 					next == 0x28u8 or
+ 					next == 0x29u8 or
+ 					next == 0x2Du8 or
+ 					next == 0x2Fu8 or
+ 					next == 0x3Cu8 or
+ 					next == 0x3Du8 or
+ 					next == 0x3Eu8
+ 				then
+ 					nns[opos] = 0x5Cu8
+ 					opos += 1
+ 				end
+ 			end
+ 			nns[opos] = 0x3Fu8
+ 			opos += 1
+ 		else if c < 32u8 then
+ 			# Octal escape `\0oo`: `c` is below 32, so two octal
+ 			# digits after the leading `0` are enough.
+ 			nns[opos] = 0x5Cu8
+ 			nns[opos + 1] = 0x30u8
+ 			nns[opos + 2] = ((c & 0x38u8) >> 3) + 0x30u8
+ 			nns[opos + 3] = (c & 0x07u8) + 0x30u8
+ 			opos += 4
+ 		else
+ 			nns[opos] = c
+ 			opos += 1
+ 		end
+ 		pos += 1
+ 	end
+ 	return nns.to_s_unsafe(nlen)
+ end
+
+ # Returns the `Char` at character position `index`.
+ #
+ # `_position` (a character index) and `_bytepos` (its byte index in
+ # `_items`) are used as a cache of the last accessed character: when
+ # `index` is the cached position, or is adjacent to it and the byte
+ # crossed has its high bit clear, the character is located without
+ # calling `fetch_char_at`.
+ #
+ # Only the fallback path at the end checks the bounds of `index`;
+ # the shortcut paths return before the `assert` is reached.
+ redef fun [](index) do
+ var len = _length
+
+ # Statistically:
+ # * ~70% want the next char
+ # * ~23% want the previous
+ # * ~7% want the same char
+ #
+ # So it makes sense to shortcut early. And early is here.
+ # `dpos` is the signed distance from the cached character position.
+ var dpos = index - _position
+ var b = _bytepos
+ if dpos == 1 and index < len - 1 then
+ var its = _items
+ # Byte at the cached position: high bit clear means it stands alone,
+ # so the next character starts on the very next byte.
+ var c = its[b]
+ if c & 0x80u8 == 0x00u8 then
+ # We want the next, and current is easy.
+ # So next is easy to find!
+ b += 1
+ _position = index
+ _bytepos = b
+ # The rest will be done by `dpos==0` below.
+ dpos = 0
+ end
+ else if dpos == -1 and index > 1 then
+ var its = _items
+ # Byte right before the cached byte position.
+ var c = its[b-1]
+ if c & 0x80u8 == 0x00u8 then
+ # We want the previous, and it is easy.
+ b -= 1
+ # NOTE(review): this assignment looks unused since the branch returns below.
+ dpos = 0
+ _position = index
+ _bytepos = b
+ return c.ascii
+ end
+ end
+ if dpos == 0 then
+ # We know what we want (+0 or +1) just get it now!
+ var its = _items
+ var c = its[b]
+ if c & 0x80u8 == 0x00u8 then return c.ascii
+ # High bit set: let `char_at` read the character at byte `b`.
+ return items.char_at(b)
+ end
+
+ # No shortcut applied: check the bounds and do the full lookup.
+ assert index >= 0 and index < len
+ return fetch_char_at(index)
+ end
+
+ # Returns the `Char` found at character position `index` of `self`.
+ #
+ # The position is first converted with `char_to_byte_index`.
+ #
+ # WARNING: no bound-checking is done, use at your own risks.
+ fun fetch_char_at(index: Int): Char do
+ 	var bpos = char_to_byte_index(index)
+ 	var bytes = _items
+ 	var lead = bytes[bpos]
+ 	# High bit set: the byte is not ASCII on its own, delegate to `char_at`
+ 	if lead & 0x80u8 != 0x00u8 then return bytes.char_at(bpos)
+ 	return lead.ascii
+ end
+
+ # Returns the integer written in hexadecimal in `self`.
+ #
+ # `self` is expected to contain only digits and letters up to 'f'.
+ # The conversion reads `ln` bytes starting at character `pos`; `pos`
+ # defaults to 0 and `ln` to `length - pos`.
+ #
+ #     assert "ff".to_hex == 255
+ redef fun to_hex(pos, ln) do
+ 	if pos == null then pos = 0
+ 	if ln == null then ln = length - pos
+ 	var cur = char_to_byte_index(pos)
+ 	var bytes = _items
+ 	var stop = cur + ln
+ 	var acc = 0
+ 	while cur < stop do
+ 		# Shift the accumulated value by one hex digit, then add the new one
+ 		acc = (acc << 4) + bytes[cur].ascii.from_hex
+ 		cur += 1
+ 	end
+ 	return acc
+ end
+
+ # Delegates to `_items.copy_to`, with the source offset `src_off`
+ # shifted by `first_byte` so that it is relative to the start of `self`.
+ redef fun copy_to_native(dst, n, src_off, dst_off) do
+ 	var src_start = first_byte + src_off
+ 	_items.copy_to(dst, n, src_start, dst_off)
+ end