init(ns, 0, 0)
end
+ # Init a `Bytes` with capacity `cap`
init with_capacity(cap: Int) do
var ns = new NativeString(cap)
init(ns, 0, cap)
end
end
+ # var b = new Bytes.empty
+ # b.append([0x41u8, 0x41u8, 0x18u8])
+ # b.pop
+ # assert b.to_s == "AA"
+ redef fun pop do
+ assert length >= 1
+ length -= 1
+ return items[length]
+ end
+
redef fun clear do length = 0
# Regenerates the buffer, necessary when it was persisted
redef fun to_s do
persisted = true
- return new FlatString.with_infos(items, length, 0, length -1)
+ var b = self
+ if not is_utf8 then
+ b = clean_utf8
+ persisted = false
+ end
+ return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
end
redef fun iterator do return new BytesIterator.with_buffer(self)
+
+ # Is the byte collection valid UTF-8 ?
+ fun is_utf8: Bool do
+ var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+ var lobounds = once [0, 0x80, 0x800, 0x10000]
+ var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+ var pos = 0
+ var len = length
+ var mits = items
+ while pos < len do
+ var nxst = mits.length_of_char_at(pos)
+ var charst_index = (nxst - 1) * 2
+ if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
+ var c = mits.char_at(pos)
+ var cp = c.ascii
+ if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
+ if cp >= 0xD800 and cp <= 0xDFFF or
+ cp == 0xFFFE or cp == 0xFFFF then return false
+ else
+ return false
+ end
+ else
+ return false
+ end
+ pos += nxst
+ end
+ return true
+ end
+
+ # Cleans the bytes of `self` to be UTF-8 compliant
+ private fun clean_utf8: Bytes do
+ var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+ var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
+ var lobounds = once [0, 0x80, 0x800, 0x10000]
+ var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+ var pos = 0
+ var len = length
+ var ret = new Bytes.with_capacity(len)
+ var mits = items
+ while pos < len do
+ var nxst = mits.length_of_char_at(pos)
+ var charst_index = (nxst - 1) * 2
+ if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
+ var c = mits.char_at(pos)
+ var cp = c.ascii
+ if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
+ if cp >= 0xD800 and cp <= 0xDFFF or
+ cp == 0xFFFE or cp == 0xFFFF then
+ ret.append badchar
+ pos += 1
+ else
+ var pend = pos + nxst
+ for i in [pos .. pend[ do ret.add mits[i]
+ pos += nxst
+ end
+ else
+ ret.append badchar
+ pos += 1
+ end
+ else
+ ret.append badchar
+ pos += 1
+ end
+ end
+ return ret
+ end
end
private class BytesIterator
redef fun item do return tgt[index]
end
+redef class Text
+ # Returns a mutable copy of `self`'s bytes
+ #
+ # ~~~nit
+ # assert "String".to_bytes isa Bytes
+ # assert "String".to_bytes == [83u8, 116u8, 114u8, 105u8, 110u8, 103u8]
+ # ~~~
+ fun to_bytes: Bytes do
+ var b = new Bytes.with_capacity(bytelen)
+ append_to_bytes b
+ return b
+ end
+
+ # Appends `self.bytes` to `b`
+ fun append_to_bytes(b: Bytes) do
+ for s in substrings do
+ var from = if s isa FlatString then s.first_byte else 0
+ b.append_ns_from(s.items, s.bytelen, from)
+ end
+ end
+end
+
+redef class FlatText
+ redef fun append_to_bytes(b) do
+ var from = if self isa FlatString then first_byte else 0
+ b.append_ns_from(items, bytelen, from)
+ end
+end
+
redef class NativeString
+ # Creates a new `Bytes` object from `self` with `strlen` as length
fun to_bytes: Bytes do
var len = cstring_length
return new Bytes(self, len, len)