core :: Bytes :: defaultinit
# A buffer containing Byte-manipulation facilities
#
# Uses Copy-On-Write when persisted
class Bytes
super AbstractArray[Int]
super BytePattern
# Underlying byte buffer.
#
# A CString being a char*, it can be used as underlying representation here.
var items: CString
# Number of bytes in the array
redef var length
# Capacity of the array: the size the `items` buffer is allocated with
private var capacity: Int
# Has this buffer been persisted (to_s'd)?
#
# Used for Copy-On-Write: mutating methods call `regen` first when this is set.
private var persisted = false
# Creates a `Bytes` holding no byte, backed by a zero-sized buffer
#
# ~~~
# var b = new Bytes.empty
# assert b.to_s == ""
# ~~~
init empty
do
	init(new CString(0), 0, 0)
end
# Creates a `Bytes` holding no byte, whose buffer is allocated for `cap` bytes
init with_capacity(cap: Int)
do
	var buffer = new CString(cap)
	init(buffer, 0, cap)
end
# Length of `self` when used as a pattern: its number of bytes
redef fun pattern_length
do
	return length
end
# Does `self` hold no byte at all?
redef fun is_empty
do
	return length == 0
end
# Byte stored at index `i`
#
# `i` must satisfy `0 <= i < length`.
#
# ~~~
# var b = new Bytes.empty
# b.add 101
# assert b[0] == 101
# ~~~
redef fun [](i)
do
	assert i >= 0 and i < length
	return items[i]
end
# Builds a new `Bytes` with the same content as `self`
#
# The copy is allocated with a capacity of `length`.
fun clone: Bytes
do
	var dup = new Bytes.with_capacity(length)
	dup.append self
	return dup
end
# Returns a copy of `self` without its leading and trailing whitespaces
#
# ~~~
# var b = "102041426E6F1020".hexdigest_to_bytes
# assert b.trim.hexdigest == "41426E6F"
# ~~~
#
# NOTE: A whitespace is defined here as a byte whose value is <= 0x20
fun trim: Bytes
do
	var len = length
	# Skip the leading whitespaces
	var head = 0
	while head < len and self[head] <= 0x20 do head += 1
	# Nothing but whitespaces (or nothing at all)
	if head >= len then return new Bytes.empty
	# Skip the trailing whitespaces; index 0 itself is never tested
	var tail = len - 1
	while tail > 0 and self[tail] <= 0x20 do tail -= 1
	return slice(head, tail - head + 1)
end
# Copies `count` bytes of `self`, starting at index `from`, into a new `Bytes`
#
# The requested window is clipped to the content of `self`: a negative `from`
# shortens it by as many bytes, and it never runs past `length`.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.slice(1, 2).hexdigest == "6263"
# assert b.slice(-1, 2).hexdigest == "61"
# assert b.slice(1, 0).hexdigest == ""
# assert b.slice(2, 5).hexdigest == "6364"
# ~~~
fun slice(from, count: Int): Bytes
do
	var start = from
	var size = count
	if size <= 0 then return new Bytes.empty
	if start < 0 then
		# The part of the window before index 0 is dropped
		size = (size + start).max(0)
		start = 0
	end
	# Clip the window to what is actually stored after `start`
	var available = length - start
	if size > available then size = available
	if size <= 0 then return new Bytes.empty
	var res = new Bytes.with_capacity(size)
	res.append_ns(items.fast_cstring(start), size)
	return res
end
# Copies every byte of `self` from index `from` up to the end
#
# A negative `from` is treated as 0.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.slice_from(1).hexdigest == "626364"
# assert b.slice_from(-1).hexdigest == "61626364"
# assert b.slice_from(2).hexdigest == "6364"
# ~~~
fun slice_from(from: Int): Bytes
do
	var len = length
	if from >= len then return new Bytes.empty
	var start = from
	if start < 0 then start = 0
	# `slice` clips the count to what remains after `start`
	return slice(start, len)
end
# Reverses the order of the bytes of `self`, in place
#
# ~~~
# var b = "abcd".to_bytes
# b.reverse
# assert b.to_s == "dcba"
# ~~~
fun reverse
do
	# Swap the two ends and walk towards the middle
	var lo = 0
	var hi = length - 1
	while lo < hi do
		var swapped = self[lo]
		self[lo] = self[hi]
		self[hi] = swapped
		lo += 1
		hi -= 1
	end
end
# Returns self as an hexadecimal digest, two characters per byte.
#
# Also known as plain hexdump or postscript hexdump.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.hexdigest == "61626364"
# assert b.hexdigest.hexdigest_to_bytes == b
# ~~~
fun hexdigest: String
do
	var digest_len = length * 2
	var buf = new CString(digest_len)
	for pos in [0 .. length[ do
		# Byte number `pos` is written at offset `2 * pos`
		self[pos].add_digest_at(buf, pos * 2)
	end
	return new FlatString.full(buf, digest_len, 0, digest_len)
end
# Return self as a C hexadecimal digest where bytes are prefixed by `\x`
#
# Each byte yields four characters: `\`, `x` and its two-character digest.
# The output is compatible with literal stream of bytes for most languages
# including C and Nit.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.chexdigest == "\\x61\\x62\\x63\\x64"
# assert b.chexdigest.unescape_to_bytes == b
# ~~~
fun chexdigest: String
do
	var digest_len = length * 4
	var buf = new CString(digest_len)
	for pos in [0 .. length[ do
		var base = pos * 4
		buf[base] = u'\\'
		buf[base + 1] = u'x'
		self[pos].add_digest_at(buf, base + 2)
	end
	return new FlatString.full(buf, digest_len, 0, digest_len)
end
# Returns self as a stream of bits (0 and 1), eight characters per byte
#
# For each byte, the bit of value 128 comes first.
#
# ~~~
# var b = "abcd".to_bytes
# assert b.binarydigest == "01100001011000100110001101100100"
# assert b.binarydigest.binarydigest_to_bytes == b
# ~~~
fun binarydigest: String
do
	var digest_len = length * 8
	var buf = new CString(digest_len)
	for pos in [0 .. length[ do
		var byte = self[pos]
		var base = pos * 8
		for bit in [0 .. 8[ do
			# Mask goes 128, 64, ..., 1
			var mask = 128 >> bit
			if byte & mask == 0 then
				buf[base + bit] = u'0'
			else
				buf[base + bit] = u'1'
			end
		end
	end
	return new FlatString.full(buf, digest_len, 0, digest_len)
end
# Interprets `self` as a big-endian integer (unsigned by default)
#
# ~~~
# var b = "0102".hexdigest_to_bytes
# assert b.to_i == 258
#
# assert "01".hexdigest_to_bytes.to_i == 1
# assert "FF".hexdigest_to_bytes.to_i == 255
# assert "0000".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `self.is_empty`, 0 is returned.
#
# ~~~
# assert "".hexdigest_to_bytes.to_i == 0
# ~~~
#
# If `signed == true`, the bytes are read as a signed integer.
# As usual, the sign bit is the left most bit, no matter the
# `length` of `self`.
#
# ~~~
# assert "01".hexdigest_to_bytes.to_i(true) == 1
# assert "FF".hexdigest_to_bytes.to_i(true) == -1
# assert "00FF".hexdigest_to_bytes.to_i(true) == 255
# assert "80".hexdigest_to_bytes.to_i(true) == -128
# assert "E0".hexdigest_to_bytes.to_i(true) == -32
# assert "FE00".hexdigest_to_bytes.to_i(true) == -512
# assert "FEFEFE".hexdigest_to_bytes.to_i(true) == -65794
# ~~~
#
# `Int::to_bytes` is a loosely reverse method.
#
# ~~~
# var b = "0102".hexdigest_to_bytes
# assert b.to_i.to_bytes == b
# assert (b.to_i + 1).to_bytes.hexdigest == "0103"
# assert "0001".hexdigest_to_bytes.to_i.to_bytes.hexdigest == "01"
#
# assert (-32).to_bytes.to_i(true) == -32
# ~~~
#
# Warning: `Int` might overflow for bytes with more than 60 bits.
fun to_i(signed: nullable Bool): Int
do
	# Accumulate the bytes, most significant first
	var res = 0
	var i = 0
	while i < length do
		res *= 256
		res += self[i].to_i
		i += 1
	end

	# Two's complement if `signed`: the value is negative as soon as the
	# left most bit is set, which includes a leading byte of exactly 0x80.
	if signed == true and not_empty and first >= 0x80 then
		# Mask with as many 0xFF bytes as `self` has bytes
		var ff = 0
		for j in [0..length[ do
			ff *= 0x100
			ff += 0xFF
		end
		res = -((res ^ ff) + 1)
	end
	return res
end
# Stores the byte `v` at index `i`
#
# `i` must satisfy `0 <= i <= length`; writing at `length` appends `v`.
#
# ~~~
# var b = new Bytes.with_capacity(1)
# b[0] = 101
# assert b.to_s == "e"
# ~~~
redef fun []=(i, v)
do
	if persisted then regen
	assert i >= 0 and i <= length
	if i == length then
		# One past the end: `add` stores `v` at that very index
		add(v)
	else
		items[i] = v
	end
end
# Appends the byte `c` at the end of `self`, growing the buffer when it is full
#
# ~~~
# var b = new Bytes.empty
# b.add 101
# assert b.to_s == "e"
# ~~~
redef fun add(c)
do
	if persisted then regen
	# Room is needed for one more byte: asking `enlarge` for `length` only
	# would be a no-op when the buffer is exactly full.
	if length >= capacity then enlarge(length + 1)
	items[length] = c
	length += 1
end
# Appends the UTF-8 encoding of the character `c` to `self`
#
# ~~~
# var b = new Bytes.empty
# b.add_char('A')
# b.add_char('キ')
# assert b.hexdigest == "41E382AD"
# ~~~
fun add_char(c: Char)
do
	if persisted then regen
	# Number of bytes `c` takes once encoded
	var width = c.u8char_len
	enlarge(length + width)
	items.set_char_at(length, c)
	length += width
end
# Does `self` contain `c`?
#
# Only `Int` values can match; `c` is masked with 255 before the lookup.
redef fun has(c)
do
	if c isa Int then return super(c & 255)
	return false
end
# Appends every element of `arr` at the end of `self`
#
# Another `Bytes` is copied in one go from its buffer; anything else is
# added element by element.
#
# ~~~
# var b = new Bytes.empty
# b.append([104, 101, 108, 108, 111])
# assert b.to_s == "hello"
# ~~~
redef fun append(arr)
do
	if not arr isa Bytes then
		for byte in arr do add byte
		return
	end
	append_ns(arr.items, arr.length)
end
# Removes the last byte of `self` and returns it
#
# `self` must hold at least one byte.
#
# ~~~
# var b = new Bytes.empty
# b.append([0x41, 0x41, 0x18])
# b.pop
# assert b.to_s == "AA"
# ~~~
redef fun pop
do
	assert length >= 1
	var last_index = length - 1
	length = last_index
	return items[last_index]
end
# Empties `self` by resetting `length`; the buffer itself is left as is
redef fun clear
do
	length = 0
end
# Regenerates the buffer, necessary when it was persisted
#
# The `length` first bytes are copied to a freshly allocated buffer of the
# same `capacity`, which then replaces `items`, so that later writes no longer
# touch the buffer that was handed out.
private fun regen
do
	var nns = new CString(capacity)
	items.copy_to(nns, length, 0, 0)
	# Switch to the copy, otherwise the copy is lost and nothing is regenerated
	items = nns
	persisted = false
end
# Appends the `ln` first bytes of `ns` to self
#
# Same as `append_ns_from` with a starting index of 0.
fun append_ns(ns: CString, ln: Int)
do
	append_ns_from(ns, ln, 0)
end
# Appends `ln` bytes of `ns`, read from index `from` onwards, to self
fun append_ns_from(ns: CString, ln, from: Int)
do
	if persisted then regen
	# Grow first when the result would not fit
	var needed = length + ln
	if needed > capacity then enlarge(needed)
	ns.copy_to(items, ln, from, length)
	length = needed
end
# Appends the bytes of `str` to `self`
#
# The work is delegated to `str.append_to_bytes`.
fun append_text(str: Text)
do
	str.append_to_bytes(self)
end
# Appends the bytes of `self` at the end of `b`
redef fun append_to(b)
do
	b.append(self)
end
# Makes sure the buffer can hold at least `sz` bytes
#
# Nothing happens when `capacity` is already large enough. Otherwise the
# capacity is raised to at least 16, then grown by `* 2 + 2` steps until it
# reaches `sz`, and the content moves to a newly allocated buffer.
redef fun enlarge(sz)
do
	if capacity >= sz then return
	# A new buffer is about to replace `items`
	persisted = false
	var cap = capacity
	if cap < 16 then cap = 16
	while cap < sz do cap = cap * 2 + 2
	capacity = cap
	var grown = new CString(cap)
	items.copy_to(grown, length, 0, 0)
	items = grown
end
# Builds a string from the `length` first bytes of `items`
#
# `self` is flagged as persisted before the string is built with
# `to_s_unsafe(length, copy=false)`; the flag is cleared again when the
# result compares different from `items`.
redef fun to_s
do
	persisted = true
	var str = items.to_s_unsafe(length, copy=false)
	# NOTE(review): this compares the returned string with the raw buffer;
	# confirm this is how buffer sharing is meant to be detected.
	if str != items then persisted = false
	return str
end
# Returns a `BytesIterator` built on `self`
redef fun iterator
do
	return new BytesIterator.with_buffer(self)
end
# Index in `b` where the first occurrence of `self` starts, looking at
# positions `from` and later, or -1 when there is none
#
# An empty `self` is never found. A negative `from` is treated as 0.
#
# ~~~
# var hay = "abcabc".to_bytes
# assert "bc".to_bytes.first_index_in_from(hay, 0) == 1
# assert "bc".to_bytes.first_index_in_from(hay, 2) == 4
# assert "bc".to_bytes.first_index_in_from(hay, 5) == -1
# ~~~
redef fun first_index_in_from(b, from)
do
	if is_empty then return -1
	var plen = length
	var start = from
	if start < 0 then start = 0
	# Last position where a full occurrence still fits in `b`
	var last_start = b.length - plen
	while start <= last_start do
		var matched = 0
		while matched < plen and self[matched] == b[start + matched] do matched += 1
		if matched == plen then return start
		start += 1
	end
	return -1
end
# Index in `b` where the last occurrence of `self` starts, or -1 when there
# is none
#
# Only occurrences whose last byte sits at index `from` or before are
# considered. An empty `self` is never found.
#
# NOTE(review): for multi-byte patterns `from` is read as the highest index
# the occurrence may reach; confirm this against the `BytePattern` contract.
#
# ~~~
# var hay = "abcabc".to_bytes
# assert "bc".to_bytes.last_index_in_from(hay, 5) == 4
# assert "bc".to_bytes.last_index_in_from(hay, 4) == 1
# assert "bc".to_bytes.last_index_in_from(hay, 0) == -1
# ~~~
redef fun last_index_in_from(b, from)
do
	if is_empty then return -1
	var plen = length
	# Highest index the occurrence may reach, kept inside `b`
	var last_end = from
	if last_end >= b.length then last_end = b.length - 1
	var start = last_end - plen + 1
	while start >= 0 do
		var matched = 0
		while matched < plen and self[matched] == b[start + matched] do matched += 1
		if matched == plen then return start
		start -= 1
	end
	return -1
end
# Start indexes of all the non-overlapping occurrences of `self` in `b`,
# in increasing order
#
# ~~~
# var hay = "abcabc".to_bytes
# assert "bc".to_bytes.search_all_in(hay) == [1, 4]
# assert "zz".to_bytes.search_all_in(hay).is_empty
# ~~~
redef fun search_all_in(b)
do
	var ret = new Array[Int]
	var pos = first_index_in_from(b, 0)
	while pos != -1 do
		ret.add pos
		# Resume right after the occurrence that was just found
		pos = first_index_in_from(b, pos + length)
	end
	return ret
end
# Splits the content of `self` at each occurrence of `b`
#
# The occurrences themselves are left out of the result. Without any
# occurrence, the result holds a single copy of `self`.
#
# ~~~
# var a = "String is string".to_bytes.split_with(u's')
# assert a.length == 3
# assert a[0].hexdigest == "537472696E672069"
# assert a[1].hexdigest == "20"
# assert a[2].hexdigest == "7472696E67"
# ~~~
fun split_with(b: BytePattern): Array[Bytes]
do
	var hits = b.search_all_in(self)
	if hits.is_empty then return [clone]
	var parts = new Array[Bytes]
	# Index of the first byte that has not been emitted yet
	var cursor = 0
	for hit in hits do
		parts.add slice(cursor, hit - cursor)
		cursor = hit + b.pattern_length
	end
	# What follows the last occurrence
	parts.add slice_from(cursor)
	return parts
end
# Splits `self` in two parts at the first occurrence of `b`
#
# The occurrence itself belongs to neither part. Without any occurrence,
# the result holds a single copy of `self`.
#
# ~~~
# var a = "String is string".to_bytes.split_once_on(u's')
# assert a[0].hexdigest == "537472696E672069"
# assert a[1].hexdigest == "20737472696E67"
# ~~~
fun split_once_on(b: BytePattern): Array[Bytes]
do
	var at = b.first_index_in(self)
	if at == -1 then return [clone]
	var halves = new Array[Bytes].with_capacity(2)
	halves.add slice(0, at)
	halves.add slice_from(at + b.pattern_length)
	return halves
end
# Returns a copy of `self` where each occurrence of `pattern` is replaced
# by `bytes`
#
# ~~~
# var b = "String is string".to_bytes.replace(0x20, 0x41)
# assert b.hexdigest == "537472696E6741697341737472696E67"
# ~~~
fun replace(pattern: BytePattern, bytes: BytePattern): Bytes
do
	if is_empty then return new Bytes.empty
	var hits = pattern.search_all_in(self)
	if hits.is_empty then return clone
	var plen = pattern.pattern_length
	var res = new Bytes.with_capacity(length)
	# Index of the first byte of `self` that has not been handled yet
	var cursor = 0
	for hit in hits do
		# Untouched bytes before the occurrence, then the replacement
		res.append_ns(items.fast_cstring(cursor), hit - cursor)
		bytes.append_to res
		cursor = hit + plen
	end
	# What follows the last occurrence
	res.append(slice_from(cursor))
	return res
end
# Decode `self` from percent (or URL) encoding to a clear string
#
# Invalid '%' are not decoded: a '%' that is not followed by two hexadecimal
# digits, including one too close to the end of `self`, is kept as is.
#
# ~~~
# assert "aBc09-._~".to_bytes.from_percent_encoding == "aBc09-._~".to_bytes
# assert "%25%28%29%3c%20%3e".to_bytes.from_percent_encoding == "%()< >".to_bytes
# assert ".com%2fpost%3fe%3dasdf%26f%3d123".to_bytes.from_percent_encoding == ".com/post?e=asdf&f=123".to_bytes
# assert "%25%28%29%3C%20%3E".to_bytes.from_percent_encoding == "%()< >".to_bytes
# assert "incomplete %".to_bytes.from_percent_encoding == "incomplete %".to_bytes
# assert "incomplete %4".to_bytes.from_percent_encoding == "incomplete %4".to_bytes
# assert "invalid % usage".to_bytes.from_percent_encoding == "invalid % usage".to_bytes
# assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".to_bytes.from_percent_encoding == "éあいう".to_bytes
# assert "%1 %A %C3%A9A9".to_bytes.from_percent_encoding == "%1 %A éA9".to_bytes
# ~~~
fun from_percent_encoding: Bytes
do
	var tmp = new Bytes.with_capacity(length)
	var pos = 0
	while pos < length do
		var b = self[pos]
		# An escape takes 3 bytes ('%' and two digits): only look ahead
		# when both digits are actually inside `self`.
		if b == u'%' and length - pos >= 3 then
			var hi = self[pos + 1]
			var lo = self[pos + 2]
			if hi.is_valid_hexdigit and lo.is_valid_hexdigit then
				tmp.add((hi.hexdigit_to_byteval << 4) + lo.hexdigit_to_byteval)
				pos += 3
				continue
			end
		end
		# Regular byte, or '%' that does not start a valid escape
		tmp.add b
		pos += 1
	end
	return tmp
end
# Does `self` start with `b`?
#
# The answer is the one given by `b.is_prefix(self)`.
fun has_prefix(b: BytePattern): Bool
do
	return b.is_prefix(self)
end
# Does `self` end with `b`?
#
# The answer is the one given by `b.is_suffix(self)`.
fun has_suffix(b: BytePattern): Bool
do
	return b.is_suffix(self)
end
# Is `self` a suffix of `b`?
#
# Every byte of `self`, the first one included, must match the end of `b`.
#
# ~~~
# assert "cd".to_bytes.is_suffix("abcd".to_bytes)
# assert not "xd".to_bytes.is_suffix("abcd".to_bytes)
# assert not "z".to_bytes.is_suffix("abcd".to_bytes)
# ~~~
redef fun is_suffix(b)
do
	if length > b.length then return false
	# Walk both sequences backwards, down to index 0 of `self` included
	var j = b.length - 1
	var i = length - 1
	while i >= 0 do
		if self[i] != b[j] then return false
		i -= 1
		j -= 1
	end
	return true
end
# Is `self` a prefix of `b`?
#
# `self` is compared byte by byte with the beginning of `b`.
redef fun is_prefix(b)
do
	var plen = length
	if plen > b.length then return false
	var i = 0
	while i < plen do
		if self[i] != b[i] then return false
		i += 1
	end
	return true
end
end
lib/core/bytes.nit:235,1--772,3