intrude import text::flat
redef class Byte
+ # Writes `self` as two ASCII hexadecimal digits into `ns`
+ #
+ # The high nibble is stored at `ns[pos]` and the low nibble at `ns[pos + 1]`.
+ # Nibbles above 9 are written as the uppercase letters `A` to `F`.
+ private fun add_digest_at(ns: NativeString, pos: Int) do
+ 	var hi = (self & 0xF0u8) >> 4
+ 	var lo = self & 0x0Fu8
+ 	# 0x30 is '0'; 0x37 + 0x0A is 0x41, i.e. 'A'
+ 	if hi < 0x0Au8 then
+ 		ns[pos] = hi + 0x30u8
+ 	else
+ 		ns[pos] = hi + 0x37u8
+ 	end
+ 	if lo < 0x0Au8 then
+ 		ns[pos + 1] = lo + 0x30u8
+ 	else
+ 		ns[pos + 1] = lo + 0x37u8
+ 	end
+ end
++
+ # Is `self` the ASCII code of a hexadecimal digit?
+ #
+ # The accepted ranges are `0` to `9`, `A` to `F` and `a` to `f`.
+ #
+ # ~~~nit
+ # intrude import core::bytes
+ # assert not '/'.ascii.to_b.is_valid_hexdigit
+ # assert '0'.ascii.to_b.is_valid_hexdigit
+ # assert '9'.ascii.to_b.is_valid_hexdigit
+ # assert not ':'.ascii.to_b.is_valid_hexdigit
+ # assert not '@'.ascii.to_b.is_valid_hexdigit
+ # assert 'A'.ascii.to_b.is_valid_hexdigit
+ # assert 'F'.ascii.to_b.is_valid_hexdigit
+ # assert not 'G'.ascii.to_b.is_valid_hexdigit
+ # assert not '`'.ascii.to_b.is_valid_hexdigit
+ # assert 'a'.ascii.to_b.is_valid_hexdigit
+ # assert 'f'.ascii.to_b.is_valid_hexdigit
+ # assert not 'g'.ascii.to_b.is_valid_hexdigit
+ # ~~~
+ private fun is_valid_hexdigit: Bool do
+ 	# '0' .. '9'
+ 	if self >= 0x30u8 and self <= 0x39u8 then return true
+ 	# 'A' .. 'F'
+ 	if self >= 0x41u8 and self <= 0x46u8 then return true
+ 	# 'a' .. 'f'
+ 	return self >= 0x61u8 and self <= 0x66u8
+ end
+
+ # Numeric value of the hexadecimal digit whose ASCII code is `self`
+ #
+ # Uppercase and lowercase letters are both accepted.
+ #
+ # ~~~nit
+ # intrude import core::bytes
+ # assert 0x39u8.hexdigit_to_byteval == 0x09u8
+ # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
+ # ~~~
+ #
+ # REQUIRE: `self.is_valid_hexdigit`
+ private fun hexdigit_to_byteval: Byte do
+ 	# '0' .. '9' map to 0 .. 9
+ 	if self >= 0x30u8 and self <= 0x39u8 then return self - 0x30u8
+ 	# 'A' .. 'F' map to 10 .. 15
+ 	if self >= 0x41u8 and self <= 0x46u8 then return self - 0x37u8
+ 	# 'a' .. 'f' map to 10 .. 15
+ 	if self >= 0x61u8 and self <= 0x66u8 then return self - 0x57u8
+ 	# Only reached when the requirement is violated;
+ 	# the abort also satisfies the compiler about the missing return.
+ 	abort
+ end
end
# A buffer containing Byte-manipulation facilities
super AbstractArray[Byte]
# A NativeString being a char*, it can be used as underlying representation here.
- private var items: NativeString
+ var items: NativeString
# Number of bytes in the array
redef var length
return items[i]
end
+ # `self` rendered as a hexadecimal digest
+ #
+ # Every byte is written as two hexadecimal digits by `add_digest_at`,
+ # so the underlying buffer is `length * 2` bytes long.
+ fun hexdigest: String do
+ 	var out_len = length * 2
+ 	var buf = new NativeString(out_len)
+ 	# Byte `idx` occupies the two positions starting at `idx * 2`
+ 	for idx in [0 .. length[ do
+ 		self[idx].add_digest_at(buf, idx * 2)
+ 	end
+ 	return new FlatString.full(buf, out_len, 0, out_len - 1, out_len)
+ end
+
# var b = new Bytes.with_capacity(1)
# b[0] = 101u8
# assert b.to_s == "e"
redef fun to_s do
persisted = true
var b = self
- if not is_utf8 then
- b = clean_utf8
- persisted = false
- end
- return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
+ var r = b.items.to_s_with_length(length)
+ if r != items then persisted = false # NOTE(review): `r` comes from `to_s_with_length` while `items` is a NativeString; confirm this comparison tests what is intended (buffer identity?)
+ return r
end
redef fun iterator do return new BytesIterator.with_buffer(self)
- # Is the byte collection valid UTF-8 ?
- fun is_utf8: Bool do
- var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
- var lobounds = once [0, 0x80, 0x800, 0x10000]
- var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
- var pos = 0
- var len = length
- var mits = items
- while pos < len do
- var nxst = mits.length_of_char_at(pos)
- var charst_index = (nxst - 1) * 2
- if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
- var c = mits.char_at(pos)
- var cp = c.ascii
- if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
- if cp >= 0xD800 and cp <= 0xDFFF or
- cp == 0xFFFE or cp == 0xFFFF then return false
- else
- return false
- end
- else
- return false
- end
- pos += nxst
- end
- return true
- end
-
- # Cleans the bytes of `self` to be UTF-8 compliant
- private fun clean_utf8: Bytes do
- var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
- var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
- var lobounds = once [0, 0x80, 0x800, 0x10000]
- var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
- var pos = 0
- var len = length
- var ret = new Bytes.with_capacity(len)
- var mits = items
- while pos < len do
- var nxst = mits.length_of_char_at(pos)
- var charst_index = (nxst - 1) * 2
- if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
- var c = mits.char_at(pos)
- var cp = c.ascii
- if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
- if cp >= 0xD800 and cp <= 0xDFFF or
- cp == 0xFFFE or cp == 0xFFFF then
- ret.append badchar
- pos += 1
- else
- var pend = pos + nxst
- for i in [pos .. pend[ do ret.add mits[i]
- pos += nxst
- end
- else
- ret.append badchar
- pos += 1
- end
- else
- ret.append badchar
- pos += 1
- end
- end
- return ret
- end
end
private class BytesIterator
var max: Int
- init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1)
+ init with_buffer(b: Bytes) do init(b.items, 0, b.length)
redef fun is_ok do return index < max
return b
end
+ # Does `self` contain only hexadecimal digits?
+ #
+ # Only the individual bytes are checked: neither the length of `self`
+ # nor its parity is taken into account.
+ #
+ #     assert "0B1d3F".is_valid_hexdigest
+ #     assert not "5G".is_valid_hexdigest
+ fun is_valid_hexdigest: Bool do
+ 	for b in bytes do
+ 		if not b.is_valid_hexdigit then return false
+ 	end
+ 	return true
+ end
+
# Appends `self.bytes` to `b`
fun append_to_bytes(b: Bytes) do
for s in substrings do
b.append_ns_from(s.items, s.bytelen, from) # NOTE(review): `from` is not declared within this method; confirm what it resolves to (presumably the start offset in `s.items`)
end
end
+
+ # Decodes `self`, taken as a hexadecimal digest, into a new `Bytes`
+ #
+ # Each pair of hexadecimal digits (uppercase or lowercase) yields one byte,
+ # the first digit of the pair being the high nibble.
+ #
+ #     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
+ #
+ # REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
+ fun hexdigest_to_bytes: Bytes do
+ 	var b = bytes
+ 	var max = bytelen
+ 	# Digits are consumed two at a time: with an odd count the last
+ 	# iteration would read `b[pos + 1]` past the end of the digest.
+ 	assert even_length: max % 2 == 0
+ 	var ret = new Bytes.with_capacity(max / 2)
+ 	var pos = 0
+ 	while pos < max do
+ 		ret.add((b[pos].hexdigit_to_byteval << 4) |
+ 			b[pos + 1].hexdigit_to_byteval)
+ 		pos += 2
+ 	end
+ 	return ret
+ end
end
redef class FlatText