From: Jean Privat Date: Tue, 15 Sep 2015 16:59:10 +0000 (-0400) Subject: Merge: Bytes from hexdigest X-Git-Tag: v0.7.8~22 X-Git-Url: http://nitlanguage.org?hp=-c Merge: Bytes from hexdigest As @xymus requested in #1705, a new method is available in Text, `hexdigest_to_bytes` which transforms a regular hexdigest in its Bytes counterpart. Ping @ppepos this could please your Pythonneous eyes Pull-Request: #1716 Reviewed-by: Alexis Laferrière Reviewed-by: Jean Privat --- 32b79dde9c35e1dd5a840cdc26b64cd896fb3407 diff --combined lib/core/bytes.nit index d589646,75d7ccd..356f386 --- a/lib/core/bytes.nit +++ b/lib/core/bytes.nit @@@ -20,13 -20,50 +20,58 @@@ import collection::arra intrude import text::flat redef class Byte + # Write self as a string into `ns` at position `pos` + private fun add_digest_at(ns: NativeString, pos: Int) do + var tmp = (0xF0u8 & self) >> 4 + ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8 + tmp = 0x0Fu8 & self + ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8 + end ++ + # Is `self` a valid hexadecimal digit (in ASCII) + # + # ~~~nit + # intrude import core::bytes + # assert not '/'.ascii.to_b.is_valid_hexdigit + # assert '0'.ascii.to_b.is_valid_hexdigit + # assert '9'.ascii.to_b.is_valid_hexdigit + # assert not ':'.ascii.to_b.is_valid_hexdigit + # assert not '@'.ascii.to_b.is_valid_hexdigit + # assert 'A'.ascii.to_b.is_valid_hexdigit + # assert 'F'.ascii.to_b.is_valid_hexdigit + # assert not 'G'.ascii.to_b.is_valid_hexdigit + # assert not '`'.ascii.to_b.is_valid_hexdigit + # assert 'a'.ascii.to_b.is_valid_hexdigit + # assert 'f'.ascii.to_b.is_valid_hexdigit + # assert not 'g'.ascii.to_b.is_valid_hexdigit + # ~~~ + private fun is_valid_hexdigit: Bool do + return (self >= 0x30u8 and self <= 0x39u8) or + (self >= 0x41u8 and self <= 0x46u8) or + (self >= 0x61u8 and self <= 0x66u8) + end + + # `self` as a hexdigit to its byte value + # + # ~~~nit + # intrude import core::bytes + # assert 0x39u8.hexdigit_to_byteval == 0x09u8 + # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8 + # ~~~ + # + # REQUIRE: `self.is_valid_hexdigit` + private fun hexdigit_to_byteval: Byte do + if self >= 0x30u8 and self <= 0x39u8 then + return self - 0x30u8 + else if self >= 0x41u8 and self <= 0x46u8 then + return self - 0x37u8 + else if self >= 0x61u8 and self <= 0x66u8 then + return self - 0x57u8 + end + # Happens only if the requirement is not met. + # i.e. this abort is here to please the compiler + abort + end end # A buffer containing Byte-manipulation facilities @@@ -36,7 -73,7 +81,7 @@@ class Byte super AbstractArray[Byte] # A NativeString being a char*, it can be used as underlying representation here. - private var items: NativeString + var items: NativeString # Number of bytes in the array redef var length @@@ -73,20 -110,6 +118,20 @@@ return items[i] end + # Returns self as a hexadecimal digest + fun hexdigest: String do + var elen = length * 2 + var ns = new NativeString(elen) + var i = 0 + var oi = 0 + while i < length do + self[i].add_digest_at(ns, oi) + i += 1 + oi += 2 + end + return new FlatString.full(ns, elen, 0, elen - 1, elen) + end + # var b = new Bytes.with_capacity(1) # b[0] = 101u8 # assert b.to_s == "e" @@@ -170,13 -193,80 +215,13 @@@ redef fun to_s do persisted = true var b = self - if not is_utf8 then - b = clean_utf8 - persisted = false - end - return new FlatString.with_infos(b.items, b.length, 0, b.length -1) + var r = b.items.to_s_with_length(length) + if r != items then persisted = false + return r end redef fun iterator do return new BytesIterator.with_buffer(self) - # Is the byte collection valid UTF-8 ? - fun is_utf8: Bool do - var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8] - var lobounds = once [0, 0x80, 0x800, 0x10000] - var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF] - var pos = 0 - var len = length - var mits = items - while pos < len do - var nxst = mits.length_of_char_at(pos) - var charst_index = (nxst - 1) * 2 - if mits[pos] & charst[charst_index] == charst[charst_index + 1] then - var c = mits.char_at(pos) - var cp = c.ascii - if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then - if cp >= 0xD800 and cp <= 0xDFFF or - cp == 0xFFFE or cp == 0xFFFF then return false - else - return false - end - else - return false - end - pos += nxst - end - return true - end - - # Cleans the bytes of `self` to be UTF-8 compliant - private fun clean_utf8: Bytes do - var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8] - var badchar = once [0xEFu8, 0xBFu8, 0xBDu8] - var lobounds = once [0, 0x80, 0x800, 0x10000] - var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF] - var pos = 0 - var len = length - var ret = new Bytes.with_capacity(len) - var mits = items - while pos < len do - var nxst = mits.length_of_char_at(pos) - var charst_index = (nxst - 1) * 2 - if mits[pos] & charst[charst_index] == charst[charst_index + 1] then - var c = mits.char_at(pos) - var cp = c.ascii - if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then - if cp >= 0xD800 and cp <= 0xDFFF or - cp == 0xFFFE or cp == 0xFFFF then - ret.append badchar - pos += 1 - else - var pend = pos + nxst - for i in [pos .. pend[ do ret.add mits[i] - pos += nxst - end - else - ret.append badchar - pos += 1 - end - else - ret.append badchar - pos += 1 - end - end - return ret - end end private class BytesIterator @@@ -188,7 -278,7 +233,7 @@@ var max: Int - init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1) + init with_buffer(b: Bytes) do init(b.items, 0, b.length) redef fun is_ok do return index < max @@@ -210,6 -300,15 +255,15 @@@ redef class Tex return b end + # Is `self` a valid hexdigest ? + # + # assert "0B1d3F".is_valid_hexdigest + # assert not "5G".is_valid_hexdigest + fun is_valid_hexdigest: Bool do + for i in bytes do if not i.is_valid_hexdigit then return false + return true + end + # Appends `self.bytes` to `b` fun append_to_bytes(b: Bytes) do for s in substrings do @@@ -217,6 -316,24 +271,24 @@@ b.append_ns_from(s.items, s.bytelen, from) end end + + # Returns a new `Bytes` instance with the digest as content + # + # assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8] + # + # REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0 + fun hexdigest_to_bytes: Bytes do + var b = bytes + var pos = 0 + var max = bytelen + var ret = new Bytes.with_capacity(max / 2) + while pos < max do + ret.add((b[pos].hexdigit_to_byteval << 4) | + b[pos + 1].hexdigit_to_byteval) + pos += 2 + end + return ret + end end redef class FlatText