lib/core: Added new `hexdigest` service on `Text`
[nit.git] / lib / core / bytes.nit
index 59c4c5f..94d3d27 100644 (file)
@@ -19,6 +19,61 @@ import kernel
 import collection::array
 intrude import text::flat
 
+redef class Byte
+       # Write self as a string into `ns` at position `pos`
+       private fun add_digest_at(ns: NativeString, pos: Int) do
+               var tmp = (0xF0u8 & self) >> 4
+               ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+               tmp = 0x0Fu8 & self
+               ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+       end
+
+       # Is `self` a valid hexadecimal digit (in ASCII)
+       #
+       # ~~~nit
+       # intrude import core::bytes
+       # assert not '/'.ascii.is_valid_hexdigit
+       # assert '0'.ascii.is_valid_hexdigit
+       # assert '9'.ascii.is_valid_hexdigit
+       # assert not ':'.ascii.is_valid_hexdigit
+       # assert not '@'.ascii.is_valid_hexdigit
+       # assert 'A'.ascii.is_valid_hexdigit
+       # assert 'F'.ascii.is_valid_hexdigit
+       # assert not 'G'.ascii.is_valid_hexdigit
+       # assert not '`'.ascii.is_valid_hexdigit
+       # assert 'a'.ascii.is_valid_hexdigit
+       # assert 'f'.ascii.is_valid_hexdigit
+       # assert not 'g'.ascii.is_valid_hexdigit
+       # ~~~
+       private fun is_valid_hexdigit: Bool do
+               return (self >= 0x30u8 and self <= 0x39u8) or
+                      (self >= 0x41u8 and self <= 0x46u8) or
+                      (self >= 0x61u8 and self <= 0x66u8)
+       end
+
+       # `self` as a hexdigit to its byte value
+       #
+       # ~~~nit
+       # intrude import core::bytes
+       # assert 0x39u8.hexdigit_to_byteval == 0x09u8
+       # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
+       # ~~~
+       #
+       # REQUIRE: `self.is_valid_hexdigit`
+       private fun hexdigit_to_byteval: Byte do
+               if self >= 0x30u8 and self <= 0x39u8 then
+                       return self - 0x30u8
+               else if self >= 0x41u8 and self <= 0x46u8 then
+                       return self - 0x37u8
+               else if self >= 0x61u8 and self <= 0x66u8 then
+                       return self - 0x57u8
+               end
+               # Happens only if the requirement is not met.
+               # i.e. this abort is here to please the compiler
+               abort
+       end
+end
+
 # A buffer containing Byte-manipulation facilities
 #
 # Uses Copy-On-Write when persisted
@@ -26,7 +81,7 @@ class Bytes
        super AbstractArray[Byte]
 
        # A NativeString being a char*, it can be used as underlying representation here.
-       private var items: NativeString
+       var items: NativeString
 
        # Number of bytes in the array
        redef var length
@@ -63,6 +118,20 @@ class Bytes
                return items[i]
        end
 
+       # Returns self as a hexadecimal digest
+       fun hexdigest: String do
+               var elen = length * 2
+               var ns = new NativeString(elen)
+               var i = 0
+               var oi = 0
+               while i < length do
+                       self[i].add_digest_at(ns, oi)
+                       i += 1
+                       oi += 2
+               end
+               return new FlatString.full(ns, elen, 0, elen - 1, elen)
+       end
+
        #     var b = new Bytes.with_capacity(1)
        #     b[0] = 101u8
        #     assert b.to_s == "e"
@@ -146,80 +215,13 @@ class Bytes
        redef fun to_s do
                persisted = true
                var b = self
-               if not is_utf8 then
-                       b = clean_utf8
-                       persisted = false
-               end
-               return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
+               var r = b.items.to_s_with_length(length)
+               if r != items then persisted = false
+               return r
        end
 
        redef fun iterator do return new BytesIterator.with_buffer(self)
 
-       # Is the byte collection valid UTF-8 ?
-       fun is_utf8: Bool do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then return false
-                               else
-                                       return false
-                               end
-                       else
-                               return false
-                       end
-                       pos += nxst
-               end
-               return true
-       end
-
-       # Cleans the bytes of `self` to be UTF-8 compliant
-       private fun clean_utf8: Bytes do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var ret = new Bytes.with_capacity(len)
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then
-                                               ret.append badchar
-                                               pos += 1
-                                       else
-                                               var pend = pos + nxst
-                                               for i in [pos .. pend[ do ret.add mits[i]
-                                               pos += nxst
-                                       end
-                               else
-                                       ret.append badchar
-                                       pos += 1
-                               end
-                       else
-                               ret.append badchar
-                               pos += 1
-                       end
-               end
-               return ret
-       end
 end
 
 private class BytesIterator
@@ -231,7 +233,7 @@ private class BytesIterator
 
        var max: Int
 
-       init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1)
+       init with_buffer(b: Bytes) do init(b.items, 0, b.length)
 
        redef fun is_ok do return index < max
 
@@ -253,6 +255,15 @@ redef class Text
                return b
        end
 
+       # Is `self` a valid hexdigest ?
+       #
+       #     assert "0B1d3F".is_valid_hexdigest
+       #     assert not "5G".is_valid_hexdigest
+       fun is_valid_hexdigest: Bool do
+               for i in bytes do if not i.is_valid_hexdigit then return false
+               return true
+       end
+
        # Appends `self.bytes` to `b`
        fun append_to_bytes(b: Bytes) do
                for s in substrings do
@@ -260,6 +271,38 @@ redef class Text
                        b.append_ns_from(s.items, s.bytelen, from)
                end
        end
+
+       # Returns a new `Bytes` instance with the digest as content
+       #
+       #     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
+       #
+       # REQUIRE: `self` is a valid hexdigest and hexdigest.length % 2 == 0
+       fun hexdigest_to_bytes: Bytes do
+               var b = bytes
+               var pos = 0
+               var max = bytelen
+               var ret = new Bytes.with_capacity(max / 2)
+               while pos < max do
+                       ret.add((b[pos].hexdigit_to_byteval << 4) |
+                       b[pos + 1].hexdigit_to_byteval)
+                       pos += 2
+               end
+               return ret
+       end
+
+       # Gets the hexdigest of the bytes of `self`
+       #
+       #     assert "&lt;STRING&#47;&rt;".hexdigest == "266C743B535452494E47262334373B2672743B"
+       fun hexdigest: String do
+               var ln = bytelen
+               var outns = new NativeString(ln * 2)
+               var oi = 0
+               for i in [0 .. ln[ do
+                       bytes[i].add_digest_at(outns, oi)
+                       oi += 2
+               end
+               return new FlatString.with_infos(outns, ln * 2, 0, ln * 2 - 1)
+       end
 end
 
 redef class FlatText