Merge: Bytes from hexdigest
authorJean Privat <jean@pryen.org>
Tue, 15 Sep 2015 16:59:10 +0000 (12:59 -0400)
committerJean Privat <jean@pryen.org>
Tue, 15 Sep 2015 16:59:10 +0000 (12:59 -0400)
As @xymus requested in #1705, a new method, `hexdigest_to_bytes`, is available in Text; it transforms a regular hexdigest into its Bytes counterpart.

Ping @ppepos this could please your Pythonneous eyes

Pull-Request: #1716
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
Reviewed-by: Jean Privat <jean@pryen.org>

1  2 
lib/core/bytes.nit

diff --combined lib/core/bytes.nit
@@@ -20,13 -20,50 +20,58 @@@ import collection::arra
  intrude import text::flat
  
  redef class Byte
 +      # Write `self` as two hexadecimal digits into `ns`, at `pos` and `pos + 1`
 +      #
 +      # The high nibble is written first, then the low nibble.
 +      # Nibbles from 0 to 9 are written as ASCII `0`-`9` (offset `0x30`) and
 +      # nibbles from 10 to 15 as uppercase ASCII `A`-`F` (offset `0x37`).
 +      # No bounds check is done here on `ns`.
 +      private fun add_digest_at(ns: NativeString, pos: Int) do
 +              var tmp = (0xF0u8 & self) >> 4
 +              ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
 +              tmp = 0x0Fu8 & self
 +              ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
 +      end
++
+       # Is `self` a valid hexadecimal digit (in ASCII)?
+       #
+       # Accepted values are the ASCII codes of `0`-`9` (`0x30`-`0x39`),
+       # `A`-`F` (`0x41`-`0x46`) and `a`-`f` (`0x61`-`0x66`).
+       #
+       # ~~~nit
+       # intrude import core::bytes
+       # assert not '/'.ascii.to_b.is_valid_hexdigit
+       # assert '0'.ascii.to_b.is_valid_hexdigit
+       # assert '9'.ascii.to_b.is_valid_hexdigit
+       # assert not ':'.ascii.to_b.is_valid_hexdigit
+       # assert not '@'.ascii.to_b.is_valid_hexdigit
+       # assert 'A'.ascii.to_b.is_valid_hexdigit
+       # assert 'F'.ascii.to_b.is_valid_hexdigit
+       # assert not 'G'.ascii.to_b.is_valid_hexdigit
+       # assert not '`'.ascii.to_b.is_valid_hexdigit
+       # assert 'a'.ascii.to_b.is_valid_hexdigit
+       # assert 'f'.ascii.to_b.is_valid_hexdigit
+       # assert not 'g'.ascii.to_b.is_valid_hexdigit
+       # ~~~
+       private fun is_valid_hexdigit: Bool do
+               return (self >= 0x30u8 and self <= 0x39u8) or
+                      (self >= 0x41u8 and self <= 0x46u8) or
+                      (self >= 0x61u8 and self <= 0x66u8)
+       end
+       # Value (0 to 15) of `self` read as an ASCII hexadecimal digit
+       #
+       # Both uppercase (`A`-`F`) and lowercase (`a`-`f`) letters are handled.
+       #
+       # ~~~nit
+       # intrude import core::bytes
+       # assert 0x39u8.hexdigit_to_byteval == 0x09u8
+       # assert 0x43u8.hexdigit_to_byteval == 0x0Cu8
+       # ~~~
+       #
+       # REQUIRE: `self.is_valid_hexdigit`
+       private fun hexdigit_to_byteval: Byte do
+               if self >= 0x30u8 and self <= 0x39u8 then
+                       # `0`-`9`: subtract the code of `0`
+                       return self - 0x30u8
+               else if self >= 0x41u8 and self <= 0x46u8 then
+                       # `A`-`F`: 0x41 - 0x37 == 10
+                       return self - 0x37u8
+               else if self >= 0x61u8 and self <= 0x66u8 then
+                       # `a`-`f`: 0x61 - 0x57 == 10
+                       return self - 0x57u8
+               end
+               # Happens only if the requirement is not met.
+               # i.e. this abort is here to please the compiler
+               abort
+       end
  end
  
  # A buffer containing Byte-manipulation facilities
@@@ -36,7 -73,7 +81,7 @@@ class Byte
        super AbstractArray[Byte]
  
        # A NativeString being a char*, it can be used as underlying representation here.
 -      private var items: NativeString
 +      var items: NativeString
  
        # Number of bytes in the array
        redef var length
                return items[i]
        end
  
 +      # Returns self as a hexadecimal digest
 +      #
 +      # Each byte of `self` is written as two characters by
 +      # `Byte::add_digest_at`, so the buffer built here holds `length * 2` bytes.
 +      fun hexdigest: String do
 +              var elen = length * 2
 +              var ns = new NativeString(elen)
 +              var i = 0
 +              var oi = 0 # Write position in `ns`, advanced by 2 for each byte read
 +              while i < length do
 +                      self[i].add_digest_at(ns, oi)
 +                      i += 1
 +                      oi += 2
 +              end
 +              # NOTE(review): the arguments presumably are the native buffer, its byte
 +              # length, the first and last byte positions and the char length; confirm in `text::flat`
 +              return new FlatString.full(ns, elen, 0, elen - 1, elen)
 +      end
 +
        #     var b = new Bytes.with_capacity(1)
        #     b[0] = 101u8
        #     assert b.to_s == "e"
        redef fun to_s do
                persisted = true
                var b = self
 -              if not is_utf8 then
 -                      b = clean_utf8
 -                      persisted = false
 -              end
 -              return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
 +              var r = b.items.to_s_with_length(length)
 +              # NOTE(review): `r` is the result of `to_s_with_length` while `items` is a
 +              # `NativeString`; presumably the intent is to reset `persisted` when `r`
 +              # does not share `items` - confirm that this comparison achieves that.
 +              if r != items then persisted = false
 +              return r
        end
  
        redef fun iterator do return new BytesIterator.with_buffer(self)
  
 -      # Is the byte collection valid UTF-8 ?
 -      fun is_utf8: Bool do
 -              var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
 -              var lobounds = once [0, 0x80, 0x800, 0x10000]
 -              var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
 -              var pos = 0
 -              var len = length
 -              var mits = items
 -              while pos < len do
 -                      var nxst = mits.length_of_char_at(pos)
 -                      var charst_index = (nxst - 1) * 2
 -                      if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
 -                              var c = mits.char_at(pos)
 -                              var cp = c.ascii
 -                              if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
 -                                      if cp >= 0xD800 and cp <= 0xDFFF or
 -                                         cp == 0xFFFE or cp == 0xFFFF then return false
 -                              else
 -                                      return false
 -                              end
 -                      else
 -                              return false
 -                      end
 -                      pos += nxst
 -              end
 -              return true
 -      end
 -
 -      # Cleans the bytes of `self` to be UTF-8 compliant
 -      private fun clean_utf8: Bytes do
 -              var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
 -              var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
 -              var lobounds = once [0, 0x80, 0x800, 0x10000]
 -              var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
 -              var pos = 0
 -              var len = length
 -              var ret = new Bytes.with_capacity(len)
 -              var mits = items
 -              while pos < len do
 -                      var nxst = mits.length_of_char_at(pos)
 -                      var charst_index = (nxst - 1) * 2
 -                      if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
 -                              var c = mits.char_at(pos)
 -                              var cp = c.ascii
 -                              if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
 -                                      if cp >= 0xD800 and cp <= 0xDFFF or
 -                                         cp == 0xFFFE or cp == 0xFFFF then
 -                                              ret.append badchar
 -                                              pos += 1
 -                                      else
 -                                              var pend = pos + nxst
 -                                              for i in [pos .. pend[ do ret.add mits[i]
 -                                              pos += nxst
 -                                      end
 -                              else
 -                                      ret.append badchar
 -                                      pos += 1
 -                              end
 -                      else
 -                              ret.append badchar
 -                              pos += 1
 -                      end
 -              end
 -              return ret
 -      end
  end
  
  private class BytesIterator
  
        var max: Int
  
-       init with_buffer(b: Bytes) do init(b.items, 0, b.length - 1)
+       init with_buffer(b: Bytes) do init(b.items, 0, b.length)
  
        redef fun is_ok do return index < max
  
@@@ -210,6 -300,15 +255,15 @@@ redef class Tex
                return b
        end
  
+       # Is `self` a valid hexdigest ?
+       #
+       #     assert "0B1d3F".is_valid_hexdigest
+       #     assert not "5G".is_valid_hexdigest
+       #
+       # Every byte of `bytes` must be a hexadecimal digit (`0`-`9`, `A`-`F`, `a`-`f`).
+       # The parity of the number of digits is not checked here,
+       # and a text without any byte is accepted.
+       fun is_valid_hexdigest: Bool do
+               for i in bytes do if not i.is_valid_hexdigit then return false
+               return true
+       end
        # Appends `self.bytes` to `b`
        #
        # The bytes are appended one substring of `substrings` at a time.
        fun append_to_bytes(b: Bytes) do
                for s in substrings do
                        # NOTE(review): `from` is not defined in the lines shown here;
                        # presumably it is the position of the first byte of `s` in `s.items` - confirm.
                        b.append_ns_from(s.items, s.bytelen, from)
                end
        end
+       # Returns a new `Bytes` instance with the digest as content
+       #
+       #     assert "0B1F4D".hexdigest_to_bytes == [0x0Bu8, 0x1Fu8, 0x4Du8]
+       #
+       # Each pair of hexadecimal digits yields one byte: the first digit
+       # gives the high nibble and the second one the low nibble.
+       # Digits are read two at a time up to `bytelen`, without any check.
+       #
+       # REQUIRE: `self.is_valid_hexdigest` and `bytelen % 2 == 0`
+       fun hexdigest_to_bytes: Bytes do
+               var b = bytes
+               var pos = 0
+               var max = bytelen
+               var ret = new Bytes.with_capacity(max / 2)
+               while pos < max do
+                       ret.add((b[pos].hexdigit_to_byteval << 4) |
+                       b[pos + 1].hexdigit_to_byteval)
+                       pos += 2
+               end
+               return ret
+       end
  end
  
  redef class FlatText