Merge: Clean UTF-8 string update
author Jean Privat <jean@pryen.org>
Tue, 15 Sep 2015 16:57:16 +0000 (12:57 -0400)
committer Jean Privat <jean@pryen.org>
Tue, 15 Sep 2015 16:57:16 +0000 (12:57 -0400)
For quite some time now, we have had a cleaning function for Bytes that ensures that whatever comes from the outside is clean and can be safely transformed into a String.

This is now generalized to any NativeString, and the cleaning function will be called each time `to_s` is called on a NativeString.

At the same time, `clean_utf8` now performs better (for `Files::read_all`, Ir per call is roughly 40% lower than before), which limits the impact of the new strategy.

Furthermore, the string produced by `NativeString::clean_utf8` has its length already calculated, which saves time on later operations on the string.

It also limits the number of calls by avoiding allocations when they are not necessary (i.e. when the string is already clean, which should happen far more often than not).

As for performance:

Valgrind `./bin/nitc src/nitc.nit`:
Before: 14.040 GIr
After: 13.859 GIr

Time, best of 10 for `./bin/nitc src/nitc.nit -o bin/nitc`:
Before: 0m4.989s
After: 0m4.933s

Time, best of 10 for `./bin/nitc --semi-global src/nitc.nit -o bin/nitc`:
Before: 0m4.696s
After: 0m4.691s

Pretty much equivalent in real time, and a bit better in Valgrind; not bad considering every String is now cleaner than ever!

Pull-Request: #1705
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>

12 files changed:
contrib/opportunity/src/opportunity_controller.nit
contrib/opportunity/src/opportunity_model.nit
examples/rosettacode/sha_1.nit
lib/base64.nit
lib/core/bytes.nit
lib/core/stream.nit
lib/core/text/flat.nit
lib/sha1.nit
lib/websocket/websocket.nit
tests/sav/nitcg/test_text_stat.res
tests/sav/nitserial_args1.res
tests/sav/test_text_stat.res

index 868dd9e..26a4ef6 100644 (file)
@@ -16,7 +16,6 @@
 module opportunity_controller
 
 import nitcorn
-import sha1
 import templates
 import opportunity_model
 
index ef7654d..cc72f23 100644 (file)
@@ -247,7 +247,7 @@ class Meetup
        redef fun commit(db) do
                if id == "" then
                        var time = get_time
-                       var tmpid = (name + date + place + time.to_s).sha1_to_s
+                       var tmpid = (name + date + place + time.to_s).sha1.hexdigest
                        if not db.execute("INSERT INTO meetups (id, name, date, place, answer_mode) VALUES({tmpid.to_sql_string}, {name.html_escape.to_sql_string}, {date.html_escape.to_sql_string}, {place.html_escape.to_sql_string}, {answer_mode});") then
                                print "Error recording entry Meetup {self}"
                                print db.error or else "Null error"
index 2eaf471..8776d48 100644 (file)
@@ -9,4 +9,4 @@ module sha_1
 
 import sha1
 
-print "Rosetta Code".sha1_to_s
+print "Rosetta Code".sha1.hexdigest
index 5f1c920..4eae395 100644 (file)
 # Offers the base 64 encoding and decoding algorithms
 module base64
 
-redef class String
-
+redef class NativeString
        # Alphabet used by the base64 algorithm
-       private fun base64_chars : String
+       private fun base64_chars : SequenceRead[Byte]
        do
-               return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+               return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".bytes
        end
+
+       # Reversed alphabet for base64
        private fun inverted_base64_chars : HashMap[Byte, Byte]
        do
                var inv_base64_chars = new HashMap[Byte, Byte]
-               for k in [0..base64_chars.bytelen[ do
-                       inv_base64_chars[base64_chars.bytes[k]] = k.to_b
+               var l = base64_chars.length
+               for k in [0 .. l[ do
+                       inv_base64_chars[base64_chars[k]] = k.to_b
                end
                return inv_base64_chars
        end
 
-       # Encodes the receiver string to base64.
+       # Encodes `self` to base64.
+       #
        # By default, uses "=" for padding.
-       fun encode_base64 : String do return encode_base64_custom_padding('='.ascii.to_b)
-
-       # Encodes the receiver string to base64 using a custom padding character.
        #
-       # If using the default padding character `=`, see `encode_base64`.
-       fun encode_base64_custom_padding(padding : Byte) : String
-       do
-               var base64_bytes = once base64_chars.bytes
-               var length = bytelen
-
+       #     assert "string".encode_base64 == "c3RyaW5n"
+       private fun encode_base64(length: Int, padding: nullable Byte): Bytes do
+               var base64_bytes = once base64_chars
+               if padding == null then padding = '='.ascii.to_b
                var steps = length / 3
                var bytes_in_last_step = length % 3
                var result_length = steps * 4
                if bytes_in_last_step > 0 then result_length += 4
-               var result = new NativeString(result_length + 1)
-               var bytes = self.bytes
-               result[result_length] = 0u8
-
-               var mask_6bit = 0b0011_1111
+               var result = new Bytes.with_capacity(result_length)
 
+               var in_off = 0
                for s in [0 .. steps[ do
-                       var e = 0
-                       for ss in [0 .. 3[ do
-                               e += bytes[s * 3 + ss].to_i << ((2 - ss) * 8)
-                       end
-                       for ss in [0..4[ do
-                               result[s * 4 + 3 - ss] = base64_bytes[(e >> (ss * 6)) & mask_6bit]
-                       end
+                       var ind = ((self[in_off] & 0b1111_1100u8) >> 2).to_i
+                       result.add base64_bytes[ind]
+                       ind = ((self[in_off] & 0b0000_0011u8) << 4).to_i | ((self[in_off + 1] & 0b1111_0000u8) >> 4).to_i
+                       result.add base64_bytes[ind]
+                       ind = ((self[in_off + 1] & 0b0000_1111u8) << 2).to_i | ((self[in_off + 2] & 0b1100_0000u8) >> 6).to_i
+                       result.add base64_bytes[ind]
+                       ind = (self[in_off + 2] & 0b0011_1111u8).to_i
+                       result.add base64_bytes[ind]
+                       in_off += 3
                end
-
-               var out_off = result_length - 4
-               var in_off = length - bytes_in_last_step
                if bytes_in_last_step == 1 then
-                       result[out_off] = base64_bytes[((bytes[in_off] & 0b1111_1100u8) >> 2).to_i]
-                       result[out_off + 1] = base64_bytes[((bytes[in_off] & 0b0000_0011u8) << 4).to_i]
-                       out_off += 2
+                       result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+                       result.add base64_bytes[((self[in_off] & 0b0000_0011u8) << 4).to_i]
                else if bytes_in_last_step == 2 then
-                       result[out_off] = base64_bytes[((bytes[in_off] & 0b1111_1100u8) >> 2).to_i]
-                       result[out_off + 1] = base64_bytes[(((bytes[in_off] & 0b0000_0011u8) << 4) | ((bytes[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
-                       result[out_off + 2] = base64_bytes[((bytes[in_off + 1] & 0b0000_1111u8) << 2).to_i]
-                       out_off += 3
-               end
-               if bytes_in_last_step > 0 then
-                       for i in [out_off .. result_length[ do result[i] = padding
+                       result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+                       result.add base64_bytes[(((self[in_off] & 0b0000_0011u8) << 4) | ((self[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
+                       result.add base64_bytes[((self[in_off + 1] & 0b0000_1111u8) << 2).to_i]
                end
+               var rempad = if bytes_in_last_step > 0 then 3 - bytes_in_last_step else 0
+               for i in [0 .. rempad[ do result.add padding
 
-               return result.to_s_with_length(result_length)
+               return result
        end
 
-       # Decodes the receiver string from base64.
-       # By default, uses "=" for padding.
-       fun decode_base64 : String do return decode_base64_custom_padding('='.ascii.to_b)
-
-       # Decodes the receiver string to base64 using a custom padding character.
+       # Decodes `self` from base64
        #
-       # If using the default padding character `=`, see `decode_base64`.
-       fun decode_base64_custom_padding(padding : Byte) : String
-       do
+       #      assert "c3RyaW5n".decode_base64 == "string"
+       #
+       # REQUIRE: `length % 4 == 0`
+       private fun decode_base64(length: Int, padding: nullable Byte): Bytes do
+               if padding == null then padding = '='.ascii.to_b
                var inv = once inverted_base64_chars
-               var length = bytelen
-               if length == 0 then return ""
+               if length == 0 then return new Bytes.empty
                assert length % 4 == 0 else print "base64::decode_base64 only supports strings of length multiple of 4"
 
-               var bytes = self.bytes
+               var bytes = self
                var steps = length / 4
                var result_length = steps * 3
 
@@ -113,17 +101,16 @@ redef class String
                if padding_len == 1 then result_length -= 1
                if padding_len == 2 then result_length -= 2
 
-               var result = new NativeString(result_length + 1)
-               result[result_length] = 0u8
+               var result = new Bytes.with_capacity(result_length + 1)
 
                for s in [0 .. steps[ do
                        var c0 = inv[bytes[s * 4]]
                        var c1 = inv[bytes[s * 4 + 1]]
                        var c2 = inv[bytes[s * 4 + 2]]
                        var c3 = inv[bytes[s * 4 + 3]]
-                       result[s * 3] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
-                       result[s * 3 + 1] = ((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2)
-                       result[s * 3 + 2] = ((c2 & 0b0000_0011u8) << 6) | (c3 & 0b0011_1111u8)
+                       result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
+                       result.add (((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2))
+                       result.add (((c2 & 0b0000_0011u8) << 6) | (c3 & 0b0011_1111u8))
                end
 
                var last_start = steps * 4
@@ -131,14 +118,52 @@ redef class String
                        var c0 = inv[bytes[last_start]]
                        var c1 = inv[bytes[last_start + 1]]
                        var c2 = inv[bytes[last_start + 2]]
-                       result[result_length - 2] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
-                       result[result_length - 1] = ((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2)
+                       result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
+                       result.add (((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2))
                else if padding_len == 2 then
                        var c0 = inv[bytes[last_start]]
                        var c1 = inv[bytes[last_start + 1]]
-                       result[result_length - 1] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
+                       result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
                end
 
-               return result.to_s_with_length(result_length)
+               return result
+       end
+end
+
+redef class Bytes
+
+       # Encodes the receiver string to base64 using a custom padding character.
+       #
+       # If using the default padding character `=`, see `encode_base64`.
+       fun encode_base64(padding: nullable Byte): Bytes
+       do
+               return items.encode_base64(length, padding)
+       end
+
+       # Decodes the receiver string to base64 using a custom padding character.
+       #
+       # Default padding character `=`
+       fun decode_base64(padding : nullable Byte) : Bytes
+       do
+               return items.decode_base64(length, padding)
+       end
+end
+
+redef class String
+
+       # Encodes the receiver string to base64 using a custom padding character.
+       #
+       # If using the default padding character `=`, see `encode_base64`.
+       fun encode_base64(padding: nullable Byte): String
+       do
+               return to_cstring.encode_base64(bytelen, padding).to_s
+       end
+
+       # Decodes the receiver string to base64 using a custom padding character.
+       #
+       # Default padding character `=`
+       fun decode_base64(padding : nullable Byte) : String
+       do
+               return to_cstring.decode_base64(bytelen, padding).to_s
        end
 end
index 59c4c5f..d589646 100644 (file)
@@ -19,6 +19,16 @@ import kernel
 import collection::array
 intrude import text::flat
 
+redef class Byte
+       # Write self as a string into `ns` at position `pos`
+       private fun add_digest_at(ns: NativeString, pos: Int) do
+               var tmp = (0xF0u8 & self) >> 4
+               ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+               tmp = 0x0Fu8 & self
+               ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+       end
+end
+
 # A buffer containing Byte-manipulation facilities
 #
 # Uses Copy-On-Write when persisted
@@ -26,7 +36,7 @@ class Bytes
        super AbstractArray[Byte]
 
        # A NativeString being a char*, it can be used as underlying representation here.
-       private var items: NativeString
+       var items: NativeString
 
        # Number of bytes in the array
        redef var length
@@ -63,6 +73,20 @@ class Bytes
                return items[i]
        end
 
+       # Returns self as a hexadecimal digest
+       fun hexdigest: String do
+               var elen = length * 2
+               var ns = new NativeString(elen)
+               var i = 0
+               var oi = 0
+               while i < length do
+                       self[i].add_digest_at(ns, oi)
+                       i += 1
+                       oi += 2
+               end
+               return new FlatString.full(ns, elen, 0, elen - 1, elen)
+       end
+
        #     var b = new Bytes.with_capacity(1)
        #     b[0] = 101u8
        #     assert b.to_s == "e"
@@ -146,80 +170,13 @@ class Bytes
        redef fun to_s do
                persisted = true
                var b = self
-               if not is_utf8 then
-                       b = clean_utf8
-                       persisted = false
-               end
-               return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
+               var r = b.items.to_s_with_length(length)
+               if r != items then persisted = false
+               return r
        end
 
        redef fun iterator do return new BytesIterator.with_buffer(self)
 
-       # Is the byte collection valid UTF-8 ?
-       fun is_utf8: Bool do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then return false
-                               else
-                                       return false
-                               end
-                       else
-                               return false
-                       end
-                       pos += nxst
-               end
-               return true
-       end
-
-       # Cleans the bytes of `self` to be UTF-8 compliant
-       private fun clean_utf8: Bytes do
-               var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-               var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
-               var lobounds = once [0, 0x80, 0x800, 0x10000]
-               var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-               var pos = 0
-               var len = length
-               var ret = new Bytes.with_capacity(len)
-               var mits = items
-               while pos < len do
-                       var nxst = mits.length_of_char_at(pos)
-                       var charst_index = (nxst - 1) * 2
-                       if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-                               var c = mits.char_at(pos)
-                               var cp = c.ascii
-                               if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-                                       if cp >= 0xD800 and cp <= 0xDFFF or
-                                          cp == 0xFFFE or cp == 0xFFFF then
-                                               ret.append badchar
-                                               pos += 1
-                                       else
-                                               var pend = pos + nxst
-                                               for i in [pos .. pend[ do ret.add mits[i]
-                                               pos += nxst
-                                       end
-                               else
-                                       ret.append badchar
-                                       pos += 1
-                               end
-                       else
-                               ret.append badchar
-                               pos += 1
-                       end
-               end
-               return ret
-       end
 end
 
 private class BytesIterator
index 2db319a..4b1e826 100644 (file)
@@ -173,12 +173,13 @@ abstract class Reader
        # ~~~
        fun read_all: String do
                var s = read_all_bytes
-               if not s.is_utf8 then s = s.clean_utf8
                var slen = s.length
                if slen == 0 then return ""
                var rets = ""
                var pos = 0
-               var sits = s.items
+               var str = s.items.clean_utf8(slen)
+               slen = str.bytelen
+               var sits = str.items
                var remsp = slen
                while pos < slen do
                        # The 129 size was decided more or less arbitrarily
index c8b6ecd..52de988 100644 (file)
@@ -985,8 +985,7 @@ redef class NativeString
        redef fun to_s_with_length(length): FlatString
        do
                assert length >= 0
-               var str = new FlatString.with_infos(self, length, 0, length - 1)
-               return str
+               return clean_utf8(length)
        end
 
        redef fun to_s_full(bytelen, unilen) do
@@ -997,6 +996,8 @@ redef class NativeString
        redef fun to_s_with_copy: FlatString
        do
                var length = cstring_length
+               var r = clean_utf8(length)
+               if r.items != self then return r
                var new_self = new NativeString(length + 1)
                copy_to(new_self, length, 0, 0)
                var str = new FlatString.with_infos(new_self, length, 0, length - 1)
@@ -1005,6 +1006,81 @@ redef class NativeString
                return str
        end
 
+       # Cleans a NativeString if necessary
+       fun clean_utf8(len: Int): FlatString do
+               var replacements: nullable Array[Int] = null
+               var end_length = len
+               var pos = 0
+               var chr_ln = 0
+               while pos < len do
+                       var b = self[pos]
+                       var nxst = length_of_char_at(pos)
+                       var ok_st: Bool
+                       if nxst == 1 then
+                               ok_st = b & 0x80u8 == 0u8
+                       else if nxst == 2 then
+                               ok_st = b & 0xE0u8 == 0xC0u8
+                       else if nxst == 3 then
+                               ok_st = b & 0xF0u8 == 0xE0u8
+                       else
+                               ok_st = b & 0xF8u8 == 0xF0u8
+                       end
+                       if not ok_st then
+                               if replacements == null then replacements = new Array[Int]
+                               replacements.add pos
+                               end_length += 2
+                               pos += 1
+                               chr_ln += 1
+                               continue
+                       end
+                       var ok_c: Bool
+                       var c = char_at(pos)
+                       var cp = c.ascii
+                       if nxst == 1 then
+                               ok_c = cp >= 0 and cp <= 0x7F
+                       else if nxst == 2 then
+                               ok_c = cp >= 0x80 and cp <= 0x7FF
+                       else if nxst == 3 then
+                               ok_c = cp >= 0x800 and cp <= 0xFFFF
+                               ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
+                       else
+                               ok_c = cp >= 0x10000 and cp <= 0x10FFFF
+                       end
+                       if not ok_c then
+                               if replacements == null then replacements = new Array[Int]
+                               replacements.add pos
+                               end_length += 2
+                               pos += 1
+                               chr_ln += 1
+                               continue
+                       end
+                       pos += c.u8char_len
+                       chr_ln += 1
+               end
+               var ret = self
+               if end_length != len then
+                       ret = new NativeString(end_length)
+                       var old_repl = 0
+                       var off = 0
+                       var repls = replacements.as(not null)
+                       var r = repls.items.as(not null)
+                       var imax = repls.length
+                       for i in [0 .. imax[ do
+                               var repl_pos = r[i]
+                               var chkln = repl_pos - old_repl
+                               copy_to(ret, chkln, old_repl, off)
+                               off += chkln
+                               ret[off] = 0xEFu8
+                               ret[off + 1] = 0xBFu8
+                               ret[off + 2] = 0xBDu8
+                               old_repl = repl_pos + 1
+                               off += 3
+                       end
+                       copy_to(ret, len - old_repl, old_repl, off)
+               end
+               return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
+       end
+
        # Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
        #
        # Very unsafe, make sure to have room for this char prior to calling this function.
@@ -1109,7 +1185,7 @@ redef class Array[E]
                        end
                        i += 1
                end
-               return ns.to_s_with_length(sl)
+               return new FlatString.with_infos(ns, sl, 0, sl - 1)
        end
 end
 
@@ -1146,7 +1222,7 @@ redef class NativeArray[E]
                        end
                        i += 1
                end
-               return ns.to_s_with_length(sl)
+               return new FlatString.with_infos(ns, sl, 0, sl - 1)
        end
 end
 
index 8a5acc9..2bd8fbe 100644 (file)
@@ -1,7 +1,5 @@
 # This file is part of NIT (http://www.nitlanguage.org).
 #
-# Copyright 2014 Lucas Bajolet <r4pass@hotmail.com>
-#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -219,22 +217,12 @@ in "C Header" `{
        }
 `}
 
-redef class String
-
-       # Computes the SHA1 of the receiver
-       #
-       # Returns a digest of 20 bytes as a String,
-       # note that all the characters are not necessarily ASCII.
-       # If you want the hex string version of the digest, use
-       # sha1_to_s.
-       #
-       #     import base64
-       #     assert "The quick brown fox jumps over the lazy dog".sha1.encode_base64 == "L9ThxnotKPzthJ7hu3bnORuT6xI="
-       fun sha1: String import String.to_cstring, String.length, NativeString.to_s_with_length `{
+redef class NativeString
+       private fun sha1_intern(len: Int): NativeString `{
                sha1nfo s;
 
                sha1_init(&s);
-               sha1_write(&s, String_to_cstring(self), String_length(self));
+               sha1_write(&s, self, len);
                uint8_t* digest = sha1_result(&s);
 
                char* digested = malloc(21);
@@ -243,35 +231,30 @@ redef class String
 
                digested[20] = '\0';
 
-               return NativeString_to_s_with_length(digested, 20);
+               return digested;
        `}
+end
+
+redef class String
+
+       # Computes the SHA1 of the receiver
+       #
+       # Returns a digest of 20 bytes as a NativeString,
+       # note that all the characters are not necessarily ASCII.
+       # If you want the hex string version of the digest, use
+       # sha1_hexdigest.
+       #
+       #     import base64
+       #     assert "The quick brown fox jumps over the lazy dog".sha1 == [0x2Fu8, 0xD4u8, 0xE1u8, 0xC6u8, 0x7Au8, 0x2Du8, 0x28u8, 0xFCu8, 0xEDu8, 0x84u8, 0x9Eu8, 0xE1u8, 0xBBu8, 0x76u8, 0xE7u8, 0x39u8, 0x1Bu8, 0x93u8, 0xEBu8, 0x12u8]
+       fun sha1: Bytes do
+               return new Bytes(to_cstring.sha1_intern(bytelen), 20, 20)
+       end
 
        # Computes the SHA1 of the receiver.
        #
        # Returns a 40 char String containing the Hexadecimal
        # Digest in its Char form.
        #
-       #     assert "The quick brown fox jumps over the lazy dog".sha1_to_s == "2FD4E1C67A2D28FCED849EE1BB76E7391B93EB12"
-       fun sha1_to_s: String import String.to_cstring, String.length, NativeString.to_s_with_length `{
-               sha1nfo s;
-
-               sha1_init(&s);
-               sha1_write(&s, String_to_cstring(self), String_length(self));
-               uint8_t* digest = sha1_result(&s);
-
-               char* ret_str = malloc(41);
-               char* hexmap = "0123456789ABCDEF";
-
-               int i;
-               for(i=0;i<20;i++){
-                       uint8_t q = digest[i];
-                       ret_str[i*2] = hexmap[q >> 4];
-                       ret_str[(i*2)+1] = hexmap[q & 0x0F];
-               }
-               ret_str[40] = '\0';
-
-               return NativeString_to_s_with_length(ret_str, 40);
-       `}
-
+       #     assert "The quick brown fox jumps over the lazy dog".sha1_hexdigest == "2FD4E1C67A2D28FCED849EE1BB76E7391B93EB12"
+       fun sha1_hexdigest: String do return sha1.hexdigest
 end
-
index c3fdaad..2a3c32f 100644 (file)
@@ -114,7 +114,7 @@ class WebsocketConnection
                resp_map["Connection:"] = "Upgrade"
                var key = heads["Sec-WebSocket-Key"]
                key += "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
-               key = key.sha1.encode_base64
+               key = key.sha1.encode_base64.to_s
                resp_map["Sec-WebSocket-Accept:"] = key
                var resp = resp_map.join("\r\n", " ")
                resp += "\r\n\r\n"
index 1066456..ff641cc 100644 (file)
@@ -21,7 +21,7 @@ Calls to bytepos for each type:
        FlatString = 18
 Calls to first_byte on FlatString 153
 Calls to last_byte on FlatString 103
-FlatStrings allocated with length 81 (85.417%)
+FlatStrings allocated with length 82 (86.458%)
 Length of travel for index distribution:
 * null = 20 => occurences 83.333%, cumulative 83.333% 
 * 1 = 8 => occurences 21.053%, cumulative 73.684% 
index fe675df..ce7118b 100644 (file)
@@ -13,6 +13,7 @@ redef class Deserializer
                if name == "Array[nullable Object]" then return new Array[nullable Object].from_deserializer(self)
                if name == "Array[Serializable]" then return new Array[Serializable].from_deserializer(self)
                if name == "Array[Object]" then return new Array[Object].from_deserializer(self)
+               if name == "Array[Int]" then return new Array[Int].from_deserializer(self)
                if name == "Array[Match]" then return new Array[Match].from_deserializer(self)
                if name == "Array[nullable Match]" then return new Array[nullable Match].from_deserializer(self)
                return super
index 49e9adc..f6e7b69 100644 (file)
@@ -21,7 +21,7 @@ Calls to bytepos for each type:
        FlatString = 18
 Calls to first_byte on FlatString 153
 Calls to last_byte on FlatString 103
-FlatStrings allocated with length 81 (85.417%)
+FlatStrings allocated with length 82 (86.458%)
 Length of travel for index distribution:
 * 0 = 20 => occurences 83.333%, cumulative 83.333% 
 * 1 = 8 => occurences 21.053%, cumulative 73.684%