From: Jean Privat <jean@pryen.org>
Date: Tue, 15 Sep 2015 16:57:16 +0000 (-0400)
Subject: Merge: Clean UTF-8 string update
X-Git-Tag: v0.7.8~23
X-Git-Url: http://nitlanguage.org?hp=2b0983a52aa3cc6d98e0c0f4b3b6ab0509cdb944

Merge: Clean UTF-8 string update

For quite some time now, we have had a cleaning function for Bytes that ensured that whatever came from the outside was clean and could safely be transformed into a String.

This is now generalized to any NativeString, and the clean function will be called each time `to_s` is called on a NativeString.

At the same time, `clean_utf8` now performs better (for `Files::read_all`, Ir per call is roughly 40% less than before), which limits the impact of the new strategy.

Furthermore, the string produced by `NativeString::clean_utf8` has its length calculated which saves time on later operations on the string.

It also limits the number of calls by avoiding allocations when they are not necessary (that is, when the string is already clean, which should happen far more often than not).

As for performance:

Valgrind `./bin/nitc src/nitc.nit`:
Before: 14.040 GIr
After: 13.859 GIr

Time, best of 10 for `./bin/nitc src/nitc.nit -o bin/nitc`:
Before: 0m4.989s
After: 0m4.933s

Time, best of 10 for `./bin/nitc --semi-global src/nitc.nit -o bin/nitc`:
Before: 0m4.696s
After: 0m4.691s

Pretty much equivalent in real time, and a bit better in Valgrind; not bad considering every String is now cleaner than ever!

Pull-Request: #1705
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
---

diff --git a/contrib/opportunity/src/opportunity_controller.nit b/contrib/opportunity/src/opportunity_controller.nit
index 868dd9e..26a4ef6 100644
--- a/contrib/opportunity/src/opportunity_controller.nit
+++ b/contrib/opportunity/src/opportunity_controller.nit
@@ -16,7 +16,6 @@
 module opportunity_controller
 
 import nitcorn
-import sha1
 import templates
 import opportunity_model
 
diff --git a/contrib/opportunity/src/opportunity_model.nit b/contrib/opportunity/src/opportunity_model.nit
index ef7654d..cc72f23 100644
--- a/contrib/opportunity/src/opportunity_model.nit
+++ b/contrib/opportunity/src/opportunity_model.nit
@@ -247,7 +247,7 @@ class Meetup
 	redef fun commit(db) do
 		if id == "" then
 			var time = get_time
-			var tmpid = (name + date + place + time.to_s).sha1_to_s
+			var tmpid = (name + date + place + time.to_s).sha1.hexdigest
 			if not db.execute("INSERT INTO meetups (id, name, date, place, answer_mode) VALUES({tmpid.to_sql_string}, {name.html_escape.to_sql_string}, {date.html_escape.to_sql_string}, {place.html_escape.to_sql_string}, {answer_mode});") then
 				print "Error recording entry Meetup {self}"
 				print db.error or else "Null error"
diff --git a/examples/rosettacode/sha_1.nit b/examples/rosettacode/sha_1.nit
index 2eaf471..8776d48 100644
--- a/examples/rosettacode/sha_1.nit
+++ b/examples/rosettacode/sha_1.nit
@@ -9,4 +9,4 @@ module sha_1
 
 import sha1
 
-print "Rosetta Code".sha1_to_s
+print "Rosetta Code".sha1.hexdigest
diff --git a/lib/base64.nit b/lib/base64.nit
index 5f1c920..4eae395 100644
--- a/lib/base64.nit
+++ b/lib/base64.nit
@@ -17,88 +17,76 @@
 # Offers the base 64 encoding and decoding algorithms
 module base64
 
-redef class String
-
+redef class NativeString
 	# Alphabet used by the base64 algorithm
-	private fun base64_chars : String
+	private fun base64_chars : SequenceRead[Byte]
 	do
-		return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+		return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".bytes
 	end
+
+	# Reversed alphabet for base64
 	private fun inverted_base64_chars : HashMap[Byte, Byte]
 	do
 		var inv_base64_chars = new HashMap[Byte, Byte]
-		for k in [0..base64_chars.bytelen[ do
-			inv_base64_chars[base64_chars.bytes[k]] = k.to_b
+		var l = base64_chars.length
+		for k in [0 .. l[ do
+			inv_base64_chars[base64_chars[k]] = k.to_b
 		end
 		return inv_base64_chars
 	end
 
-	# Encodes the receiver string to base64.
+	# Encodes `self` to base64.
+	#
 	# By default, uses "=" for padding.
-	fun encode_base64 : String do return encode_base64_custom_padding('='.ascii.to_b)
-
-	# Encodes the receiver string to base64 using a custom padding character.
 	#
-	# If using the default padding character `=`, see `encode_base64`.
-	fun encode_base64_custom_padding(padding : Byte) : String
-	do
-		var base64_bytes = once base64_chars.bytes
-		var length = bytelen
-
+	#     assert "string".encode_base64 == "c3RyaW5n"
+	private fun encode_base64(length: Int, padding: nullable Byte): Bytes do
+		var base64_bytes = once base64_chars
+		if padding == null then padding = '='.ascii.to_b
 		var steps = length / 3
 		var bytes_in_last_step = length % 3
 		var result_length = steps * 4
 		if bytes_in_last_step > 0 then result_length += 4
-		var result = new NativeString(result_length + 1)
-		var bytes = self.bytes
-		result[result_length] = 0u8
-
-		var mask_6bit = 0b0011_1111
+		var result = new Bytes.with_capacity(result_length)
 
+		var in_off = 0
 		for s in [0 .. steps[ do
-			var e = 0
-			for ss in [0 .. 3[ do
-				e += bytes[s * 3 + ss].to_i << ((2 - ss) * 8)
-			end
-			for ss in [0..4[ do
-				result[s * 4 + 3 - ss] = base64_bytes[(e >> (ss * 6)) & mask_6bit]
-			end
+			var ind = ((self[in_off] & 0b1111_1100u8) >> 2).to_i
+			result.add base64_bytes[ind]
+			ind = ((self[in_off] & 0b0000_0011u8) << 4).to_i | ((self[in_off + 1] & 0b1111_0000u8) >> 4).to_i
+			result.add base64_bytes[ind]
+			ind = ((self[in_off + 1] & 0b0000_1111u8) << 2).to_i | ((self[in_off + 2] & 0b1100_0000u8) >> 6).to_i
+			result.add base64_bytes[ind]
+			ind = (self[in_off + 2] & 0b0011_1111u8).to_i
+			result.add base64_bytes[ind]
+			in_off += 3
 		end
-
-		var out_off = result_length - 4
-		var in_off = length - bytes_in_last_step
 		if bytes_in_last_step == 1 then
-			result[out_off] = base64_bytes[((bytes[in_off] & 0b1111_1100u8) >> 2).to_i]
-			result[out_off + 1] = base64_bytes[((bytes[in_off] & 0b0000_0011u8) << 4).to_i]
-			out_off += 2
+			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+			result.add base64_bytes[((self[in_off] & 0b0000_0011u8) << 4).to_i]
 		else if bytes_in_last_step == 2 then
-			result[out_off] = base64_bytes[((bytes[in_off] & 0b1111_1100u8) >> 2).to_i]
-			result[out_off + 1] = base64_bytes[(((bytes[in_off] & 0b0000_0011u8) << 4) | ((bytes[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
-			result[out_off + 2] = base64_bytes[((bytes[in_off + 1] & 0b0000_1111u8) << 2).to_i]
-			out_off += 3
-		end
-		if bytes_in_last_step > 0 then
-			for i in [out_off .. result_length[ do result[i] = padding
+			result.add base64_bytes[((self[in_off] & 0b1111_1100u8) >> 2).to_i]
+			result.add base64_bytes[(((self[in_off] & 0b0000_0011u8) << 4) | ((self[in_off + 1] & 0b1111_0000u8) >> 4)).to_i]
+			result.add base64_bytes[((self[in_off + 1] & 0b0000_1111u8) << 2).to_i]
 		end
+		var rempad = if bytes_in_last_step > 0 then 3 - bytes_in_last_step else 0
+		for i in [0 .. rempad[ do result.add padding
 
-		return result.to_s_with_length(result_length)
+		return result
 	end
 
-	# Decodes the receiver string from base64.
-	# By default, uses "=" for padding.
-	fun decode_base64 : String do return decode_base64_custom_padding('='.ascii.to_b)
-
-	# Decodes the receiver string to base64 using a custom padding character.
+	# Decodes `self` from base64
 	#
-	# If using the default padding character `=`, see `decode_base64`.
-	fun decode_base64_custom_padding(padding : Byte) : String
-	do
+	#      assert "c3RyaW5n".decode_base64 == "string"
+	#
+	# REQUIRE: `length % 4 == 0`
+	private fun decode_base64(length: Int, padding: nullable Byte): Bytes do
+		if padding == null then padding = '='.ascii.to_b
 		var inv = once inverted_base64_chars
-		var length = bytelen
-		if length == 0 then return ""
+		if length == 0 then return new Bytes.empty
 		assert length % 4 == 0 else print "base64::decode_base64 only supports strings of length multiple of 4"
 
-		var bytes = self.bytes
+		var bytes = self
 		var steps = length / 4
 		var result_length = steps * 3
 
@@ -113,17 +101,16 @@ redef class String
 		if padding_len == 1 then result_length -= 1
 		if padding_len == 2 then result_length -= 2
 
-		var result = new NativeString(result_length + 1)
-		result[result_length] = 0u8
+		var result = new Bytes.with_capacity(result_length + 1)
 
 		for s in [0 .. steps[ do
 			var c0 = inv[bytes[s * 4]]
 			var c1 = inv[bytes[s * 4 + 1]]
 			var c2 = inv[bytes[s * 4 + 2]]
 			var c3 = inv[bytes[s * 4 + 3]]
-			result[s * 3] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
-			result[s * 3 + 1] = ((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2)
-			result[s * 3 + 2] = ((c2 & 0b0000_0011u8) << 6) | (c3 & 0b0011_1111u8)
+			result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
+			result.add (((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2))
+			result.add (((c2 & 0b0000_0011u8) << 6) | (c3 & 0b0011_1111u8))
 		end
 
 		var last_start = steps * 4
@@ -131,14 +118,52 @@ redef class String
 			var c0 = inv[bytes[last_start]]
 			var c1 = inv[bytes[last_start + 1]]
 			var c2 = inv[bytes[last_start + 2]]
-			result[result_length - 2] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
-			result[result_length - 1] = ((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2)
+			result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
+			result.add (((c1 & 0b0000_1111u8) << 4) | ((c2 & 0b0011_1100u8) >> 2))
 		else if padding_len == 2 then
 			var c0 = inv[bytes[last_start]]
 			var c1 = inv[bytes[last_start + 1]]
-			result[result_length - 1] = ((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4)
+			result.add (((c0 & 0b0011_1111u8) << 2) | ((c1 & 0b0011_0000u8) >> 4))
 		end
 
-		return result.to_s_with_length(result_length)
+		return result
+	end
+end
+
+redef class Bytes
+
+	# Encodes the receiver string to base64 using a custom padding character.
+	#
+	# If using the default padding character `=`, see `encode_base64`.
+	fun encode_base64(padding: nullable Byte): Bytes
+	do
+		return items.encode_base64(length, padding)
+	end
+
+	# Decodes the receiver string from base64 using a custom padding character.
+	#
+	# Default padding character `=`
+	fun decode_base64(padding : nullable Byte) : Bytes
+	do
+		return items.decode_base64(length, padding)
+	end
+end
+
+redef class String
+
+	# Encodes the receiver string to base64 using a custom padding character.
+	#
+	# If using the default padding character `=`, see `encode_base64`.
+	fun encode_base64(padding: nullable Byte): String
+	do
+		return to_cstring.encode_base64(bytelen, padding).to_s
+	end
+
+	# Decodes the receiver string from base64 using a custom padding character.
+	#
+	# Default padding character `=`
+	fun decode_base64(padding : nullable Byte) : String
+	do
+		return to_cstring.decode_base64(bytelen, padding).to_s
 	end
 end
diff --git a/lib/core/bytes.nit b/lib/core/bytes.nit
index 59c4c5f..d589646 100644
--- a/lib/core/bytes.nit
+++ b/lib/core/bytes.nit
@@ -19,6 +19,16 @@ import kernel
 import collection::array
 intrude import text::flat
 
+redef class Byte
+	# Write self as a string into `ns` at position `pos`
+	private fun add_digest_at(ns: NativeString, pos: Int) do
+		var tmp = (0xF0u8 & self) >> 4
+		ns[pos] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+		tmp = 0x0Fu8 & self
+		ns[pos + 1] = if tmp >= 0x0Au8 then tmp + 0x37u8 else tmp + 0x30u8
+	end
+end
+
 # A buffer containing Byte-manipulation facilities
 #
 # Uses Copy-On-Write when persisted
@@ -26,7 +36,7 @@ class Bytes
 	super AbstractArray[Byte]
 
 	# A NativeString being a char*, it can be used as underlying representation here.
-	private var items: NativeString
+	var items: NativeString
 
 	# Number of bytes in the array
 	redef var length
@@ -63,6 +73,20 @@ class Bytes
 		return items[i]
 	end
 
+	# Returns self as a hexadecimal digest
+	fun hexdigest: String do
+		var elen = length * 2
+		var ns = new NativeString(elen)
+		var i = 0
+		var oi = 0
+		while i < length do
+			self[i].add_digest_at(ns, oi)
+			i += 1
+			oi += 2
+		end
+		return new FlatString.full(ns, elen, 0, elen - 1, elen)
+	end
+
 	#     var b = new Bytes.with_capacity(1)
 	#     b[0] = 101u8
 	#     assert b.to_s == "e"
@@ -146,80 +170,13 @@ class Bytes
 	redef fun to_s do
 		persisted = true
 		var b = self
-		if not is_utf8 then
-			b = clean_utf8
-			persisted = false
-		end
-		return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
+		var r = b.items.to_s_with_length(length)
+		if r != items then persisted = false
+		return r
 	end
 
 	redef fun iterator do return new BytesIterator.with_buffer(self)
 
-	# Is the byte collection valid UTF-8 ?
-	fun is_utf8: Bool do
-		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-		var lobounds = once [0, 0x80, 0x800, 0x10000]
-		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-		var pos = 0
-		var len = length
-		var mits = items
-		while pos < len do
-			var nxst = mits.length_of_char_at(pos)
-			var charst_index = (nxst - 1) * 2
-			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-				var c = mits.char_at(pos)
-				var cp = c.ascii
-				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-					if cp >= 0xD800 and cp <= 0xDFFF or
-					   cp == 0xFFFE or cp == 0xFFFF then return false
-				else
-					return false
-				end
-			else
-				return false
-			end
-			pos += nxst
-		end
-		return true
-	end
-
-	# Cleans the bytes of `self` to be UTF-8 compliant
-	private fun clean_utf8: Bytes do
-		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
-		var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
-		var lobounds = once [0, 0x80, 0x800, 0x10000]
-		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
-		var pos = 0
-		var len = length
-		var ret = new Bytes.with_capacity(len)
-		var mits = items
-		while pos < len do
-			var nxst = mits.length_of_char_at(pos)
-			var charst_index = (nxst - 1) * 2
-			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
-				var c = mits.char_at(pos)
-				var cp = c.ascii
-				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
-					if cp >= 0xD800 and cp <= 0xDFFF or
-					   cp == 0xFFFE or cp == 0xFFFF then
-						ret.append badchar
-						pos += 1
-					else
-						var pend = pos + nxst
-						for i in [pos .. pend[ do ret.add mits[i]
-						pos += nxst
-					end
-				else
-					ret.append badchar
-					pos += 1
-				end
-			else
-				ret.append badchar
-				pos += 1
-			end
-		end
-		return ret
-	end
 end
 
 private class BytesIterator
diff --git a/lib/core/stream.nit b/lib/core/stream.nit
index 2db319a..4b1e826 100644
--- a/lib/core/stream.nit
+++ b/lib/core/stream.nit
@@ -173,12 +173,13 @@ abstract class Reader
 	# ~~~
 	fun read_all: String do
 		var s = read_all_bytes
-		if not s.is_utf8 then s = s.clean_utf8
 		var slen = s.length
 		if slen == 0 then return ""
 		var rets = ""
 		var pos = 0
-		var sits = s.items
+		var str = s.items.clean_utf8(slen)
+		slen = str.bytelen
+		var sits = str.items
 		var remsp = slen
 		while pos < slen do
 			# The 129 size was decided more or less arbitrarily
diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit
index c8b6ecd..52de988 100644
--- a/lib/core/text/flat.nit
+++ b/lib/core/text/flat.nit
@@ -985,8 +985,7 @@ redef class NativeString
 	redef fun to_s_with_length(length): FlatString
 	do
 		assert length >= 0
-		var str = new FlatString.with_infos(self, length, 0, length - 1)
-		return str
+		return clean_utf8(length)
 	end
 
 	redef fun to_s_full(bytelen, unilen) do
@@ -997,6 +996,8 @@ redef class NativeString
 	redef fun to_s_with_copy: FlatString
 	do
 		var length = cstring_length
+		var r = clean_utf8(length)
+		if r.items != self then return r
 		var new_self = new NativeString(length + 1)
 		copy_to(new_self, length, 0, 0)
 		var str = new FlatString.with_infos(new_self, length, 0, length - 1)
@@ -1005,6 +1006,81 @@ redef class NativeString
 		return str
 	end
 
+	# Cleans a NativeString if necessary
+	fun clean_utf8(len: Int): FlatString do
+		var replacements: nullable Array[Int] = null
+		var end_length = len
+		var pos = 0
+		var chr_ln = 0
+		while pos < len do
+			var b = self[pos]
+			var nxst = length_of_char_at(pos)
+			var ok_st: Bool
+			if nxst == 1 then
+				ok_st = b & 0x80u8 == 0u8
+			else if nxst == 2 then
+				ok_st = b & 0xE0u8 == 0xC0u8
+			else if nxst == 3 then
+				ok_st = b & 0xF0u8 == 0xE0u8
+			else
+				ok_st = b & 0xF8u8 == 0xF0u8
+			end
+			if not ok_st then
+				if replacements == null then replacements = new Array[Int]
+				replacements.add pos
+				end_length += 2
+				pos += 1
+				chr_ln += 1
+				continue
+			end
+			var ok_c: Bool
+			var c = char_at(pos)
+			var cp = c.ascii
+			if nxst == 1 then
+				ok_c = cp >= 0 and cp <= 0x7F
+			else if nxst == 2 then
+				ok_c = cp >= 0x80 and cp <= 0x7FF
+			else if nxst == 3 then
+				ok_c = cp >= 0x800 and cp <= 0xFFFF
+				ok_c = ok_c and not (cp >= 0xD800 and cp <= 0xDFFF) and cp != 0xFFFE and cp != 0xFFFF
+			else
+				ok_c = cp >= 0x10000 and cp <= 0x10FFFF
+			end
+			if not ok_c then
+				if replacements == null then replacements = new Array[Int]
+				replacements.add pos
+				end_length += 2
+				pos += 1
+				chr_ln += 1
+				continue
+			end
+			pos += c.u8char_len
+			chr_ln += 1
+		end
+		var ret = self
+		if end_length != len then
+			ret = new NativeString(end_length)
+			var old_repl = 0
+			var off = 0
+			var repls = replacements.as(not null)
+			var r = repls.items.as(not null)
+			var imax = repls.length
+			for i in [0 .. imax[ do
+				var repl_pos = r[i]
+				var chkln = repl_pos - old_repl
+				copy_to(ret, chkln, old_repl, off)
+				off += chkln
+				ret[off] = 0xEFu8
+				ret[off + 1] = 0xBFu8
+				ret[off + 2] = 0xBDu8
+				old_repl = repl_pos + 1
+				off += 3
+			end
+			copy_to(ret, len - old_repl, old_repl, off)
+		end
+		return new FlatString.full(ret, end_length, 0, end_length - 1, chr_ln)
+	end
+
 	# Sets the next bytes at position `pos` to the value of `c`, encoded in UTF-8
 	#
 	# Very unsafe, make sure to have room for this char prior to calling this function.
@@ -1109,7 +1185,7 @@ redef class Array[E]
 			end
 			i += 1
 		end
-		return ns.to_s_with_length(sl)
+		return new FlatString.with_infos(ns, sl, 0, sl - 1)
 	end
 end
 
@@ -1146,7 +1222,7 @@ redef class NativeArray[E]
 			end
 			i += 1
 		end
-		return ns.to_s_with_length(sl)
+		return new FlatString.with_infos(ns, sl, 0, sl - 1)
 	end
 end
 
diff --git a/lib/sha1.nit b/lib/sha1.nit
index 8a5acc9..2bd8fbe 100644
--- a/lib/sha1.nit
+++ b/lib/sha1.nit
@@ -1,7 +1,5 @@
 # This file is part of NIT (http://www.nitlanguage.org).
 #
-# Copyright 2014 Lucas Bajolet <r4pass@hotmail.com>
-#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -219,22 +217,12 @@ in "C Header" `{
 	}
 `}
 
-redef class String
-
-	# Computes the SHA1 of the receiver
-	#
-	# Returns a digest of 20 bytes as a String,
-	# note that all the characters are not necessarily ASCII.
-	# If you want the hex string version of the digest, use
-	# sha1_to_s.
-	#
-	#     import base64
-	#     assert "The quick brown fox jumps over the lazy dog".sha1.encode_base64 == "L9ThxnotKPzthJ7hu3bnORuT6xI="
-	fun sha1: String import String.to_cstring, String.length, NativeString.to_s_with_length `{
+redef class NativeString
+	private fun sha1_intern(len: Int): NativeString `{
 		sha1nfo s;
 
 		sha1_init(&s);
-		sha1_write(&s, String_to_cstring(self), String_length(self));
+		sha1_write(&s, self, len);
 		uint8_t* digest = sha1_result(&s);
 
 		char* digested = malloc(21);
@@ -243,35 +231,30 @@ redef class String
 
 		digested[20] = '\0';
 
-		return NativeString_to_s_with_length(digested, 20);
+		return digested;
 	`}
+end
+
+redef class String
+
+	# Computes the SHA1 of the receiver
+	#
+	# Returns a digest of 20 bytes as a NativeString,
+	# note that all the characters are not necessarily ASCII.
+	# If you want the hex string version of the digest, use
+	# sha1_hexdigest.
+	#
+	#     import base64
+	#     assert "The quick brown fox jumps over the lazy dog".sha1 == [0x2Fu8, 0xD4u8, 0xE1u8, 0xC6u8, 0x7Au8, 0x2Du8, 0x28u8, 0xFCu8, 0xEDu8, 0x84u8, 0x9Eu8, 0xE1u8, 0xBBu8, 0x76u8, 0xE7u8, 0x39u8, 0x1Bu8, 0x93u8, 0xEBu8, 0x12u8]
+	fun sha1: Bytes do
+		return new Bytes(to_cstring.sha1_intern(bytelen), 20, 20)
+	end
 
 	# Computes the SHA1 of the receiver.
 	#
 	# Returns a 40 char String containing the Hexadecimal
 	# Digest in its Char form.
 	#
-	#     assert "The quick brown fox jumps over the lazy dog".sha1_to_s == "2FD4E1C67A2D28FCED849EE1BB76E7391B93EB12"
-	fun sha1_to_s: String import String.to_cstring, String.length, NativeString.to_s_with_length `{
-		sha1nfo s;
-
-		sha1_init(&s);
-		sha1_write(&s, String_to_cstring(self), String_length(self));
-		uint8_t* digest = sha1_result(&s);
-
-		char* ret_str = malloc(41);
-		char* hexmap = "0123456789ABCDEF";
-
-		int i;
-		for(i=0;i<20;i++){
-			uint8_t q = digest[i];
-			ret_str[i*2] = hexmap[q >> 4];
-			ret_str[(i*2)+1] = hexmap[q & 0x0F];
-		}
-		ret_str[40] = '\0';
-
-		return NativeString_to_s_with_length(ret_str, 40);
-	`}
-
+	#     assert "The quick brown fox jumps over the lazy dog".sha1_hexdigest == "2FD4E1C67A2D28FCED849EE1BB76E7391B93EB12"
+	fun sha1_hexdigest: String do return sha1.hexdigest
 end
-
diff --git a/lib/websocket/websocket.nit b/lib/websocket/websocket.nit
index c3fdaad..2a3c32f 100644
--- a/lib/websocket/websocket.nit
+++ b/lib/websocket/websocket.nit
@@ -114,7 +114,7 @@ class WebsocketConnection
 		resp_map["Connection:"] = "Upgrade"
 		var key = heads["Sec-WebSocket-Key"]
 		key += "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
-		key = key.sha1.encode_base64
+		key = key.sha1.encode_base64.to_s
 		resp_map["Sec-WebSocket-Accept:"] = key
 		var resp = resp_map.join("\r\n", " ")
 		resp += "\r\n\r\n"
diff --git a/tests/sav/nitcg/test_text_stat.res b/tests/sav/nitcg/test_text_stat.res
index 1066456..ff641cc 100644
--- a/tests/sav/nitcg/test_text_stat.res
+++ b/tests/sav/nitcg/test_text_stat.res
@@ -21,7 +21,7 @@ Calls to bytepos for each type:
 	FlatString = 18
 Calls to first_byte on FlatString 153
 Calls to last_byte on FlatString 103
-FlatStrings allocated with length 81 (85.417%)
+FlatStrings allocated with length 82 (86.458%)
 Length of travel for index distribution:
 * null = 20 => occurences 83.333%, cumulative 83.333% 
 * 1 = 8 => occurences 21.053%, cumulative 73.684% 
diff --git a/tests/sav/nitserial_args1.res b/tests/sav/nitserial_args1.res
index fe675df..ce7118b 100644
--- a/tests/sav/nitserial_args1.res
+++ b/tests/sav/nitserial_args1.res
@@ -13,6 +13,7 @@ redef class Deserializer
 		if name == "Array[nullable Object]" then return new Array[nullable Object].from_deserializer(self)
 		if name == "Array[Serializable]" then return new Array[Serializable].from_deserializer(self)
 		if name == "Array[Object]" then return new Array[Object].from_deserializer(self)
+		if name == "Array[Int]" then return new Array[Int].from_deserializer(self)
 		if name == "Array[Match]" then return new Array[Match].from_deserializer(self)
 		if name == "Array[nullable Match]" then return new Array[nullable Match].from_deserializer(self)
 		return super
diff --git a/tests/sav/test_text_stat.res b/tests/sav/test_text_stat.res
index 49e9adc..f6e7b69 100644
--- a/tests/sav/test_text_stat.res
+++ b/tests/sav/test_text_stat.res
@@ -21,7 +21,7 @@ Calls to bytepos for each type:
 	FlatString = 18
 Calls to first_byte on FlatString 153
 Calls to last_byte on FlatString 103
-FlatStrings allocated with length 81 (85.417%)
+FlatStrings allocated with length 82 (86.458%)
 Length of travel for index distribution:
 * 0 = 20 => occurences 83.333%, cumulative 83.333% 
 * 1 = 8 => occurences 21.053%, cumulative 73.684%