From: Jean Privat <jean@pryen.org>
Date: Fri, 21 Aug 2015 19:22:36 +0000 (-0400)
Subject: Merge: lib/code/standard: hash no more divides object_id by 8
X-Git-Tag: v0.7.8~77
X-Git-Url: http://nitlanguage.org?hp=39b9aa26885fff6f5edd8c0fe96cfdbae53c9d06

Merge: lib/code/standard: hash no more divides object_id by 8

Since objects are allocated on 64-bit word boundaries, it made sense to remove the three lower bits (always 000) and increase the entropy of the hashcode.
Unfortunately, primitive objects like low Int values or Bool have an object_id lower than 8. Shifting out the last 3 bits was thus a bad idea.

Therefore, programs that used Int or Bool as keys (or part of keys) had a lot of hash collisions.
For instance, before, for lib/ai/examples/puzzle.nit:

* number of collisions: 525428 (84.49%)
* average length of collisions: 12.44

After:

* number of collisions: 256223 (41.20%)
* average length of collisions: 3.16

The change has a limited effect on programs that mainly use standard objects as keys.

Before, for nitc:

* number of collisions: 661715 (16.18%)
* average length of collisions: 2.24

After:

* number of collisions: 711614 (17.40%)
* average length of collisions: 2.25

Pull-Request: #1646
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
Reviewed-by: Alexis Laferrière <alexis.laf@xymus.net>
---

diff --git a/lib/binary/binary.nit b/lib/binary/binary.nit
index 263ad95..7e5c180 100644
--- a/lib/binary/binary.nit
+++ b/lib/binary/binary.nit
@@ -178,11 +178,13 @@ redef abstract class Reader
 	# Returns a truncated string when an error is pending (`last_error != null`).
 	fun read_string: String
 	do
-		var buf = new FlatBuffer
+		var buf = new Bytes.empty
 		loop
 			var byte = read_byte
-			if byte == null or byte == 0x00u8 then return buf.to_s
-			buf.bytes.add byte
+			if byte == null or byte == 0u8 then
+				return buf.to_s
+			end
+			buf.add byte
 		end
 	end
 
diff --git a/lib/standard/bytes.nit b/lib/standard/bytes.nit
index b8cc1fb..59c4c5f 100644
--- a/lib/standard/bytes.nit
+++ b/lib/standard/bytes.nit
@@ -145,10 +145,81 @@ class Bytes
 
 	redef fun to_s do
 		persisted = true
-		return new FlatString.with_infos(items, length, 0, length -1)
+		var b = self
+		if not is_utf8 then
+			b = clean_utf8
+			persisted = false
+		end
+		return new FlatString.with_infos(b.items, b.length, 0, b.length -1)
 	end
 
 	redef fun iterator do return new BytesIterator.with_buffer(self)
+
+	# Is the byte collection valid UTF-8 ?
+	fun is_utf8: Bool do
+		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+		var lobounds = once [0, 0x80, 0x800, 0x10000]
+		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+		var pos = 0
+		var len = length
+		var mits = items
+		while pos < len do
+			var nxst = mits.length_of_char_at(pos)
+			var charst_index = (nxst - 1) * 2
+			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
+				var c = mits.char_at(pos)
+				var cp = c.ascii
+				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
+					if cp >= 0xD800 and cp <= 0xDFFF or
+					   cp == 0xFFFE or cp == 0xFFFF then return false
+				else
+					return false
+				end
+			else
+				return false
+			end
+			pos += nxst
+		end
+		return true
+	end
+
+	# Cleans the bytes of `self` to be UTF-8 compliant
+	private fun clean_utf8: Bytes do
+		var charst = once [0x80u8, 0u8, 0xE0u8, 0xC0u8, 0xF0u8, 0xE0u8, 0xF8u8, 0xF0u8]
+		var badchar = once [0xEFu8, 0xBFu8, 0xBDu8]
+		var lobounds = once [0, 0x80, 0x800, 0x10000]
+		var hibounds = once [0x7F, 0x7FF, 0xFFFF, 0x10FFFF]
+		var pos = 0
+		var len = length
+		var ret = new Bytes.with_capacity(len)
+		var mits = items
+		while pos < len do
+			var nxst = mits.length_of_char_at(pos)
+			var charst_index = (nxst - 1) * 2
+			if mits[pos] & charst[charst_index] == charst[charst_index + 1] then
+				var c = mits.char_at(pos)
+				var cp = c.ascii
+				if cp <= hibounds[nxst - 1] and cp >= lobounds[nxst - 1] then
+					if cp >= 0xD800 and cp <= 0xDFFF or
+					   cp == 0xFFFE or cp == 0xFFFF then
+						ret.append badchar
+						pos += 1
+					else
+						var pend = pos + nxst
+						for i in [pos .. pend[ do ret.add mits[i]
+						pos += nxst
+					end
+				else
+					ret.append badchar
+					pos += 1
+				end
+			else
+				ret.append badchar
+				pos += 1
+			end
+		end
+		return ret
+	end
 end
 
 private class BytesIterator
@@ -178,21 +249,23 @@ redef class Text
 	# ~~~
 	fun to_bytes: Bytes do
 		var b = new Bytes.with_capacity(bytelen)
+		append_to_bytes b
+		return b
+	end
+
+	# Appends `self.bytes` to `b`
+	fun append_to_bytes(b: Bytes) do
 		for s in substrings do
 			var from = if s isa FlatString then s.first_byte else 0
 			b.append_ns_from(s.items, s.bytelen, from)
 		end
-		return b
 	end
 end
 
 redef class FlatText
-	redef fun to_bytes do
-		var len = bytelen
-		var b = new Bytes.with_capacity(len)
+	redef fun append_to_bytes(b) do
 		var from = if self isa FlatString then first_byte else 0
-		b.append_ns_from(items, len, from)
-		return b
+		b.append_ns_from(items, bytelen, from)
 	end
 end
 
diff --git a/lib/standard/codecs/codec_base.nit b/lib/standard/codecs/codec_base.nit
new file mode 100644
index 0000000..b4a9523
--- /dev/null
+++ b/lib/standard/codecs/codec_base.nit
@@ -0,0 +1,51 @@
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#	 http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Base for codecs to use with streams
+#
+# A Codec (Coder/Decoder) is a transformer from one byte format to another
+#
+# As Nit Strings are UTF-8, a codec works as :
+# - Coder: From a UTF-8 string to a specified format (writing)
+# - Decoder: From a specified format to a UTF-8 string (reading)
+module codec_base
+
+import text
+import bytes
+
+# Codes UTF-8 entities to an external format
+abstract class Coder
+
+	# Transforms `c` to its representation in the format of `self`
+	fun code_char(c: Char): Bytes is abstract
+
+	# Adds a char `c` to bytes `s`
+	fun add_char_to(c: Char, s: Bytes) is abstract
+
+	# Transforms `s` to the format of `self`
+	fun code_string(s: Text): Bytes is abstract
+
+	# Adds a string `s` to bytes `b`
+	fun add_string_to(s: Text, b: Bytes) is abstract
+end
+
+# Decodes entities in an external format to UTF-8
+abstract class Decoder
+
+	# Decodes a char from `b` to a Unicode code-point
+	fun decode_char(b: Bytes): Char is abstract
+
+	# Decodes a string `b` to UTF-8
+	fun decode_string(b: Bytes): String is abstract
+end
diff --git a/lib/standard/codecs/codecs.nit b/lib/standard/codecs/codecs.nit
new file mode 100644
index 0000000..25e9931
--- /dev/null
+++ b/lib/standard/codecs/codecs.nit
@@ -0,0 +1,19 @@
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#	 http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Group module for all codec-related manipulations
+module codecs
+
+import codec_base
+import utf8
diff --git a/lib/standard/codecs/utf8.nit b/lib/standard/codecs/utf8.nit
new file mode 100644
index 0000000..65f2fc9
--- /dev/null
+++ b/lib/standard/codecs/utf8.nit
@@ -0,0 +1,50 @@
+# This file is part of NIT (http://www.nitlanguage.org).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#	 http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Codec for UTF-8 I/O
+module utf8
+
+import codec_base
+
+# Returns UTF-8 entities as-is
+private class UTF8Coder
+	super Coder
+
+	redef fun code_char(c) do return c.to_s.to_bytes
+
+	redef fun add_char_to(c, stream) do c.to_s.append_to_bytes(stream)
+
+	redef fun code_string(s) do return s.to_bytes
+
+	redef fun add_string_to(s, b) do s.append_to_bytes(b)
+end
+
+# Decodes UTF-8 input to a String by delegating to `Bytes::to_s`
+private class UTF8Decoder
+	super Decoder
+
+	redef fun decode_char(b) do
+		var s = b.to_s
+		return s[0]
+	end
+
+	redef fun decode_string(b) do
+		return b.to_s
+	end
+end
+
+# Returns the instance of a UTF-8 Coder
+fun utf8_coder: Coder do return once new UTF8Coder
+# Returns the instance of a UTF-8 Decoder
+fun utf8_decoder: Decoder do return once new UTF8Decoder
diff --git a/lib/standard/stream.nit b/lib/standard/stream.nit
index 0eabcba..2db319a 100644
--- a/lib/standard/stream.nit
+++ b/lib/standard/stream.nit
@@ -14,6 +14,7 @@ module stream
 intrude import text::ropes
 import error
 intrude import bytes
+import codecs
 
 in "C" `{
 	#include <unistd.h>
@@ -43,6 +44,10 @@ end
 # A `Stream` that can be read from
 abstract class Reader
 	super Stream
+
+	# Decoder used to transform input bytes to UTF-8
+	var decoder: Decoder = utf8_decoder is writable
+
 	# Reads a character. Returns `null` on EOF or timeout
 	fun read_char: nullable Char is abstract
 
@@ -168,6 +173,7 @@ abstract class Reader
 	# ~~~
 	fun read_all: String do
 		var s = read_all_bytes
+		if not s.is_utf8 then s = s.clean_utf8
 		var slen = s.length
 		if slen == 0 then return ""
 		var rets = ""
@@ -378,6 +384,9 @@ end
 abstract class Writer
 	super Stream
 
+	# The coder from a nit UTF-8 String to the output file
+	var coder: Coder = utf8_coder is writable
+
 	# Writes bytes from `s`
 	fun write_bytes(s: Bytes) is abstract
 
@@ -448,6 +457,7 @@ abstract class BufferedReader
 		return c
 	end
 
+	# Resets the internal buffer
 	fun buffer_reset do
 		_buffer_length = 0
 		_buffer_pos = 0
@@ -532,6 +542,7 @@ abstract class BufferedReader
 
 	redef fun append_line_to(s)
 	do
+		var lb = new Bytes.with_capacity(10)
 		loop
 			# First phase: look for a '\n'
 			var i = _buffer_pos
@@ -550,27 +561,29 @@ abstract class BufferedReader
 
 			# if there is something to append
 			if i > _buffer_pos then
-				# Enlarge the string (if needed)
-				s.enlarge(s.bytelen + i - _buffer_pos)
-
 				# Copy from the buffer to the string
 				var j = _buffer_pos
 				while j < i do
-					s.bytes.add(_buffer[j])
+					lb.add(_buffer[j])
 					j += 1
 				end
 				_buffer_pos = i
 			else
 				assert end_reached
+				s.append lb.to_s
 				return
 			end
 
 			if eol then
 				# so \n is found
+				s.append lb.to_s
 				return
 			else
 				# so \n is not found
-				if end_reached then return
+				if end_reached then
+					s.append lb.to_s
+					return
+				end
 				fill_buffer
 			end
 		end
diff --git a/lib/standard/text/abstract_text.nit b/lib/standard/text/abstract_text.nit
index 8de0af9..1faaad3 100644
--- a/lib/standard/text/abstract_text.nit
+++ b/lib/standard/text/abstract_text.nit
@@ -1353,10 +1353,6 @@ abstract class Buffer
 	# In Buffers, the internal sequence of character is mutable
 	# Thus, `chars` can be used to modify the buffer.
 	redef fun chars: Sequence[Char] is abstract
-
-	# In Buffers, the internal sequence of bytes is mutable
-	# Thus, `bytes` can be used to modify the buffer.
-	redef fun bytes: Sequence[Byte] is abstract
 end
 
 # View for chars on Buffer objects, extends Sequence
@@ -1373,7 +1369,6 @@ end
 # for mutation operations
 private abstract class BufferByteView
 	super StringByteView
-	super Sequence[Byte]
 
 	redef type SELFTYPE: Buffer
 end
diff --git a/lib/standard/text/flat.nit b/lib/standard/text/flat.nit
index 87381e6..917b0e5 100644
--- a/lib/standard/text/flat.nit
+++ b/lib/standard/text/flat.nit
@@ -34,41 +34,18 @@ private class FlatSubstringsIter
 	redef fun next do tgt = null
 end
 
-# Immutable strings of characters.
-class FlatString
-	super FlatText
-	super String
-
-	# Index at which `self` begins in `items`, inclusively
-	private var first_byte: Int is noinit
+redef class FlatText
 
-	# Index at which `self` ends in `items`, inclusively
-	private var last_byte: Int is noinit
-
-	redef var chars = new FlatStringCharView(self) is lazy
+	private fun first_byte: Int do return 0
 
-	redef var bytes = new FlatStringByteView(self) is lazy
+	private fun last_byte: Int do return bytelen - 1
 
 	# Cache of the latest position (char) explored in the string
 	var position: Int = 0
+
 	# Cached position (bytes) in the NativeString underlying the String
 	var bytepos: Int = first_byte is lateinit
 
-	redef var length is lazy do
-		if bytelen == 0 then return 0
-		var st = first_byte
-		var its = items
-		var ln = 0
-		var lst = last_byte
-		while st <= lst do
-			st += its.length_of_char_at(st)
-			ln += 1
-		end
-		return ln
-	end
-
-	redef fun [](index) do return items.char_at(char_to_byte_index(index))
-
 	# Index of the character `index` in `items`
 	private fun char_to_byte_index(index: Int): Int do
 		var ln = length
@@ -107,6 +84,37 @@ class FlatString
 		return ns_i
 	end
 
+	redef fun [](index) do return items.char_at(char_to_byte_index(index))
+end
+
+# Immutable strings of characters.
+class FlatString
+	super FlatText
+	super String
+
+	# Index at which `self` begins in `items`, inclusively
+	redef var first_byte is noinit
+
+	# Index at which `self` ends in `items`, inclusively
+	redef var last_byte is noinit
+
+	redef var chars = new FlatStringCharView(self) is lazy
+
+	redef var bytes = new FlatStringByteView(self) is lazy
+
+	redef var length is lazy do
+		if bytelen == 0 then return 0
+		var st = first_byte
+		var its = items
+		var ln = 0
+		var lst = last_byte
+		while st <= lst do
+			st += its.length_of_char_at(st)
+			ln += 1
+		end
+		return ln
+	end
+
 	redef fun reversed
 	do
 		var b = new FlatBuffer.with_capacity(bytelen + 1)
@@ -280,7 +288,7 @@ class FlatString
 		var mifrom = first_byte
 		if s isa FlatText then
 			var sits = s.items
-			var sifrom = s.as(FlatString).first_byte
+			var sifrom = s.first_byte
 			var ns = new NativeString(nlen + 1)
 			mits.copy_to(ns, mlen, mifrom, 0)
 			sits.copy_to(ns, slen, sifrom, mlen)
@@ -471,23 +479,15 @@ class FlatBuffer
 
 	redef var chars: Sequence[Char] = new FlatBufferCharView(self) is lazy
 
-	redef var bytes: Sequence[Byte] = new FlatBufferByteView(self) is lazy
+	redef var bytes = new FlatBufferByteView(self) is lazy
 
 	redef var bytelen = 0
 
-	# O(n)
-	redef fun length do
-		var max = bytelen
-		if max == 0 then return 0
-		var pos = 0
-		var ln = 0
-		var its = items
-		while pos < max do
-			pos += its.length_of_char_at(pos)
-			ln += 1
-		end
-		return ln
-	end
+	redef var length = 0
+
+	private var char_cache: Int = -1
+
+	private var byte_cache: Int = -1
 
 	private var capacity = 0
 
@@ -527,12 +527,6 @@ class FlatBuffer
 		items.copy_to(items, bytelen - from, from, from - len)
 	end
 
-	redef fun [](i)
-	do
-		assert i < length and i >= 0
-		return items.char_at(items.char_to_byte_index(i))
-	end
-
 	redef fun []=(index, item)
 	do
 		assert index >= 0 and index <= length
@@ -553,6 +547,7 @@ class FlatBuffer
 			lshift_bytes(ip + clen, -size_diff)
 		end
 		bytelen += size_diff
+		bytepos += size_diff
 		items.set_char_at(ip, item)
 	end
 
@@ -564,21 +559,14 @@ class FlatBuffer
 		enlarge(bytelen + clen)
 		items.set_char_at(bytelen, c)
 		bytelen += clen
-	end
-
-	private fun add_byte(b: Byte) do
-		if written then reset
-		is_dirty = true
-		enlarge(bytelen + 1)
-		items[bytelen] = b
-		# FIXME: Might trigger errors
-		bytelen += 1
+		length += 1
 	end
 
 	redef fun clear do
 		is_dirty = true
 		if written then reset
 		bytelen = 0
+		length = 0
 	end
 
 	redef fun empty do return new Buffer
@@ -626,11 +614,12 @@ class FlatBuffer
 	#
 	# If `items` is shared, `written` should be set to true after the creation
 	# so that a modification will do a copy-on-write.
-	private init with_infos(items: NativeString, capacity, bytelen: Int)
+	private init with_infos(items: NativeString, capacity, bytelen, length: Int)
 	do
 		self.items = items
 		self.capacity = capacity
 		self.bytelen = bytelen
+		self.length = length
 	end
 
 	# Create a new string copied from `s`.
@@ -643,6 +632,7 @@ class FlatBuffer
 			for i in substrings do i.as(FlatString).items.copy_to(items, i.bytelen, 0, 0)
 		end
 		bytelen = s.bytelen
+		length = s.length
 		capacity = s.bytelen
 		written = true
 	end
@@ -662,15 +652,14 @@ class FlatBuffer
 		is_dirty = true
 		var sl = s.bytelen
 		enlarge(bytelen + sl)
-		if s isa FlatString then
+		if s isa FlatText then
 			s.items.copy_to(items, sl, s.first_byte, bytelen)
-		else if s isa FlatBuffer then
-			s.items.copy_to(items, sl, 0, bytelen)
 		else
 			for i in s.substrings do append i
 			return
 		end
 		bytelen += sl
+		length += s.length
 	end
 
 	# Copies the content of self in `dest`
@@ -695,7 +684,7 @@ class FlatBuffer
 			var byte_length = byteto - bytefrom + 1
 			var r_items = new NativeString(byte_length)
 			items.copy_to(r_items, byte_length, bytefrom, 0)
-			return new FlatBuffer.with_infos(r_items, byte_length, byte_length)
+			return new FlatBuffer.with_infos(r_items, byte_length, byte_length, count)
 		else
 			return new Buffer
 		end
@@ -761,39 +750,6 @@ private class FlatBufferByteView
 
 	redef fun [](index) do return target.items[index]
 
-	redef fun []=(index, item)
-	do
-		assert index >= 0 and index <= target.bytelen
-		if index == target.bytelen then
-			add(item)
-			return
-		end
-		target.items[index] = item
-	end
-
-	redef fun push(c)
-	do
-		target.add_byte(c)
-	end
-
-	fun enlarge(cap: Int)
-	do
-		target.enlarge(cap)
-	end
-
-	redef fun append(s)
-	do
-		var s_length = s.length
-		if target.capacity < (target.length + s_length) then enlarge(s_length + target.length)
-		var pos = target.length
-		var its = target.items
-		for i in s do
-			its[pos] = i
-			pos += 1
-		end
-		target.length += s.length
-	end
-
 	redef fun iterator_from(pos) do return new FlatBufferByteIterator.with_pos(target, pos)
 
 	redef fun reverse_iterator_from(pos) do return new FlatBufferByteReverseIterator.with_pos(target, pos)
diff --git a/lib/standard/text/ropes.nit b/lib/standard/text/ropes.nit
index 08611b7..2b3ff28 100644
--- a/lib/standard/text/ropes.nit
+++ b/lib/standard/text/ropes.nit
@@ -270,7 +270,7 @@ class RopeBuffer
 
 	redef var chars: Sequence[Char] is lazy do return new RopeBufferChars(self)
 
-	redef var bytes: Sequence[Byte] is lazy do return new RopeBufferBytes(self)
+	redef var bytes is lazy do return new RopeBufferBytes(self)
 
 	# The final string being built on the fly
 	private var str: String = ""
@@ -281,6 +281,9 @@ class RopeBuffer
 	# Next available (e.g. unset) character in the `Buffer`
 	private var rpos = 0
 
+	# Length (in chars) of the buffered part
+	private var nslen = 0
+
 	# Keeps track of the buffer's currently dumped part
 	#
 	# This might happen if for instance, a String was being
@@ -436,7 +439,7 @@ class RopeBuffer
 		end
 		if s isa FlatText then
 			var oits = s.items
-			var from = if s isa FlatString then s.first_byte else 0
+			var from = s.first_byte
 			var remsp = buf_size - rpos
 			if slen <= remsp then
 				oits.copy_to(ns, slen, from, rpos)
@@ -467,18 +470,6 @@ class RopeBuffer
 		rpos = rp
 	end
 
-	private fun add_byte(b: Byte) do
-		var rp = rpos
-		if rp >= buf_size then
-			dump_buffer
-			rp = 0
-		end
-		ns[rp] = b
-		rp += 1
-		bytelen += 1
-		rpos = rp
-	end
-
 	# Converts the Buffer to a FlatString, appends it to
 	# the final String and re-allocates a new larger Buffer.
 	private fun dump_buffer do
@@ -1237,23 +1228,6 @@ class RopeBufferBytes
 		end
 	end
 
-	redef fun []=(i,c) do
-		if i == target.length then target.add_byte c
-		if i < target.str.length then
-			# FIXME: Will need to be optimized and rewritten with Unicode
-			var s = target.str
-			var l = s.substring(0, i)
-			var r = s.substring_from(i + 1)
-			target.str = l + c.to_i.ascii.to_s + r
-		else
-			target.ns[i - target.str.length] = c
-		end
-	end
-
-	redef fun add(c) do target.add_byte c
-
-	redef fun push(c) do target.add_byte c
-
 	redef fun iterator_from(i) do return new RopeBufferByteIterator.from(target, i)
 
 	redef fun reverse_iterator_from(i) do return new RopeBufferByteReverseIterator.from(target, i)
diff --git a/src/doc/vim_autocomplete.nit b/src/doc/vim_autocomplete.nit
index 290ed9d..ad5e891 100644
--- a/src/doc/vim_autocomplete.nit
+++ b/src/doc/vim_autocomplete.nit
@@ -201,28 +201,7 @@ redef class MClassType
 		alpha_comparator.sort props
 		for prop in props do
 			if mclass.name == "Object" or prop.intro.mclassdef.mclass.name != "Object" then
-
-				if prop.visibility == public_visibility then
-					stream.write "+ "
-				else stream.write "~ " # protected_visibility
-
-				if prop isa MMethod then
-					if prop.is_init and prop.name != "init" then stream.write "init "
-					if prop.is_new and prop.name != "new" then stream.write "new "
-				end
-
-				stream.write prop.name
-
-				if prop isa MMethod then
-					stream.write prop.intro.msignature.to_s
-				end
-
-				var mdoc = prop.intro.mdoc
-				if mdoc != null then
-					stream.write "  # "
-					stream.write mdoc.content.first
-				end
-				stream.write line_separator
+				prop.write_synopsis(mainmodule, stream)
 			end
 		end
 	end
@@ -307,3 +286,72 @@ private class AutocompletePhase
 		end
 	end
 end
+
+redef class MModule
+	redef fun write_extra_doc(mainmodule, stream)
+	do
+		# Introduced classes
+		var class_intros = collect_intro_mclasses(protected_visibility).to_a
+		if class_intros.not_empty then
+			alpha_comparator.sort class_intros
+			stream.write line_separator*2
+			stream.write "## Introduced classes"
+
+			for c in class_intros do
+				stream.write line_separator
+				stream.write "* {c.name}"
+				var doc = c.intro.mdoc
+				if doc != null then stream.write ": {doc.content.first}"
+			end
+		end
+
+		# Introduced properties
+		var prop_intros = new Array[MPropDef]
+		for c in mclassdefs do
+			prop_intros.add_all c.collect_intro_mpropdefs(protected_visibility)
+		end
+
+		if prop_intros.not_empty then
+			alpha_comparator.sort prop_intros
+			stream.write line_separator*2
+			stream.write "## Introduced properties"
+			stream.write line_separator
+
+			for p in prop_intros do
+				p.mproperty.write_synopsis(mainmodule, stream)
+			end
+		end
+	end
+end
+
+redef class MProperty
+	private fun write_synopsis(mainmodule: MModule, stream: Writer)
+	do
+		if visibility == public_visibility then
+			stream.write "+ "
+		else stream.write "~ " # protected_visibility
+
+		if self isa MMethod then
+			if is_new and name != "new" then
+				stream.write "new "
+			else if is_init and name != "init" then
+				stream.write "init "
+			end
+		end
+
+		stream.write name
+
+		if self isa MMethod then
+			var intro = intro
+			assert intro isa MMethodDef
+			stream.write intro.msignature.to_s
+		end
+
+		var mdoc = intro.mdoc
+		if mdoc != null then
+			stream.write "  # "
+			stream.write mdoc.content.first
+		end
+		stream.write line_separator
+	end
+end
diff --git a/tests/UTF-8-test.txt b/tests/UTF-8-test.txt
new file mode 100644
index 0000000..abd16f7
Binary files /dev/null and b/tests/UTF-8-test.txt differ
diff --git a/tests/sav/nitpick_args1.res b/tests/sav/nitpick_args1.res
index f9e2cbb..84ff5ce 100644
--- a/tests/sav/nitpick_args1.res
+++ b/tests/sav/nitpick_args1.res
@@ -1,4 +1,3 @@
-../lib/standard/stream.nit:451,6--17: Documentation warning: Undocumented property `buffer_reset`
 test_advice_repeated_types.nit:36,15--20: Warning: useless type repetition on redefined attribute `_a`
 test_advice_repeated_types.nit:37,18--20: Warning: useless type repetition on parameter `b1` for redefined method `b`
 test_advice_repeated_types.nit:38,18--20: Warning: useless type repetition on parameter `c1` for redefined method `c`
diff --git a/tests/sav/test_read_all.res b/tests/sav/test_read_all.res
new file mode 100644
index 0000000..2f4cc2a
--- /dev/null
+++ b/tests/sav/test_read_all.res
@@ -0,0 +1 @@
+usage ./test_read_all file
diff --git a/tests/sav/test_read_all_args1.res b/tests/sav/test_read_all_args1.res
new file mode 100644
index 0000000..6228c6e
Binary files /dev/null and b/tests/sav/test_read_all_args1.res differ
diff --git a/tests/sav/test_string_bytes.res b/tests/sav/test_string_bytes.res
index 8d98756..2984735 100644
--- a/tests/sav/test_string_bytes.res
+++ b/tests/sav/test_string_bytes.res
@@ -4,9 +4,7 @@
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
 [0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54,0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
-This string is cool
-This string is coolA
-Ahis string is coolA
-This string is cool
-This string is coolA
-Ahis string is coolA
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
+[0x54,0x68,0x69,0x73,0x20,0x73,0x74,0x72,0x69,0x6e,0x67,0x20,0x69,0x73,0x20,0x63,0x6f,0x6f,0x6c]
+[0x6c,0x6f,0x6f,0x63,0x20,0x73,0x69,0x20,0x67,0x6e,0x69,0x72,0x74,0x73,0x20,0x73,0x69,0x68,0x54]
diff --git a/tests/test_read_all.args b/tests/test_read_all.args
new file mode 100644
index 0000000..e497886
--- /dev/null
+++ b/tests/test_read_all.args
@@ -0,0 +1 @@
+UTF-8-test.txt
diff --git a/tests/test_read_all.nit b/tests/test_read_all.nit
new file mode 100644
index 0000000..4f2d7e0
--- /dev/null
+++ b/tests/test_read_all.nit
@@ -0,0 +1,26 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if args.is_empty then
+	print "usage ./test_read_all file"
+	exit -1
+end
+
+var file = new FileReader.open(args[0])
+
+var s = file.read_all
+
+for i in s do
+	printn i
+end
diff --git a/tests/test_string_bytes.nit b/tests/test_string_bytes.nit
index 875f5dc..13048d8 100644
--- a/tests/test_string_bytes.nit
+++ b/tests/test_string_bytes.nit
@@ -28,24 +28,10 @@ print z.bytes.reverse_iterator.to_a
 
 var b = new FlatBuffer.from(x)
 
-print b
-
-b.bytes.add 0x41u8
-
-print b
-
-b.bytes[0] = 0x41u8
-
-print b
+print b.bytes.to_a
+print b.bytes.reverse_iterator.to_a
 
 var c = new RopeBuffer.from(x)
 
-print c
-
-c.bytes.add 0x41u8
-
-print c
-
-c.bytes[0] = 0x41u8
-
-print c
+print c.bytes
+print c.bytes.reverse_iterator.to_a