From: Jean Privat Date: Thu, 20 Aug 2015 14:45:44 +0000 (-0400) Subject: Merge: Intro Codec X-Git-Tag: v0.7.8~80 X-Git-Url: http://nitlanguage.org?hp=-c Merge: Intro Codec As UTF-8 is now part of Nit, the standard requires conforming implementations to properly handle borderline cases like overlong sequences and such. The codec defined here sanitizes an input before letting Nit play with it, avoiding potential security [issues](https://www.owasp.org/index.php/Canonicalization,_locale_and_Unicode). The codec architecture can also be used later to handle different encodings for source files (either that, or we decide that everything that is not UTF-8 is to be rejected/misinterpreted) or text. Pull-Request: #1628 Reviewed-by: Jean Privat Reviewed-by: Alexandre Terrasa --- a672b1d158eeb2cbf1dfb5316f6f37c558971832 diff --combined lib/standard/stream.nit index 0eabcba,deb42d8..2db319a --- a/lib/standard/stream.nit +++ b/lib/standard/stream.nit @@@ -14,6 -14,7 +14,7 @@@ module strea intrude import text::ropes import error intrude import bytes + import codecs in "C" `{ #include @@@ -43,6 -44,10 +44,10 @@@ en # A `Stream` that can be read from abstract class Reader super Stream + + # Decoder used to transform input bytes to UTF-8 + var decoder: Decoder = utf8_decoder is writable + # Reads a character. 
Returns `null` on EOF or timeout fun read_char: nullable Char is abstract @@@ -168,6 -173,7 +173,7 @@@ # ~~~ fun read_all: String do var s = read_all_bytes + if not s.is_utf8 then s = s.clean_utf8 var slen = s.length if slen == 0 then return "" var rets = "" @@@ -378,6 -384,9 +384,9 @@@ en abstract class Writer super Stream + # The coder from a nit UTF-8 String to the output file + var coder: Coder = utf8_coder is writable + # Writes bytes from `s` fun write_bytes(s: Bytes) is abstract @@@ -448,6 -457,7 +457,7 @@@ abstract class BufferedReade return c end + # Resets the internal buffer fun buffer_reset do _buffer_length = 0 _buffer_pos = 0 @@@ -518,13 -528,14 +528,13 @@@ do if last_error != null then return new Bytes.empty var s = new Bytes.with_capacity(10) + var b = _buffer while not eof do var j = _buffer_pos var k = _buffer_length - while j < k do - s.add(_buffer[j]) - j += 1 - end - _buffer_pos = j + var rd_sz = k - j + s.append_ns_from(b, rd_sz, j) + _buffer_pos = k fill_buffer end return s @@@ -532,6 -543,7 +542,7 @@@ redef fun append_line_to(s) do + var lb = new Bytes.with_capacity(10) loop # First phase: look for a '\n' var i = _buffer_pos @@@ -550,27 -562,29 +561,29 @@@ # if there is something to append if i > _buffer_pos then # Copy from the buffer to the string var j = _buffer_pos while j < i do - s.bytes.add(_buffer[j]) + lb.add(_buffer[j]) j += 1 end _buffer_pos = i else assert end_reached + s.append lb.to_s return end if eol then # so \n is found + s.append lb.to_s return else # so \n is not found - if end_reached then return + if end_reached then + s.append lb.to_s + return + end fill_buffer end end