Merge: Intro Codec
[nit.git] / lib / standard / stream.nit
index 7df1bd1..2db319a 100644 (file)
 # Input and output streams of characters
 module stream
 
-intrude import ropes
+intrude import text::ropes
 import error
+intrude import bytes
+import codecs
 
 in "C" `{
        #include <unistd.h>
@@ -42,22 +44,33 @@ end
 # A `Stream` that can be read from
 abstract class Reader
        super Stream
-       # Read a character. Return its ASCII value, -1 on EOF or timeout
-       fun read_char: Int is abstract
+
+       # Decoder used to transform input bytes to UTF-8
+       var decoder: Decoder = utf8_decoder is writable
+
+       # Reads a character. Returns `null` on EOF or timeout
+       fun read_char: nullable Char is abstract
+
+       # Reads a byte. Returns `null` on EOF or timeout
+       fun read_byte: nullable Byte is abstract
+
+       # Reads a String made of at most `i` bytes
+       fun read(i: Int): String do return read_bytes(i).to_s
 
        # Read at most i bytes
-       fun read(i: Int): String
+       fun read_bytes(i: Int): Bytes
        do
-               if last_error != null then return ""
-               var s = new FlatBuffer.with_capacity(i)
+               if last_error != null then return new Bytes.empty
+               var s = new NativeString(i)
+               var buf = new Bytes(s, 0, 0)
                while i > 0 and not eof do
-                       var c = read_char
-                       if c >= 0 then
-                               s.add(c.ascii)
+                       var c = read_byte
+                       if c != null then
+                               buf.add c
                                i -= 1
                        end
                end
-               return s.to_s
+               return buf
        end
 
        # Read a string until the end of the line.
@@ -151,22 +164,53 @@ abstract class Reader
 
        # Read all the stream until the eof.
        #
-       # The content of the file is returned verbatim.
+       # The content of the file is returned as a String.
        #
        # ~~~
        # var txt = "Hello\n\nWorld\n"
        # var i = new StringReader(txt)
        # assert i.read_all == txt
        # ~~~
-       fun read_all: String
+       fun read_all: String do
+               var s = read_all_bytes
+               if not s.is_utf8 then s = s.clean_utf8
+               var slen = s.length
+               if slen == 0 then return ""
+               var rets = ""
+               var pos = 0
+               var sits = s.items
+               var remsp = slen
+               while pos < slen do
+                       # The chunk size of 129 was chosen more or less arbitrarily.
+                       # More benchmarking is needed to determine
+                       # whether it is the best size.
+                       var chunksz = 129
+                       if chunksz > remsp then
+                               rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
+                               break
+                       end
+                       var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
+                       var bytelen = st - pos
+                       rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
+                       pos = st
+                       remsp -= bytelen
+               end
+               if rets isa Concat then return rets.balance
+               return rets
+       end
+
+       # Read all the stream until the eof.
+       #
+       # The content of the file is returned verbatim.
+       fun read_all_bytes: Bytes
        do
-               if last_error != null then return ""
-               var s = new FlatBuffer
+               if last_error != null then return new Bytes.empty
+               var s = new Bytes.empty
                while not eof do
-                       var c = read_char
-                       if c >= 0 then s.add(c.ascii)
+                       var c = read_byte
+                       if c != null then s.add(c)
                end
-               return s.to_s
+               return s
        end
 
        # Read a string until the end of the line and append it to `s`.
@@ -207,12 +251,11 @@ abstract class Reader
                if last_error != null then return
                loop
                        var x = read_char
-                       if x == -1 then
+                       if x == null then
                                if eof then return
                        else
-                               var c = x.ascii
-                               s.chars.push(c)
-                               if c == '\n' then return
+                               s.chars.push(x)
+                               if x == '\n' then return
                        end
                end
        end
@@ -241,14 +284,13 @@ abstract class Reader
        do
                var buf = new FlatBuffer
                var c = read_nonwhitespace
-               if c > 0 then
-                       buf.add(c.ascii)
+               if c != null then
+                       buf.add(c)
                        while not eof do
                                c = read_char
-                               if c < 0 then break
-                               var a = c.ascii
-                               if a.is_whitespace then break
-                               buf.add(a)
+                               if c == null then break
+                               if c.is_whitespace then break
+                               buf.add(c)
                        end
                end
                var res = buf.to_s
@@ -258,25 +300,25 @@ abstract class Reader
        # Skip whitespace characters (if any) then return the following non-whitespace character.
        #
        # Returns the code point of the character.
-       # Return -1 on end of file or error.
+       # Returns `null` on end of file or error.
        #
        # In fact, this method works like `read_char` except it skips whitespace.
        #
        # ~~~
        # var w = new StringReader(" \nab\tc")
-       # assert w.read_nonwhitespace == 'a'.ascii
-       # assert w.read_nonwhitespace == 'b'.ascii
-       # assert w.read_nonwhitespace == 'c'.ascii
-       # assert w.read_nonwhitespace == -1
+       # assert w.read_nonwhitespace == 'a'
+       # assert w.read_nonwhitespace == 'b'
+       # assert w.read_nonwhitespace == 'c'
+       # assert w.read_nonwhitespace == null
        # ~~~
        #
        # `Char::is_whitespace` determines what is a whitespace.
-       fun read_nonwhitespace: Int
+       fun read_nonwhitespace: nullable Char
        do
-               var c = -1
+               var c: nullable Char = null
                while not eof do
                        c = read_char
-                       if c < 0 or not c.ascii.is_whitespace then break
+                       if c == null or not c.is_whitespace then break
                end
                return c
        end
@@ -341,9 +383,19 @@ end
 # A `Stream` that can be written to
 abstract class Writer
        super Stream
+
+       # The coder from a nit UTF-8 String to the output file
+       var coder: Coder = utf8_coder is writable
+
+       # Writes bytes from `s`
+       fun write_bytes(s: Bytes) is abstract
+
        # write a string
        fun write(s: Text) is abstract
 
+       # Write a single byte
+       fun write_byte(value: Byte) is abstract
+
        # Can the stream be used to write
        fun is_writable: Bool is abstract
 end
@@ -361,7 +413,7 @@ interface Writable
 
        # Like `write_to` but return a new String (may be quite large)
        #
-       # This funtionnality is anectodical, since the point
+       # This functionality is anecdotal, since the point
        # of streamable object to to be efficienlty written to a
        # stream without having to allocate and concatenate strings
        fun write_to_string: String
@@ -382,62 +434,125 @@ abstract class BufferedReader
        super Reader
        redef fun read_char
        do
-               if last_error != null then return -1
+               if last_error != null then return null
                if eof then
                        last_error = new IOError("Stream has reached eof")
-                       return -1
+                       return null
                end
-               var c = _buffer.chars[_buffer_pos]
+               # TODO: Fix when supporting UTF-8
+               var c = _buffer[_buffer_pos].to_i.ascii
                _buffer_pos += 1
-               return c.ascii
+               return c
        end
 
-       redef fun read(i)
+       redef fun read_byte
        do
-               if last_error != null then return ""
-               if _buffer.length == _buffer_pos then
-                       if not eof then
-                               return read(i)
-                       end
-                       return ""
+               if last_error != null then return null
+               if eof then
+                       last_error = new IOError("Stream has reached eof")
+                       return null
+               end
+               var c = _buffer[_buffer_pos]
+               _buffer_pos += 1
+               return c
+       end
+
+       # Resets the internal buffer
+       fun buffer_reset do
+               _buffer_length = 0
+               _buffer_pos = 0
+       end
+
+       # Peeks up to `i` bytes in the buffer
+       #
+       # The operation does not consume the buffer
+       #
+       # ~~~nitish
+       # var x = new FileReader.open("File.txt")
+       # assert x.peek(5) == x.read(5)
+       # ~~~
+       fun peek(i: Int): Bytes do
+               if eof then return new Bytes.empty
+               var remsp = _buffer_length - _buffer_pos
+               if i <= remsp then
+                       var bf = new Bytes.with_capacity(i)
+                       bf.append_ns_from(_buffer, i, _buffer_pos)
+                       return bf
                end
-               if _buffer_pos + i >= _buffer.length then
-                       var from = _buffer_pos
-                       _buffer_pos = _buffer.length
-                       if from == 0 then return _buffer.to_s
-                       return _buffer.substring_from(from).to_s
+               var bf = new Bytes.with_capacity(i)
+               bf.append_ns_from(_buffer, remsp, _buffer_pos)
+               _buffer_pos = _buffer_length
+               read_intern(i - bf.length, bf)
+               remsp = _buffer_length - _buffer_pos
+               var full_len = bf.length + remsp
+               if full_len > _buffer_capacity then
+                       var c = _buffer_capacity
+                       while c < full_len do c = c * 2 + 2
+                       _buffer_capacity = c
                end
-               _buffer_pos += i
-               return _buffer.substring(_buffer_pos - i, i).to_s
+               var nns = new NativeString(_buffer_capacity)
+               bf.items.copy_to(nns, bf.length, 0, 0)
+               _buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
+               _buffer = nns
+               _buffer_pos = 0
+               _buffer_length = full_len
+               return bf
        end
 
-       redef fun read_all
+       redef fun read_bytes(i)
        do
-               if last_error != null then return ""
-               var s = new FlatBuffer
+               if last_error != null then return new Bytes.empty
+               var buf = new Bytes.with_capacity(i)
+               read_intern(i, buf)
+               return buf
+       end
+
+       # Fills `buf` with at most `i` bytes read from `self`
+       private fun read_intern(i: Int, buf: Bytes): Int do
+               if eof then return 0
+               var p = _buffer_pos
+               var bufsp = _buffer_length - p
+               if bufsp >= i then
+                       _buffer_pos += i
+                       buf.append_ns_from(_buffer, i, p)
+                       return i
+               end
+               _buffer_pos = _buffer_length
+               var readln = _buffer_length - p
+               buf.append_ns_from(_buffer, readln, p)
+               var rd = read_intern(i - readln, buf)
+               return rd + readln
+       end
+
+       redef fun read_all_bytes
+       do
+               if last_error != null then return new Bytes.empty
+               var s = new Bytes.with_capacity(10)
+               var b = _buffer
                while not eof do
                        var j = _buffer_pos
-                       var k = _buffer.length
-                       while j < k do
-                               s.add(_buffer[j])
-                               j += 1
-                       end
-                       _buffer_pos = j
+                       var k = _buffer_length
+                       var rd_sz = k - j
+                       s.append_ns_from(b, rd_sz, j)
+                       _buffer_pos = k
                        fill_buffer
                end
-               return s.to_s
+               return s
        end
 
        redef fun append_line_to(s)
        do
+               var lb = new Bytes.with_capacity(10)
                loop
                        # First phase: look for a '\n'
                        var i = _buffer_pos
-                       while i < _buffer.length and _buffer.chars[i] != '\n' do i += 1
+                       while i < _buffer_length and _buffer[i] != 0xAu8 do
+                               i += 1
+                       end
 
                        var eol
-                       if i < _buffer.length then
-                               assert _buffer.chars[i] == '\n'
+                       if i < _buffer_length then
+                               assert _buffer[i] == 0xAu8
                                i += 1
                                eol = true
                        else
@@ -446,27 +561,29 @@ abstract class BufferedReader
 
                        # if there is something to append
                        if i > _buffer_pos then
-                               # Enlarge the string (if needed)
-                               s.enlarge(s.length + i - _buffer_pos)
-
                                # Copy from the buffer to the string
                                var j = _buffer_pos
                                while j < i do
-                                       s.add(_buffer.chars[j])
+                                       lb.add(_buffer[j])
                                        j += 1
                                end
                                _buffer_pos = i
                        else
                                assert end_reached
+                               s.append lb.to_s
                                return
                        end
 
                        if eol then
                                # so \n is found
+                               s.append lb.to_s
                                return
                        else
                                # so \n is not found
-                               if end_reached then return
+                               if end_reached then
+                                       s.append lb.to_s
+                                       return
+                               end
                                fill_buffer
                        end
                end
@@ -474,29 +591,37 @@ abstract class BufferedReader
 
        redef fun eof
        do
-               if _buffer_pos < _buffer.length then return false
+               if _buffer_pos < _buffer_length then return false
                if end_reached then return true
                fill_buffer
-               return _buffer_pos >= _buffer.length and end_reached
+               return _buffer_pos >= _buffer_length and end_reached
        end
 
        # The buffer
-       private var buffer: nullable FlatBuffer = null
+       private var buffer: NativeString = new NativeString(0)
 
        # The current position in the buffer
-       private var buffer_pos: Int = 0
+       private var buffer_pos = 0
+
+       # Length of the current buffer (i.e. number of bytes in the buffer)
+       private var buffer_length = 0
+
+       # Capacity of the buffer
+       private var buffer_capacity = 0
 
        # Fill the buffer
        protected fun fill_buffer is abstract
 
-       # Is the last fill_buffer reach the end
+       # Has the last fill_buffer reached the end
        protected fun end_reached: Bool is abstract
 
        # Allocate a `_buffer` for a given `capacity`.
        protected fun prepare_buffer(capacity: Int)
        do
-               _buffer = new FlatBuffer.with_capacity(capacity)
+               _buffer = new NativeString(capacity)
                _buffer_pos = 0 # need to read
+               _buffer_length = 0
+               _buffer_capacity = capacity
        end
 end
 
@@ -513,8 +638,13 @@ class StringWriter
        super Writer
 
        private var content = new Array[String]
-       redef fun to_s do return content.to_s
+       redef fun to_s do return content.plain_to_s
        redef fun is_writable do return not closed
+
+       redef fun write_bytes(b) do
+               content.add(b.to_s)
+       end
+
        redef fun write(str)
        do
                assert not closed
@@ -536,17 +666,27 @@ class StringReader
        # The string to read from.
        var source: String
 
-       # The current position in the string.
+       # The current position in the string (bytewise).
        private var cursor: Int = 0
 
        redef fun read_char do
                if cursor < source.length then
-                       var c = source[cursor].ascii
+                       # TODO: Fix when supporting UTF-8
+                       var c = source[cursor]
+                       cursor += 1
+                       return c
+               else
+                       return null
+               end
+       end
 
+       # Reads the byte located at `cursor` and moves `cursor` one byte forward.
+       #
+       # Returns `null` once every byte of `source` has been consumed.
+       # The bound is `bytelen` rather than `length` because `cursor` indexes
+       # `source.bytes`, and `eof` is likewise defined against `bytelen`.
+       redef fun read_byte do
+               if cursor < source.bytelen then
+                       var c = source.bytes[cursor]
                        cursor += 1
                        return c
                else
-                       return -1
+                       return null
                end
        end
 
@@ -554,12 +694,12 @@ class StringReader
                source = ""
        end
 
-       redef fun read_all do
-               var c = cursor
-               cursor = source.length
-               if c == 0 then return source
-               return source.substring_from(c)
+       # Returns all the bytes of `source` that were not read yet, and consumes them.
+       #
+       # `cursor` is a byte offset, so the remaining size is computed from
+       # `bytelen`. `cursor` is then moved to the end so that `eof` holds
+       # afterwards and a second call yields no data.
+       redef fun read_all_bytes do
+               var nslen = source.bytelen - cursor
+               # Nothing left (or `source` was emptied by `close`): avoid a non-positive allocation
+               if nslen <= 0 then return new Bytes.empty
+               var nns = new NativeString(nslen)
+               source.copy_to_native(nns, nslen, cursor, 0)
+               cursor = source.bytelen
+               return new Bytes(nns, nslen, nslen)
        end
 
-       redef fun eof do return cursor >= source.length
+       redef fun eof do return cursor >= source.bytelen
 end