stream: add `read_word` and `read_nonwhitespace`
[nit.git] / lib / standard / stream.nit
index f928fac..1e4ba18 100644 (file)
@@ -25,11 +25,11 @@ class IOError
        super Error
 end
 
-# Abstract stream class
-abstract class IOS
+# Any kind of stream to read/write/both to or from a source
+abstract class Stream
        # Error produced by the file stream
        #
-       #     var ifs = new IFStream.open("donotmakethisfile.binx")
+       #     var ifs = new FileReader.open("donotmakethisfile.binx")
        #     ifs.read_all
        #     ifs.close
        #     assert ifs.last_error != null
@@ -39,9 +39,9 @@ abstract class IOS
        fun close is abstract
 end
 
-# Abstract input streams
-abstract class IStream
-       super IOS
+# A `Stream` that can be read from
+abstract class Reader
+       super Stream
        # Read a character. Return its ASCII value, -1 on EOF or timeout
        fun read_char: Int is abstract
 
@@ -61,16 +61,103 @@ abstract class IStream
        end
 
        # Read a string until the end of the line.
+       #
+       # The line terminator '\n' and '\r\n', if any, is removed in each line.
+       #
+       # ~~~
+       # var txt = "Hello\n\nWorld\n"
+       # var i = new StringReader(txt)
+       # assert i.read_line == "Hello"
+       # assert i.read_line == ""
+       # assert i.read_line == "World"
+       # assert i.eof
+       # ~~~
+       #
+       # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
+       # the end of file (EOF) are considered to delimit the end of lines.
+       # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
+       #
+       # ~~~
+       # var txt2 = "Hello\r\n\n\rWorld"
+       # var i2 = new StringReader(txt2)
+       # assert i2.read_line == "Hello"
+       # assert i2.read_line == ""
+       # assert i2.read_line == "\rWorld"
+       # assert i2.eof
+       # ~~~
+       #
+       # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
        fun read_line: String
        do
                if last_error != null then return ""
-               assert not eof
+               if eof then return ""
                var s = new FlatBuffer
                append_line_to(s)
-               return s.to_s
+               return s.to_s.chomp
+       end
+
+       # Read all the lines until the eof.
+       #
+       # The line terminators `\n` and `\r\n` are removed from each line.
+       #
+       # ~~~
+       # var txt = "Hello\n\nWorld\n"
+       # var i = new StringReader(txt)
+       # assert i.read_lines == ["Hello", "", "World"]
+       # ~~~
+       #
+       # This method is more efficient than splitting the result of `read_all`.
+       # Reading also stops as soon as `last_error` is set.
+       #
+       # NOTE: SEE `read_line` for details.
+       fun read_lines: Array[String]
+       do
+               var res = new Array[String]
+               # `read_line` reads nothing once `last_error` is set, so an error
+               # raised before EOF must end the loop or it would never terminate.
+               while not eof and last_error == null do res.add read_line
+               return res
        end
 
+       # Return an iterator that reads each line.
+       #
+       # The line terminators `\n` and `\r\n` are removed from each line.
+       # The lines are read with `read_line`. See that method for details.
+       #
+       # ~~~
+       # var txt = "Hello\n\nWorld\n"
+       # var i = new StringReader(txt)
+       # assert i.each_line.to_a == ["Hello", "", "World"]
+       # ~~~
+       #
+       # Unlike `read_lines`, which reads all lines at the call, `each_line` is lazy.
+       # Therefore, the stream should not be closed until its end is reached.
+       #
+       # ~~~
+       # i = new StringReader(txt)
+       # var el = i.each_line
+       #
+       # assert el.item == "Hello"
+       # el.next
+       # assert el.item == ""
+       # el.next
+       #
+       # i.close
+       #
+       # assert not el.is_ok
+       # # closed before "world" is read
+       # ~~~
+       fun each_line: LineIterator do return new LineIterator(self)
+
        # Read all the stream until the eof.
+       #
+       # The content of the file is returned verbatim.
+       #
+       # ~~~
+       # var txt = "Hello\n\nWorld\n"
+       # var i = new StringReader(txt)
+       # assert i.read_all == txt
+       # ~~~
        fun read_all: String
        do
                if last_error != null then return ""
@@ -83,6 +170,38 @@ abstract class IStream
        end
 
        # Read a string until the end of the line and append it to `s`.
+       #
+       # Unlike `read_line` and other related methods,
+       # the line terminator '\n', if any, is preserved in each line.
+       # Use the method `Text::chomp` to safely remove it.
+       #
+       # ~~~
+       # var txt = "Hello\n\nWorld\n"
+       # var i = new StringReader(txt)
+       # var b = new FlatBuffer
+       # i.append_line_to(b)
+       # assert b == "Hello\n"
+       # i.append_line_to(b)
+       # assert b == "Hello\n\n"
+       # i.append_line_to(b)
+       # assert b == txt
+       # assert i.eof
+       # ~~~
+       #
+       # If `\n` is not present at the end of the result, it means that
+       # a non-eol terminated last line was returned.
+       #
+       # ~~~
+       # var i2 = new StringReader("hello")
+       # assert not i2.eof
+       # var b2 = new FlatBuffer
+       # i2.append_line_to(b2)
+       # assert b2 == "hello"
+       # assert i2.eof
+       # ~~~
+       #
+       # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
+       # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
        fun append_line_to(s: Buffer)
        do
                if last_error != null then return
@@ -101,20 +220,127 @@ abstract class IStream
        # Is there something to read.
        # This function returns 'false' if there is something to read.
        fun eof: Bool is abstract
+
+       # Read the next sequence of non whitespace characters.
+       #
+       # Leading whitespace characters are skipped.
+       # The first whitespace character that follows the result is consumed.
+       #
+       # An empty string is returned if the end of the file or an error is encountered.
+       #
+       # ~~~
+       # var w = new StringReader(" Hello, \n\t World!")
+       # assert w.read_word == "Hello,"
+       # assert w.read_char == '\n'.ascii
+       # assert w.read_word == "World!"
+       # assert w.read_word == ""
+       # ~~~
+       #
+       # `Char::is_whitespace` determines what is a whitespace.
+       fun read_word: String
+       do
+               var word = new FlatBuffer
+               var code = read_nonwhitespace
+               # No word character could be read: return the (empty) buffer as is.
+               if code <= 0 then return word.to_s
+               word.add(code.ascii)
+               while not eof do
+                       code = read_char
+                       if code < 0 then break
+                       var ch = code.ascii
+                       # The delimiting whitespace is consumed but not stored.
+                       if ch.is_whitespace then break
+                       word.add(ch)
+               end
+               return word.to_s
+       end
+
+       # Skip whitespace characters (if any) then return the following non-whitespace character.
+       #
+       # Returns the code point of the character.
+       # Return -1 on end of file or error.
+       #
+       # In fact, this method works like `read_char` except it skips whitespace.
+       #
+       # ~~~
+       # var w = new StringReader(" \nab\tc")
+       # assert w.read_nonwhitespace == 'a'.ascii
+       # assert w.read_nonwhitespace == 'b'.ascii
+       # assert w.read_nonwhitespace == 'c'.ascii
+       # assert w.read_nonwhitespace == -1
+       # ~~~
+       #
+       # `Char::is_whitespace` determines what is a whitespace.
+       fun read_nonwhitespace: Int
+       do
+               while not eof do
+                       var c = read_char
+                       # Error/EOF (negative) or a non-whitespace character: hand it back.
+                       if c < 0 or not c.ascii.is_whitespace then return c
+               end
+               return -1 # only whitespace was left before the end of the stream
+       end
 end
 
-# IStream capable of declaring if readable without blocking
-abstract class PollableIStream
-       super IStream
+# Iterator returned by `Reader::each_line`.
+# See the aforementioned method for details.
+class LineIterator
+       super Iterator[String]
+
+       # The stream the lines are read from
+       var stream: Reader
+
+       redef fun is_ok
+       do
+               # A line already fetched by `item` stays current until `next`.
+               if line != null then return true
+               var res = not stream.eof
+               if not res and close_on_finish then stream.close
+               return res
+       end
+
+       redef fun item
+       do
+               var line = self.line
+               if line == null then line = stream.read_line
+               self.line = line
+               return line
+       end
+
+       # The last line read (cache); null when no line is pending
+       private var line: nullable String = null
+
+       redef fun next
+       do
+               # force the read
+               if line == null then item
+               # drop the line
+               line = null
+       end
+
+       # Close the stream when the stream is at the EOF.
+       #
+       # Default is false.
+       var close_on_finish = false is writable
+
+       redef fun finish
+       do
+               if close_on_finish then stream.close
+       end
+end
+
+# `Reader` capable of declaring if readable without blocking
+abstract class PollableReader
+       super Reader
 
        # Is there something to read? (without blocking)
        fun poll_in: Bool is abstract
 
 end
 
-# Abstract output stream
-abstract class OStream
-       super IOS
+# A `Stream` that can be written to
+abstract class Writer
+       super Stream
        # write a string
        fun write(s: Text) is abstract
 
@@ -122,16 +348,16 @@ abstract class OStream
        fun is_writable: Bool is abstract
 end
 
-# Things that can be efficienlty writen to a OStream
+# Things that can be efficiently written to a `Writer`
 #
-# The point of this interface it to allow is instance to be efficenty
-# writen into a OStream without having to allocate a big String object
+# The point of this interface is to allow the instance to be efficiently
+# written into a `Writer`.
 #
-# ready-to-save documents usually provide this interface.
-interface Streamable
+# Ready-to-save documents usually provide this interface.
+interface Writable
        # Write itself to a `stream`
        # The specific logic it let to the concrete subclasses
-       fun write_to(stream: OStream) is abstract
+       fun write_to(stream: Writer) is abstract
 
        # Like `write_to` but return a new String (may be quite large)
        #
@@ -140,28 +366,25 @@ interface Streamable
        # stream without having to allocate and concatenate strings
        fun write_to_string: String
        do
-               var stream = new StringOStream
+               var stream = new StringWriter
                write_to(stream)
                return stream.to_s
        end
 end
 
 redef class Text
-       super Streamable
+       super Writable
        redef fun write_to(stream) do stream.write(self)
 end
 
-# Input streams with a buffer
-abstract class BufferedIStream
-       super IStream
+# Input streams with a buffered input for efficiency purposes
+abstract class BufferedReader
+       super Reader
        redef fun read_char
        do
-               if last_error != null then return 0
-               if eof then last_error = new IOError("Stream has reached eof")
-               if _buffer_pos >= _buffer.length then
-                       fill_buffer
-               end
-               if _buffer_pos >= _buffer.length then
+               if last_error != null then return -1
+               if eof then
+                       last_error = new IOError("Stream has reached eof")
                        return -1
                end
                var c = _buffer.chars[_buffer_pos]
@@ -174,7 +397,6 @@ abstract class BufferedIStream
                if last_error != null then return ""
                if _buffer.length == _buffer_pos then
                        if not eof then
-                               fill_buffer
                                return read(i)
                        end
                        return ""
@@ -212,6 +434,15 @@ abstract class BufferedIStream
                        var i = _buffer_pos
                        while i < _buffer.length and _buffer.chars[i] != '\n' do i += 1
 
+                       var eol
+                       if i < _buffer.length then
+                               assert _buffer.chars[i] == '\n'
+                               i += 1
+                               eol = true
+                       else
+                               eol = false
+                       end
+
                        # if there is something to append
                        if i > _buffer_pos then
                                # Enlarge the string (if needed)
@@ -223,25 +454,30 @@ abstract class BufferedIStream
                                        s.add(_buffer.chars[j])
                                        j += 1
                                end
+                               _buffer_pos = i
+                       else
+                               assert end_reached
+                               return
                        end
 
-                       if i < _buffer.length then
-                               # so \n is in _buffer[i]
-                               _buffer_pos = i + 1 # skip \n
+                       if eol then
+                               # so \n is found
                                return
                        else
                                # so \n is not found
-                               _buffer_pos = i
-                               if end_reached then
-                                       return
-                               else
-                                       fill_buffer
-                               end
+                               if end_reached then return
+                               fill_buffer
                        end
                end
        end
 
-       redef fun eof do return _buffer_pos >= _buffer.length and end_reached
+       redef fun eof
+       do
+               if _buffer_pos < _buffer.length then return false
+               # Buffer drained: try one refill unless the source is exhausted.
+               if not end_reached then fill_buffer
+               return _buffer_pos >= _buffer.length and end_reached
+       end
 
        # The buffer
        private var buffer: nullable FlatBuffer = null
@@ -263,16 +499,17 @@ abstract class BufferedIStream
        end
 end
 
-abstract class IOStream
-       super IStream
-       super OStream
+# A `Stream` that can be written to and read from
+abstract class Duplex
+       super Reader
+       super Writer
 end
 
-# Stream to a String.
+# `Stream` that can be used to write to a `String`
 #
-# Mainly used for compatibility with OStream type and tests.
-class StringOStream
-       super OStream
+# Mainly used for compatibility with Writer type and tests.
+class StringWriter
+       super Writer
 
        private var content = new Array[String]
        redef fun to_s do return content.to_s
@@ -283,15 +520,17 @@ class StringOStream
                content.add(str.to_s)
        end
 
+       # Is the stream closed?
        protected var closed = false
+
        redef fun close do closed = true
 end
 
-# Stream from a String.
+# `Stream` used to read from a `String`
 #
-# Mainly used for compatibility with IStream type and tests.
-class StringIStream
-       super IStream
+# Mainly used for compatibility with Reader type and tests.
+class StringReader
+       super Reader
 
        # The string to read from.
        var source: String
@@ -314,5 +553,12 @@ class StringIStream
                source = ""
        end
 
+       redef fun read_all do
+               var start = cursor
+               cursor = source.length # the whole remainder is consumed
+               if start != 0 then return source.substring_from(start)
+               return source # nothing was read yet: return `source` itself
+       end
+
        redef fun eof do return cursor >= source.length
 end