# Input and output streams of characters
module stream
-intrude import ropes
+intrude import text::ropes
import error
+intrude import bytes
+import codecs
in "C" `{
#include <unistd.h>
# A `Stream` that can be read from
abstract class Reader
super Stream
- # Read a character. Return its ASCII value, -1 on EOF or timeout
- fun read_char: Int is abstract
+
+ # Decoder used to transform input bytes to UTF-8
+ var decoder: Decoder = utf8_decoder is writable
+
+ # Reads a character. Returns `null` on EOF or timeout
+ fun read_char: nullable Char is abstract
+
+ # Reads a byte. Returns `null` on EOF or timeout
+ fun read_byte: nullable Byte is abstract
+
+ # Reads at most `i` bytes and returns them as a String
+ #
+ # The bytes come from `read_bytes` and are converted with `to_s`.
+ fun read(i: Int): String do return read_bytes(i).to_s
# Read at most i bytes
+ #
+ # Returns an empty `Bytes` if an error was already recorded in `last_error`.
+ # Reading stops early when `eof` is reached.
- fun read(i: Int): String
+ fun read_bytes(i: Int): Bytes
do
- if last_error != null then return ""
- var s = new FlatBuffer.with_capacity(i)
+ if last_error != null then return new Bytes.empty
+ # Let `Bytes` reserve room for `i` bytes up front instead of wrapping
+ # a native buffer declared with a capacity of 0.
+ var buf = new Bytes.with_capacity(i)
while i > 0 and not eof do
- var c = read_char
- if c >= 0 then
- s.add(c.ascii)
+ var c = read_byte
+ # Only bytes that were really read count against `i`
+ if c != null then
+ buf.add c
i -= 1
end
end
- return s.to_s
+ return buf
end
# Read a string until the end of the line.
# Read all the stream until the eof.
#
- # The content of the file is returned verbatim.
+ # The content of the file is returned as a String.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
- fun read_all: String
+ fun read_all: String do
+ # Everything is read as bytes first, then cut into string chunks
+ var s = read_all_bytes
+ # Bytes that are not valid UTF-8 are replaced by the result of `clean_utf8`
+ if not s.is_utf8 then s = s.clean_utf8
+ var slen = s.length
+ if slen == 0 then return ""
+ var rets = ""
+ var pos = 0
+ var sits = s.items
+ # Number of bytes that were not put in a chunk yet
+ var remsp = slen
+ while pos < slen do
+ # The 129 size was decided more or less arbitrarily
+ # It will require some more benchmarking to compute
+ # if this is the best size or not
+ var chunksz = 129
+ # Fewer than `chunksz` bytes are left: they form the last chunk
+ if chunksz > remsp then
+ rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
+ break
+ end
+ # The chunk ends just before the position given by `find_beginning_of_char_at`
+ # NOTE(review): presumably so that no multi-byte character is split - confirm
+ var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
+ var bytelen = st - pos
+ rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
+ pos = st
+ remsp -= bytelen
+ end
+ # A result made of concatenations is balanced before being returned
+ if rets isa Concat then return rets.balance
+ return rets
+ end
+
+ # Read all the stream until the eof.
+ #
+ # The content of the file is returned verbatim.
+ #
+ # Returns an empty `Bytes` if an error was already recorded in `last_error`.
+ fun read_all_bytes: Bytes
do
- if last_error != null then return ""
- var s = new FlatBuffer
+ if last_error != null then return new Bytes.empty
+ var s = new Bytes.empty
while not eof do
- var c = read_char
- if c >= 0 then s.add(c.ascii)
+ var c = read_byte
+ # `read_byte` may return `null`: nothing is added in that case
+ if c != null then s.add(c)
end
- return s.to_s
+ return s
end
# Read a string until the end of the line and append it to `s`.
if last_error != null then return
loop
var x = read_char
- if x == -1 then
+ if x == null then
if eof then return
else
- var c = x.ascii
- s.chars.push(c)
- if c == '\n' then return
+ s.chars.push(x)
+ if x == '\n' then return
end
end
end
do
var buf = new FlatBuffer
var c = read_nonwhitespace
- if c > 0 then
- buf.add(c.ascii)
+ if c != null then
+ buf.add(c)
while not eof do
c = read_char
- if c < 0 then break
- var a = c.ascii
- if a.is_whitespace then break
- buf.add(a)
+ if c == null then break
+ if c.is_whitespace then break
+ buf.add(c)
end
end
var res = buf.to_s
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
- # Returns the code point of the character.
+ # Returns the character itself.
- # Return -1 on end of file or error.
+ # Returns `null` on end of file or error.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
- # assert w.read_nonwhitespace == 'a'.ascii
- # assert w.read_nonwhitespace == 'b'.ascii
- # assert w.read_nonwhitespace == 'c'.ascii
- # assert w.read_nonwhitespace == -1
+ # assert w.read_nonwhitespace == 'a'
+ # assert w.read_nonwhitespace == 'b'
+ # assert w.read_nonwhitespace == 'c'
+ # assert w.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
- fun read_nonwhitespace: Int
+ fun read_nonwhitespace: nullable Char
do
- var c = -1
+ # Stays `null` when the stream is already at `eof`
+ var c: nullable Char = null
while not eof do
c = read_char
- if c < 0 or not c.ascii.is_whitespace then break
+ # Stop on a failed read or on the first non-whitespace character
+ if c == null or not c.is_whitespace then break
end
return c
end
# A `Stream` that can be written to
abstract class Writer
super Stream
+
+ # The coder from a Nit UTF-8 String to the output file
+ #
+ # Defaults to `utf8_coder` and can be replaced since it is `writable`.
+ var coder: Coder = utf8_coder is writable
+
+ # Writes bytes from `s`
+ fun write_bytes(s: Bytes) is abstract
+
- # write a string
+ # Writes the text `s`
fun write(s: Text) is abstract
+ # Write a single byte
+ fun write_byte(value: Byte) is abstract
+
# Can the stream be used to write
fun is_writable: Bool is abstract
end
# Like `write_to` but return a new String (may be quite large)
#
- # This funtionnality is anectodical, since the point
+ # This functionality is anecdotal, since the point
# of a streamable object is to be efficiently written to a
# stream without having to allocate and concatenate strings
fun write_to_string: String
super Reader
+ # Reads the next character from the internal buffer
+ #
+ # Returns `null` if an error is already recorded in `last_error`, or if
+ # the stream is at `eof` (an `IOError` is then stored in `last_error`).
redef fun read_char
do
- if last_error != null then return -1
+ if last_error != null then return null
if eof then
last_error = new IOError("Stream has reached eof")
- return -1
+ return null
end
- var c = _buffer.chars[_buffer_pos]
+ # TODO: Fix when supporting UTF-8
+ # For now a single byte of the buffer is turned into a character
+ var c = _buffer[_buffer_pos].to_i.ascii
_buffer_pos += 1
- return c.ascii
+ return c
end
- redef fun read(i)
+ # Reads the next byte from the internal buffer
+ #
+ # Returns `null` if an error is already recorded in `last_error`, or if
+ # the stream is at `eof` (an `IOError` is then stored in `last_error`).
+ redef fun read_byte
do
- if last_error != null then return ""
- if _buffer.length == _buffer_pos then
- if not eof then
- return read(i)
- end
- return ""
+ if last_error != null then return null
+ if eof then
+ last_error = new IOError("Stream has reached eof")
+ return null
+ end
+ # Take the byte under the read position, then move past it
+ var c = _buffer[_buffer_pos]
+ _buffer_pos += 1
+ return c
+ end
+
+ # Empties the internal buffer: the read position and the number
+ # of buffered bytes both go back to 0
+ fun buffer_reset do
+ _buffer_pos = 0
+ _buffer_length = 0
+ end
+
+ # Peeks up to `i` bytes in the buffer
+ #
+ # The operation does not consume the buffer
+ #
+ # Returns an empty `Bytes` when the stream is at `eof`.
+ #
+ # ~~~nitish
+ # var x = new FileReader.open("File.txt")
+ # assert x.peek(5) == x.read_bytes(5)
+ # ~~~
+ fun peek(i: Int): Bytes do
+ if eof then return new Bytes.empty
+ var remsp = _buffer_length - _buffer_pos
+ # Enough bytes are already buffered: copy them, `_buffer_pos` is left untouched
+ if i <= remsp then
+ var bf = new Bytes.with_capacity(i)
+ bf.append_ns_from(_buffer, i, _buffer_pos)
+ return bf
end
- if _buffer_pos + i >= _buffer.length then
- var from = _buffer_pos
- _buffer_pos = _buffer.length
- if from == 0 then return _buffer.to_s
- return _buffer.substring_from(from).to_s
+ # Not enough buffered bytes: take what is left, then get the
+ # missing ones through `read_intern` (which moves `_buffer_pos`)
+ var bf = new Bytes.with_capacity(i)
+ bf.append_ns_from(_buffer, remsp, _buffer_pos)
+ _buffer_pos = _buffer_length
+ read_intern(i - bf.length, bf)
+ # Bytes of the current buffer that are still unread after `read_intern`
+ remsp = _buffer_length - _buffer_pos
+ var full_len = bf.length + remsp
+ # Grow the recorded capacity until the peeked and unread bytes fit
+ if full_len > _buffer_capacity then
+ var c = _buffer_capacity
+ while c < full_len do c = c * 2 + 2
+ _buffer_capacity = c
end
- _buffer_pos += i
- return _buffer.substring(_buffer_pos - i, i).to_s
+ # Rebuild the buffer with the peeked bytes first, followed by the
+ # unread ones, and restart reading from position 0
+ var nns = new NativeString(_buffer_capacity)
+ bf.items.copy_to(nns, bf.length, 0, 0)
+ _buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
+ _buffer = nns
+ _buffer_pos = 0
+ _buffer_length = full_len
+ return bf
end
- redef fun read_all
+ # Reads at most `i` bytes through `read_intern`
+ #
+ # Returns an empty `Bytes` if an error was already recorded in `last_error`.
+ redef fun read_bytes(i)
do
- if last_error != null then return ""
- var s = new FlatBuffer
+ if last_error != null then return new Bytes.empty
+ var buf = new Bytes.with_capacity(i)
+ # The count returned by `read_intern` is not used: `buf` holds the bytes
+ read_intern(i, buf)
+ return buf
+ end
+
+ # Appends at most `i` bytes taken from `self` to `buf`
+ #
+ # Returns the number of bytes appended, which is lower than `i`
+ # when `eof` is reached first.
+ private fun read_intern(i: Int, buf: Bytes): Int do
+ var copied = 0
+ var wanted = i
+ loop
+ if eof then return copied
+ var start = _buffer_pos
+ var avail = _buffer_length - start
+ if avail >= wanted then
+ # The buffer holds enough bytes to finish the request
+ _buffer_pos += wanted
+ buf.append_ns_from(_buffer, wanted, start)
+ return copied + wanted
+ end
+ # Drain the buffer, then go on with what is still missing
+ _buffer_pos = _buffer_length
+ buf.append_ns_from(_buffer, avail, start)
+ copied += avail
+ wanted -= avail
+ end
+ end
+
+ # Reads every remaining byte of the stream
+ #
+ # Returns an empty `Bytes` if an error was already recorded in `last_error`.
+ redef fun read_all_bytes
+ do
+ if last_error != null then return new Bytes.empty
+ var s = new Bytes.with_capacity(10)
while not eof do
var j = _buffer_pos
- var k = _buffer.length
- while j < k do
- s.add(_buffer[j])
- j += 1
- end
- _buffer_pos = j
+ var k = _buffer_length
+ var rd_sz = k - j
+ # Always copy from the current `_buffer`: the attribute can be
+ # reassigned (`prepare_buffer` and `peek` both do it), so an alias
+ # taken before the loop could designate a buffer no longer in use.
+ s.append_ns_from(_buffer, rd_sz, j)
+ _buffer_pos = k
fill_buffer
end
- return s.to_s
+ return s
end
redef fun append_line_to(s)
do
+ var lb = new Bytes.with_capacity(10)
loop
# First phase: look for a '\n'
var i = _buffer_pos
- while i < _buffer.length and _buffer.chars[i] != '\n' do i += 1
+ while i < _buffer_length and _buffer[i] != 0xAu8 do
+ i += 1
+ end
var eol
- if i < _buffer.length then
- assert _buffer.chars[i] == '\n'
+ if i < _buffer_length then
+ assert _buffer[i] == 0xAu8
i += 1
eol = true
else
# if there is something to append
if i > _buffer_pos then
- # Enlarge the string (if needed)
- s.enlarge(s.length + i - _buffer_pos)
-
# Copy from the buffer to the string
var j = _buffer_pos
while j < i do
- s.add(_buffer.chars[j])
+ lb.add(_buffer[j])
j += 1
end
_buffer_pos = i
else
assert end_reached
+ s.append lb.to_s
return
end
if eol then
# so \n is found
+ s.append lb.to_s
return
else
# so \n is not found
- if end_reached then return
+ if end_reached then
+ s.append lb.to_s
+ return
+ end
fill_buffer
end
end
+ # Is there nothing left to read?
+ #
+ # May call `fill_buffer` when the buffer is exhausted and the end
+ # was not reached yet.
redef fun eof
do
- if _buffer_pos < _buffer.length then return false
+ # Unread bytes remain in the buffer
+ if _buffer_pos < _buffer_length then return false
if end_reached then return true
+ # Buffer exhausted but end not reached: try to get more data
fill_buffer
- return _buffer_pos >= _buffer.length and end_reached
+ return _buffer_pos >= _buffer_length and end_reached
end
# The buffer
- private var buffer: nullable FlatBuffer = null
+ private var buffer: NativeString = new NativeString(0)
# The current position in the buffer
- private var buffer_pos: Int = 0
+ private var buffer_pos = 0
+
+ # Length of the current buffer (i.e. number of bytes in the buffer)
+ private var buffer_length = 0
+
+ # Capacity of the buffer
+ private var buffer_capacity = 0
# Fill the buffer
protected fun fill_buffer is abstract
- # Is the last fill_buffer reach the end
+ # Has the last fill_buffer reached the end
protected fun end_reached: Bool is abstract
# Allocate a `_buffer` for a given `capacity`.
+ #
+ # The read position and the buffer length are reset to 0 and
+ # `capacity` is recorded as the buffer capacity.
protected fun prepare_buffer(capacity: Int)
do
- _buffer = new FlatBuffer.with_capacity(capacity)
+ _buffer = new NativeString(capacity)
_buffer_pos = 0 # need to read
+ _buffer_length = 0
+ _buffer_capacity = capacity
end
end
super Writer
private var content = new Array[String]
- redef fun to_s do return content.to_s
+ redef fun to_s do return content.plain_to_s
redef fun is_writable do return not closed
+
+ # Stores `b`, converted with `to_s`, as a new element of `content`
+ redef fun write_bytes(b) do
+ var chunk = b.to_s
+ content.add chunk
+ end
+
redef fun write(str)
do
assert not closed
# The string to read from.
var source: String
- # The current position in the string.
+ # The current position in the string (bytewise).
private var cursor: Int = 0
+ # Reads the element of `source` located at `cursor` and moves `cursor` forward by one
+ #
+ # Returns `null` once `cursor` is not lower than `source.length`.
+ #
+ # NOTE(review): `cursor` indexes `source` directly here while `read_byte`
+ # uses it to index `source.bytes`; the two presumably only agree when
+ # every character takes one byte - confirm.
redef fun read_char do
if cursor < source.length then
- var c = source[cursor].ascii
+ # Fix when supporting UTF-8
+ var c = source[cursor]
+ cursor += 1
+ return c
+ else
+ return null
+ end
+ end
+ # Reads the byte of `source` located at `cursor` and moves `cursor` forward by one
+ #
+ # Returns `null` once every byte of `source` was read.
+ redef fun read_byte do
+ # `cursor` indexes `source.bytes`, so it is bounded by the byte length
+ # (the bound `eof` uses) and not by the number of characters.
+ if cursor < source.bytelen then
+ var c = source.bytes[cursor]
cursor += 1
return c
else
- return -1
+ return null
end
end
source = ""
end
- redef fun read_all do
- var c = cursor
- cursor = source.length
- if c == 0 then return source
- return source.substring_from(c)
+ # Returns every byte of `source` located from `cursor` on, and consumes them
+ #
+ # Returns an empty `Bytes` when nothing is left to read.
+ redef fun read_all_bytes do
+ # `cursor` is a byte offset: what is left is measured in bytes, not in characters
+ var nslen = source.bytelen - cursor
+ if nslen <= 0 then return new Bytes.empty
+ var nns = new NativeString(nslen)
+ source.copy_to_native(nns, nslen, cursor, 0)
+ # Everything was read: move to the end so that `eof` becomes true
+ cursor = source.bytelen
+ return new Bytes(nns, nslen, nslen)
end
- redef fun eof do return cursor >= source.length
+ # The end is reached once `cursor` is not lower than the byte length of `source`
+ redef fun eof do return cursor >= source.bytelen
end