# Input and output streams of characters
module stream
-intrude import text::ropes
import error
intrude import bytes
import codecs
# Any kind of stream to read/write/both to or from a source
abstract class Stream
+ # Codec used to transform raw data to text
+ #
+ # Note: defaults to UTF-8
+ var codec: Codec = utf8_codec is protected writable(set_codec)
+
+ # Lookahead buffer for codecs
+ #
+ # Since some codecs are multibyte, a lookahead may be required
+ # to store the next bytes and consume them only if a valid character
+ # is read.
+ protected var lookahead: CString is noinit
+
+ # Capacity of the lookahead
+ protected var lookahead_capacity = 0
+
+ # Current occupation of the lookahead
+ protected var lookahead_length = 0
+
+ # Buffer for writing data to a stream
+ protected var write_buffer: CString is noinit
+
+ init do
+ var lcap = codec.max_lookahead
+ lookahead = new CString(lcap)
+ write_buffer = new CString(lcap)
+ lookahead_length = 0
+ lookahead_capacity = lcap
+ end
+
+ # Change the codec for this stream.
+ fun codec=(c: Codec) do
+ if c.max_lookahead > lookahead_capacity then
+ var lcap = codec.max_lookahead
+ var lk = new CString(lcap)
+ var llen = lookahead_length
+ if llen > 0 then
+ lookahead.copy_to(lk, llen, 0, 0)
+ end
+ lookahead = lk
+ lookahead_capacity = lcap
+ write_buffer = new CString(lcap)
+ end
+ set_codec(c)
+ end
+
# Error produced by the file stream
#
# var ifs = new FileReader.open("donotmakethisfile.binx")
# close the stream
fun close is abstract
+
+ # Pre-work hook.
+ #
+ # Used to inform `self` that operations will start.
+ # Specific streams can use this to prepare some resources.
+ #
+ # Is automatically invoked at the beginning of `with` structures.
+ #
+ # Do nothing by default.
+ fun start do end
+
+ # Post-work hook.
+ #
+ # Used to inform `self` that the operations are over.
+ # Specific streams can use this to free some resources.
+ #
+ # Is automatically invoked at the end of `with` structures.
+ #
+ # call `close` by default.
+ fun finish do close
end
# A `Stream` that can be read from
abstract class Reader
super Stream
- # Decoder used to transform input bytes to UTF-8
- var decoder: Decoder = utf8_decoder is writable
+ # Read a byte directly from the underlying stream, without
+ # considering any eventual buffer
+ protected fun raw_read_byte: Int is abstract
+
+ # Read at most `max` bytes from the underlying stream into `buf`,
+ # without considering any eventual buffer
+ #
+ # Returns how many bytes were read
+ protected fun raw_read_bytes(buf: CString, max: Int): Int do
+ var rd = 0
+ for i in [0 .. max[ do
+ var b = raw_read_byte
+ if b < 0 then break
+ buf[i] = b
+ rd += 1
+ end
+ return rd
+ end
# Reads a character. Returns `null` on EOF or timeout
- fun read_char: nullable Char is abstract
+ #
+ # Returns unicode replacement character '�' if an
+ # invalid byte sequence is read.
+ #
+ # `read_char` may block if:
+ #
+ # * No byte could be read from the current buffer
+ # * An incomplete char is partially read, and more bytes are
+ # required for full decoding.
+ fun read_char: nullable Char do
+ if eof then return null
+ var cod = codec
+ var codet_sz = cod.codet_size
+ var lk = lookahead
+ var llen = lookahead_length
+ if llen < codet_sz then
+ llen += raw_read_bytes(lk.fast_cstring(llen), codet_sz - llen)
+ end
+ if llen < codet_sz then
+ lookahead_length = 0
+ return 0xFFFD.code_point
+ end
+ var ret = cod.is_valid_char(lk, codet_sz)
+ var max_llen = cod.max_lookahead
+ while ret == 1 and llen < max_llen do
+ var rd = raw_read_bytes(lk.fast_cstring(llen), codet_sz)
+ if rd < codet_sz then
+ llen -= codet_sz
+ if llen > 0 then
+ lookahead.lshift(codet_sz, llen, codet_sz)
+ end
+ lookahead_length = llen.max(0)
+ return 0xFFFD.code_point
+ end
+ llen += codet_sz
+ ret = cod.is_valid_char(lk, llen)
+ end
+ if ret == 0 then
+ var c = cod.decode_char(lk)
+ var clen = c.u8char_len
+ llen -= clen
+ if llen > 0 then
+ lookahead.lshift(clen, llen, clen)
+ end
+ lookahead_length = llen
+ return c
+ end
+ if ret == 2 or ret == 1 then
+ llen -= codet_sz
+ if llen > 0 then
+ lookahead.lshift(codet_sz, llen, codet_sz)
+ end
+ lookahead_length = llen
+ return 0xFFFD.code_point
+ end
+ # Should not happen if the decoder works properly
+ var arr = new Array[Object]
+ arr.push "Decoder error: could not decode nor recover from byte sequence ["
+ for i in [0 .. llen[ do
+ arr.push lk[i]
+ arr.push ", "
+ end
+ arr.push "]"
+ var err = new IOError(arr.plain_to_s)
+ err.cause = last_error
+ last_error = err
+ return 0xFFFD.code_point
+ end
- # Reads a byte. Returns `null` on EOF or timeout
- fun read_byte: nullable Byte is abstract
+ # Reads a byte. Returns a negative value on error
+ fun read_byte: Int do
+ var llen = lookahead_length
+ if llen == 0 then return raw_read_byte
+ var lk = lookahead
+ var b = lk[0].to_i
+ if llen == 1 then
+ lookahead_length = 0
+ else
+ lk.lshift(1, llen - 1, 1)
+ lookahead_length -= 1
+ end
+ return b
+ end
# Reads a String of at most `i` length
- fun read(i: Int): String do return read_bytes(i).to_s
+ fun read(i: Int): String do
+ assert i >= 0
+ var cs = new CString(i)
+ var rd = read_bytes_to_cstring(cs, i)
+ if rd < 0 then return ""
+ return codec.decode_string(cs, rd)
+ end
- # Read at most i bytes
- fun read_bytes(i: Int): Bytes
- do
- if last_error != null then return new Bytes.empty
- var s = new NativeString(i)
- var buf = new Bytes(s, 0, 0)
- while i > 0 and not eof do
- var c = read_byte
- if c != null then
- buf.add c
- i -= 1
- end
+ # Reads up to `max` bytes from source
+ fun read_bytes(max: Int): Bytes do
+ assert max >= 0
+ var cs = new CString(max)
+ var rd = read_bytes_to_cstring(cs, max)
+ return new Bytes(cs, rd, max)
+ end
+
+ # Reads up to `max` bytes from source and stores them in `bytes`
+ fun read_bytes_to_cstring(bytes: CString, max: Int): Int do
+ var llen = lookahead_length
+ if llen == 0 then return raw_read_bytes(bytes, max)
+ var rd = max.min(llen)
+ var lk = lookahead
+ lk.copy_to(bytes, rd, 0, 0)
+ if rd < llen then
+ lk.lshift(rd, llen - rd, rd)
+ lookahead_length -= rd
+ else
+ lookahead_length = 0
end
- return buf
+ return rd + raw_read_bytes(bytes.fast_cstring(rd), max - rd)
end
# Read a string until the end of the line.
var s = read_all_bytes
var slen = s.length
if slen == 0 then return ""
- var rets = ""
- var pos = 0
- var str = s.items.clean_utf8(slen)
- slen = str.bytelen
- var sits = str.items
- var remsp = slen
- while pos < slen do
- # The 129 size was decided more or less arbitrarily
- # It will require some more benchmarking to compute
- # if this is the best size or not
- var chunksz = 129
- if chunksz > remsp then
- rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
- break
- end
- var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
- var bytelen = st - pos
- rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
- pos = st
- remsp -= bytelen
- end
- if rets isa Concat then return rets.balance
- return rets
+ return codec.decode_string(s.items, s.length)
end
# Read all the stream until the eof.
do
if last_error != null then return new Bytes.empty
var s = new Bytes.empty
+ var buf = new CString(4096)
while not eof do
- var c = read_byte
- if c != null then s.add(c)
+ var rd = read_bytes_to_cstring(buf, 4096)
+ s.append_ns(buf, rd)
end
return s
end
# Is there something to read.
# This function returns 'false' if there is something to read.
- fun eof: Bool is abstract
+ fun eof: Bool do
+ if lookahead_length > 0 then return false
+ lookahead_length = raw_read_bytes(lookahead, 1)
+ return lookahead_length <= 0
+ end
# Read the next sequence of non whitespace characters.
#
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
- super Iterator[String]
+ super CachedIterator[String]
# The original stream
var stream: Reader
- redef fun is_ok
+ redef fun next_item
do
- var res = not stream.eof
- if not res and close_on_finish then stream.close
- return res
- end
-
- redef fun item
- do
- var line = self.line
- if line == null then
- line = stream.read_line
+ if stream.eof then
+ if close_on_finish then stream.close
+ return null
end
- self.line = line
- return line
- end
-
- # The last line read (cache)
- private var line: nullable String = null
-
- redef fun next
- do
- # force the read
- if line == null then item
- # drop the line
- line = null
+ return stream.read_line
end
# Close the stream when the stream is at the EOF.
abstract class Writer
super Stream
- # The coder from a nit UTF-8 String to the output file
- var coder: Coder = utf8_coder is writable
+ # Write bytes from `s`
+ fun write_bytes(s: Bytes) do write_bytes_from_cstring(s.items, s.length)
- # Writes bytes from `s`
- fun write_bytes(s: Bytes) is abstract
+ # Write `len` bytes from `ns`
+ fun write_bytes_from_cstring(ns: CString, len: Int) is abstract
- # write a string
+ # Write a string
fun write(s: Text) is abstract
# Write a single byte
- fun write_byte(value: Byte) is abstract
+ fun write_byte(value: Int) is abstract
+
+ # Write a single char
+ fun write_char(c: Char) do
+ var ln = codec.add_char_to(c, write_buffer)
+ write_bytes_from_cstring(write_buffer, ln)
+ end
# Can the stream be used to write
fun is_writable: Bool is abstract
# The specific logic it let to the concrete subclasses
fun write_to(stream: Writer) is abstract
- # Like `write_to` but return a new String (may be quite large)
+ # Like `write_to` but return a new String (may be quite large).
#
- # This funtionality is anectodical, since the point
- # of streamable object to to be efficienlty written to a
- # stream without having to allocate and concatenate strings
+ # This functionality is anecdotal, since the point
+ # of a streamable object is to be efficiently written to a
+ # stream without having to allocate and concatenate strings.
fun write_to_string: String
do
var stream = new StringWriter
write_to(stream)
return stream.to_s
end
+
+ # Like `write_to` but return a new Bytes (may be quite large)
+ #
+ # This functionality is anecdotal, since the point
+ # of a streamable object is to be efficiently written to a
+ # stream without having to allocate and concatenate buffers.
+ #
+ # Nevertheless, you might need this method if you want to know
+ # the byte size of a writable object.
+ fun write_to_bytes: Bytes
+ do
+ var stream = new BytesWriter
+ write_to(stream)
+ return stream.bytes
+ end
+end
+
+redef class Bytes
+ super Writable
+ redef fun write_to(s) do s.write_bytes(self)
+
+ redef fun write_to_string do return to_s
end
redef class Text
redef fun write_to(stream) do stream.write(self)
end
-# Input streams with a buffered input for efficiency purposes
-abstract class BufferedReader
+# A `Stream` that can be written to and read from
+abstract class Duplex
super Reader
- redef fun read_char
- do
- if last_error != null then return null
- if eof then
- last_error = new IOError("Stream has reached eof")
- return null
- end
- # TODO: Fix when supporting UTF-8
- var c = _buffer[_buffer_pos].to_i.ascii
- _buffer_pos += 1
- return c
- end
+ super Writer
+end
- redef fun read_byte
- do
- if last_error != null then return null
- if eof then
- last_error = new IOError("Stream has reached eof")
- return null
- end
- var c = _buffer[_buffer_pos]
- _buffer_pos += 1
- return c
- end
+# Write to `bytes` in memory
+#
+# ~~~
+# var writer = new BytesWriter
+#
+# writer.write "Strings "
+# writer.write_char '&'
+# writer.write_byte 0x20
+# writer.write_bytes "bytes".to_bytes
+#
+# assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
+# assert writer.bytes.to_s == "Strings & bytes"
+# ~~~
+#
+# As with any binary data, UTF-8 code points encoded on two bytes or more
+# can be constructed byte by byte.
+#
+# ~~~
+# writer = new BytesWriter
+#
+# # Write just the character first half
+# writer.write_byte 0xC2
+# assert writer.to_s == "\\xC2"
+# assert writer.bytes.to_s == "�"
+#
+# # Complete the character
+# writer.write_byte 0xA2
+# assert writer.to_s == "\\xC2\\xA2"
+# assert writer.bytes.to_s == "¢"
+# ~~~
+class BytesWriter
+ super Writer
- # Resets the internal buffer
- fun buffer_reset do
- _buffer_length = 0
- _buffer_pos = 0
- end
+ # Written memory
+ var bytes = new Bytes.empty
- # Peeks up to `n` bytes in the buffer
- #
- # The operation does not consume the buffer
- #
- # ~~~nitish
- # var x = new FileReader.open("File.txt")
- # assert x.peek(5) == x.read(5)
- # ~~~
- fun peek(i: Int): Bytes do
- if eof then return new Bytes.empty
- var remsp = _buffer_length - _buffer_pos
- if i <= remsp then
- var bf = new Bytes.with_capacity(i)
- bf.append_ns_from(_buffer, i, _buffer_pos)
- return bf
- end
- var bf = new Bytes.with_capacity(i)
- bf.append_ns_from(_buffer, remsp, _buffer_pos)
- _buffer_pos = _buffer_length
- read_intern(i - bf.length, bf)
- remsp = _buffer_length - _buffer_pos
- var full_len = bf.length + remsp
- if full_len > _buffer_capacity then
- var c = _buffer_capacity
- while c < full_len do c = c * 2 + 2
- _buffer_capacity = c
- end
- var nns = new NativeString(_buffer_capacity)
- bf.items.copy_to(nns, bf.length, 0, 0)
- _buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
- _buffer = nns
- _buffer_pos = 0
- _buffer_length = full_len
- return bf
- end
+ redef fun to_s do return bytes.chexdigest
- redef fun read_bytes(i)
+ redef fun write(str)
do
- if last_error != null then return new Bytes.empty
- var buf = new Bytes.with_capacity(i)
- read_intern(i, buf)
- return buf
+ if closed then return
+ str.append_to_bytes bytes
end
- # Fills `buf` with at most `i` bytes read from `self`
- private fun read_intern(i: Int, buf: Bytes): Int do
- if eof then return 0
- var p = _buffer_pos
- var bufsp = _buffer_length - p
- if bufsp >= i then
- _buffer_pos += i
- buf.append_ns_from(_buffer, i, p)
- return i
- end
- _buffer_pos = _buffer_length
- var readln = _buffer_length - p
- buf.append_ns_from(_buffer, readln, p)
- var rd = read_intern(i - readln, buf)
- return rd + readln
- end
-
- redef fun read_all_bytes
+ redef fun write_char(c)
do
- if last_error != null then return new Bytes.empty
- var s = new Bytes.with_capacity(10)
- var b = _buffer
- while not eof do
- var j = _buffer_pos
- var k = _buffer_length
- var rd_sz = k - j
- s.append_ns_from(b, rd_sz, j)
- _buffer_pos = k
- fill_buffer
- end
- return s
+ if closed then return
+ bytes.add_char c
end
- redef fun append_line_to(s)
+ redef fun write_byte(value)
do
- var lb = new Bytes.with_capacity(10)
- loop
- # First phase: look for a '\n'
- var i = _buffer_pos
- while i < _buffer_length and _buffer[i] != 0xAu8 do
- i += 1
- end
-
- var eol
- if i < _buffer_length then
- assert _buffer[i] == 0xAu8
- i += 1
- eol = true
- else
- eol = false
- end
-
- # if there is something to append
- if i > _buffer_pos then
- # Copy from the buffer to the string
- var j = _buffer_pos
- while j < i do
- lb.add(_buffer[j])
- j += 1
- end
- _buffer_pos = i
- else
- assert end_reached
- s.append lb.to_s
- return
- end
-
- if eol then
- # so \n is found
- s.append lb.to_s
- return
- else
- # so \n is not found
- if end_reached then
- s.append lb.to_s
- return
- end
- fill_buffer
- end
- end
+ if closed then return
+ bytes.add value
end
- redef fun eof
- do
- if _buffer_pos < _buffer_length then return false
- if end_reached then return true
- fill_buffer
- return _buffer_pos >= _buffer_length and end_reached
+ redef fun write_bytes_from_cstring(ns, len) do
+ if closed then return
+ bytes.append_ns(ns, len)
end
- # The buffer
- private var buffer: NativeString = new NativeString(0)
-
- # The current position in the buffer
- private var buffer_pos = 0
-
- # Length of the current buffer (i.e. nuber of bytes in the buffer)
- private var buffer_length = 0
-
- # Capacity of the buffer
- private var buffer_capacity = 0
+ # Is the stream closed?
+ protected var closed = false
- # Fill the buffer
- protected fun fill_buffer is abstract
+ redef fun close do closed = true
+ redef fun is_writable do return not closed
+end
- # Has the last fill_buffer reached the end
- protected fun end_reached: Bool is abstract
+# `Stream` writing to a `String`
+#
+# This class has the same behavior as `BytesWriter`
+# except for `to_s` which decodes `bytes` to a string.
+#
+# ~~~
+# var writer = new StringWriter
+#
+# writer.write "Strings "
+# writer.write_char '&'
+# writer.write_byte 0x20
+# writer.write_bytes "bytes".to_bytes
+#
+# assert writer.to_s == "Strings & bytes"
+# ~~~
+class StringWriter
+ super BytesWriter
- # Allocate a `_buffer` for a given `capacity`.
- protected fun prepare_buffer(capacity: Int)
- do
- _buffer = new NativeString(capacity)
- _buffer_pos = 0 # need to read
- _buffer_length = 0
- _buffer_capacity = capacity
- end
+ redef fun to_s do return bytes.to_s
end
-# A `Stream` that can be written to and read from
-abstract class Duplex
+# Read from `bytes` in memory
+#
+# ~~~
+# var reader = new BytesReader(b"a…b")
+# assert reader.read_char == 'a'
+# assert reader.read_byte == 0xE2 # 1st byte of '…'
+# assert reader.read_byte == 0x80 # 2nd byte of '…'
+# assert reader.read_char == '�' # Reads the last byte as an invalid char
+# assert reader.read_all_bytes == b"b"
+# ~~~
+class BytesReader
super Reader
- super Writer
-end
-# `Stream` that can be used to write to a `String`
-#
-# Mainly used for compatibility with Writer type and tests.
-class StringWriter
- super Writer
+ # Source data to read
+ var bytes: Bytes
- private var content = new Array[String]
- redef fun to_s do return content.plain_to_s
- redef fun is_writable do return not closed
+ # The current position in `bytes`
+ private var cursor = 0
- redef fun write_bytes(b) do
- content.add(b.to_s)
+ redef fun raw_read_byte
+ do
+ if cursor >= bytes.length then return -1
+
+ var c = bytes[cursor]
+ cursor += 1
+ return c.to_i
end
- redef fun write(str)
+ redef fun close do bytes = new Bytes.empty
+
+ redef fun read_all_bytes
do
- assert not closed
- content.add(str.to_s)
+ var res = bytes.slice_from(cursor)
+ cursor = bytes.length
+ return res
end
- # Is the stream closed?
- protected var closed = false
+ redef fun raw_read_bytes(ns, max) do
+ if cursor >= bytes.length then return 0
- redef fun close do closed = true
+ var copy = max.min(bytes.length - cursor)
+ bytes.items.copy_to(ns, copy, cursor, 0)
+ cursor += copy
+ return copy
+ end
+
+ redef fun eof do return cursor >= bytes.length
end
-# `Stream` used to read from a `String`
+# `Stream` reading from a `String` source
#
-# Mainly used for compatibility with Reader type and tests.
+# This class has the same behavior as `BytesReader`
+# except for its constructor accepting a `String`.
+#
+# ~~~
+# var reader = new StringReader("a…b")
+# assert reader.read_char == 'a'
+# assert reader.read_byte == 0xE2 # 1st byte of '…'
+# assert reader.read_byte == 0x80 # 2nd byte of '…'
+# assert reader.read_char == '�' # Reads the last byte as an invalid char
+# assert reader.read_all == "b"
+# ~~~
class StringReader
- super Reader
+ super BytesReader
- # The string to read from.
- var source: String
+ autoinit source
- # The current position in the string (bytewise).
- private var cursor: Int = 0
-
- redef fun read_char do
- if cursor < source.length then
- # Fix when supporting UTF-8
- var c = source[cursor]
- cursor += 1
- return c
- else
- return null
- end
- end
+ # Source data to read
+ var source: String
- redef fun read_byte do
- if cursor < source.length then
- var c = source.bytes[cursor]
- cursor += 1
- return c
- else
- return null
- end
- end
+ init do bytes = source.to_bytes
- redef fun close do
+ redef fun close
+ do
source = ""
+ super
end
-
- redef fun read_all_bytes do
- var nslen = source.length - cursor
- var nns = new NativeString(nslen)
- source.copy_to_native(nns, nslen, cursor, 0)
- return new Bytes(nns, nslen, nslen)
- end
-
- redef fun eof do return cursor >= source.bytelen
end