core :: Reader :: defaultinit
# A `Stream` that can be read from
abstract class Reader
super Stream
# Read one byte directly from the underlying stream, without
# considering any eventual buffer.
#
# The default implementation of `raw_read_bytes` treats a negative
# result as the signal to stop reading.
protected fun raw_read_byte: Int is abstract
# Fill `buf` with up to `max` bytes taken one by one from the underlying
# stream, without considering any eventual buffer.
#
# Reading stops early as soon as `raw_read_byte` yields a negative value.
#
# Returns the number of bytes stored in `buf`.
protected fun raw_read_bytes(buf: CString, max: Int): Int do
	var count = 0
	while count < max do
		var byte = raw_read_byte
		# A negative byte ends the read; report what was stored so far
		if byte < 0 then return count
		buf[count] = byte
		count += 1
	end
	return count
end
# Reads a character. Returns `null` on EOF or timeout
#
# Returns unicode replacement character '�' if an
# invalid byte sequence is read.
#
# `read_char` may block if:
#
# * No byte could be read from the current buffer
# * An incomplete char is partially read, and more bytes are
# required for full decoding.
#
# Decoding works on the `lookahead` buffer: bytes are accumulated there
# until `codec` accepts or rejects them, and `lookahead_length` is updated
# to the number of bytes left pending for the next read.
fun read_char: nullable Char do
if eof then return null
var cod = codec
# Size of one code unit of the codec; used below as a byte count
var codet_sz = cod.codet_size
var lk = lookahead
var llen = lookahead_length
# Make sure at least one full code unit is pending
if llen < codet_sz then
llen += raw_read_bytes(lk.fast_cstring(llen), codet_sz - llen)
end
# Still short: drop what is pending and report an invalid sequence
if llen < codet_sz then
lookahead_length = 0
return 0xFFFD.code_point
end
# `ret` is handled below as: 0 = decode now, 1 = fetch another code unit,
# 2 (or 1 once the loop gives up) = substitute U+FFFD
var ret = cod.is_valid_char(lk, codet_sz)
var max_llen = cod.max_lookahead
while ret == 1 and llen < max_llen do
# Append one more code unit right after the bytes already pending
var rd = raw_read_bytes(lk.fast_cstring(llen), codet_sz)
if rd < codet_sz then
# Short read: give up on this char and skip its first code unit
# (presumably `lshift` moves the remaining `llen` bytes to the
# front of the lookahead — TODO confirm).
# NOTE(review): the `rd` bytes of the short read are not counted in `llen`.
llen -= codet_sz
if llen > 0 then
lookahead.lshift(codet_sz, llen, codet_sz)
end
lookahead_length = llen.max(0)
return 0xFFFD.code_point
end
llen += codet_sz
ret = cod.is_valid_char(lk, llen)
end
if ret == 0 then
var c = cod.decode_char(lk)
# NOTE(review): the consumed size appears to be the UTF-8 length of `c`
# (`u8char_len`); confirm this also holds for codecs that are not UTF-8
var clen = c.u8char_len
llen -= clen
if llen > 0 then
lookahead.lshift(clen, llen, clen)
end
lookahead_length = llen
return c
end
if ret == 2 or ret == 1 then
# Rejected or still incomplete: skip one code unit and substitute U+FFFD
llen -= codet_sz
if llen > 0 then
lookahead.lshift(codet_sz, llen, codet_sz)
end
lookahead_length = llen
return 0xFFFD.code_point
end
# Should not happen if the decoder works properly
# Record an IOError listing the pending bytes, chained to the previous
# `last_error`; `lookahead_length` is left unchanged on this path
var arr = new Array[Object]
arr.push "Decoder error: could not decode nor recover from byte sequence ["
for i in [0 .. llen[ do
arr.push lk[i]
arr.push ", "
end
arr.push "]"
var err = new IOError(arr.plain_to_s)
err.cause = last_error
last_error = err
return 0xFFFD.code_point
end
# Reads the next byte. A negative value is returned on error
#
# A byte pending in the lookahead is served first; with an empty
# lookahead the result is whatever `raw_read_byte` returns.
fun read_byte: Int do
	var pending = lookahead_length
	if pending == 0 then return raw_read_byte
	var buffer = lookahead
	var first = buffer[0].to_i
	if pending != 1 then
		# Other bytes remain pending after the one being served
		buffer.lshift(1, pending - 1, 1)
		lookahead_length -= 1
	else
		lookahead_length = 0
	end
	return first
end
# Reads at most `i` bytes and decodes them into a String with `codec`
#
# An empty String is returned when the read reports a negative count.
fun read(i: Int): String do
	assert i >= 0
	var storage = new CString(i)
	var got = read_bytes_to_cstring(storage, i)
	if got >= 0 then return codec.decode_string(storage, got)
	return ""
end
# Reads up to `max` bytes from source
#
# The result is backed by a freshly allocated buffer of `max` bytes.
# A negative count reported by the read is treated as zero bytes read,
# so the returned `Bytes` never has a negative length.
fun read_bytes(max: Int): Bytes do
	assert max >= 0
	var cs = new CString(max)
	var rd = read_bytes_to_cstring(cs, max)
	# Same guard as `read`: a failed read yields an empty result
	if rd < 0 then rd = 0
	return new Bytes(cs, rd, max)
end
# Reads up to `max` bytes from source and stores them in `bytes`
#
# Bytes pending in the lookahead are delivered first; the rest of the
# request is forwarded to `raw_read_bytes`.
#
# Returns the sum of both counts.
fun read_bytes_to_cstring(bytes: CString, max: Int): Int do
	var pending = lookahead_length
	if pending == 0 then return raw_read_bytes(bytes, max)
	var taken = max.min(pending)
	var buffer = lookahead
	buffer.copy_to(bytes, taken, 0, 0)
	if taken >= pending then
		lookahead_length = 0
	else
		# Only part of the lookahead was consumed
		buffer.lshift(taken, pending - taken, taken)
		lookahead_length -= taken
	end
	var tail = bytes.fast_cstring(taken)
	return taken + raw_read_bytes(tail, max - taken)
end
# Read the next line of the stream as a String.
#
# The line terminator (`\n` or `\r\n`), if any, is not part of the result.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_line == "Hello"
# assert i.read_line == ""
# assert i.read_line == "World"
# assert i.eof
# ~~~
#
# Lines are delimited only by LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED
# (`\r\n`), and the end of file (EOF).
# A lone CARRIAGE RETURN (`\r`) does not end a line.
#
# ~~~
# var txt2 = "Hello\r\n\n\rWorld"
# var i2 = new StringReader(txt2)
# assert i2.read_line == "Hello"
# assert i2.read_line == ""
# assert i2.read_line == "\rWorld"
# assert i2.eof
# ~~~
#
# An empty String is returned when `last_error` is set or at EOF.
#
# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
fun read_line: String
do
	# `eof` is only queried when no error is pending
	if last_error != null or eof then return ""
	var line = new FlatBuffer
	append_line_to(line)
	return line.to_s.chomp
end
# Read every remaining line until the eof.
#
# The line terminator (`\n` or `\r\n`) is removed from each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_lines == ["Hello", "", "World"]
# ~~~
#
# This method is more efficient than splitting
# the result of `read_all`.
#
# NOTE: See `read_line` for details.
fun read_lines: Array[String]
do
	var lines = new Array[String]
	loop
		if eof then break
		lines.add read_line
	end
	return lines
end
# Return an iterator that reads the stream line by line.
#
# The line terminator (`\n` or `\r\n`) is removed from each line.
# The lines are read with `read_line`. See this method for details.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.each_line.to_a == ["Hello", "", "World"]
# ~~~
#
# Unlike `read_lines`, which reads all the lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the iteration.
#
# ~~~
# i = new StringReader(txt)
# var el = i.each_line
#
# assert el.item == "Hello"
# el.next
# assert el.item == ""
# el.next
#
# i.close
#
# assert not el.is_ok
# # closed before "world" is read
# ~~~
fun each_line: LineIterator
do
	var lines = new LineIterator(self)
	return lines
end
# Read the whole stream until the eof and decode it with `codec`.
#
# The content is returned as a String; it is empty when no byte was read.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
fun read_all: String do
	var raw = read_all_bytes
	var size = raw.length
	if size != 0 then return codec.decode_string(raw.items, size)
	return ""
end
# Read the whole stream until the eof.
#
# The content is returned verbatim, without any decoding.
# Nothing is read and an empty `Bytes` is returned if `last_error` is set.
fun read_all_bytes: Bytes
do
	var content = new Bytes.empty
	if last_error != null then return content
	# The stream is drained through a reusable chunk of 4096 bytes
	var chunk = new CString(4096)
	loop
		if eof then break
		var got = read_bytes_to_cstring(chunk, 4096)
		content.append_ns(chunk, got)
	end
	return content
end
# Read characters up to the end of the line and append them to `s`.
#
# Unlike `read_line` and other related methods,
# the line terminator '\n', if any, is kept in the appended text.
# Use the method `Text::chomp` to safely remove it.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# var b = new FlatBuffer
# i.append_line_to(b)
# assert b == "Hello\n"
# i.append_line_to(b)
# assert b == "Hello\n\n"
# i.append_line_to(b)
# assert b == txt
# assert i.eof
# ~~~
#
# If `\n` is not present at the end of the result, it means that
# a last line without end-of-line terminator was returned.
#
# ~~~
# var i2 = new StringReader("hello")
# assert not i2.eof
# var b2 = new FlatBuffer
# i2.append_line_to(b2)
# assert b2 == "hello"
# assert i2.eof
# ~~~
#
# Nothing is read if `last_error` is set.
#
# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
fun append_line_to(s: Buffer)
do
	if last_error != null then return
	loop
		var c = read_char
		if c != null then
			s.chars.push(c)
			if c == '\n' then return
		else if eof then
			# No character and nothing left to read: the line ends here
			return
		end
	end
end
# Is the end of the stream reached?
#
# Returns `false` if there is something to read, `true` otherwise.
#
# When the lookahead is empty, one byte is requested from the underlying
# stream and stored in the lookahead, where the next read will find it.
fun eof: Bool do
	if lookahead_length > 0 then return false
	var rd = raw_read_bytes(lookahead, 1)
	# Never store a negative count: `read_byte` and `read_bytes_to_cstring`
	# only compare `lookahead_length` with 0 and would otherwise work on
	# the lookahead with a negative length.
	lookahead_length = rd.max(0)
	return rd <= 0
end
# Read the next run of non-whitespace characters.
#
# Leading whitespace characters are skipped.
# The first whitespace character that follows the result is consumed.
#
# An empty string is returned if the end of the file or an error is encountered.
#
# ~~~
# var w = new StringReader(" Hello, \n\t World!")
# assert w.read_word == "Hello,"
# assert w.read_char == '\n'
# assert w.read_word == "World!"
# assert w.read_word == ""
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_word: String
do
	var word = new FlatBuffer
	var c = read_nonwhitespace
	if c == null then return word.to_s
	word.add(c)
	loop
		if eof then break
		c = read_char
		if c == null then break
		# The delimiting whitespace is consumed but not kept
		if c.is_whitespace then break
		word.add(c)
	end
	return word.to_s
end
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
# Returns `null` on end of file or error, including when only
# whitespace characters were left in the stream.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
# assert w.read_nonwhitespace == 'a'
# assert w.read_nonwhitespace == 'b'
# assert w.read_nonwhitespace == 'c'
# assert w.read_nonwhitespace == null
# ~~~
#
# Trailing whitespace is skipped and does not leak into the result:
#
# ~~~
# var w2 = new StringReader("a \n")
# assert w2.read_nonwhitespace == 'a'
# assert w2.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_nonwhitespace: nullable Char
do
	while not eof do
		var c = read_char
		# Stop on the first non-whitespace character, or on a failed read
		if c == null or not c.is_whitespace then return c
	end
	# The stream ended while skipping: the last whitespace character read
	# must not be returned as if it were a result
	return null
end
end
lib/core/stream.nit:108,1--482,3