# This file is part of NIT ( http://www.nitlanguage.org ). # # This file is free software, which comes along with NIT. This software is # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. You can modify it is you want, provided this header # is kept unaltered, and a notification of the changes is added. # You are allowed to redistribute it and sell it, alone or is a part of # another product. # Input and output streams of characters module stream import error intrude import bytes import codecs in "C" `{ #include #include #include `} # Any kind of error that could be produced by an operation on Streams class IOError super Error end # Any kind of stream to read/write/both to or from a source abstract class Stream # Codec used to transform raw data to text # # Note: defaults to UTF-8 var codec: Codec = utf8_codec is protected writable(set_codec) # Lookahead buffer for codecs # # Since some codecs are multibyte, a lookahead may be required # to store the next bytes and consume them only if a valid character # is read. protected var lookahead: CString is noinit # Capacity of the lookahead protected var lookahead_capacity = 0 # Current occupation of the lookahead protected var lookahead_length = 0 # Buffer for writing data to a stream protected var write_buffer: CString is noinit init do var lcap = codec.max_lookahead lookahead = new CString(lcap) write_buffer = new CString(lcap) lookahead_length = 0 lookahead_capacity = lcap end # Change the codec for this stream. fun codec=(c: Codec) do if c.max_lookahead > lookahead_capacity then var lcap = codec.max_lookahead var lk = new CString(lcap) var llen = lookahead_length if llen > 0 then lookahead.copy_to(lk, llen, 0, 0) end lookahead = lk lookahead_capacity = lcap write_buffer = new CString(lcap) end set_codec(c) end # Error produced by the file stream # # var ifs = new FileReader.open("donotmakethisfile.binx") # ifs.read_all # ifs.close # assert ifs.last_error != null var last_error: nullable IOError = null # close the stream fun close is abstract # Pre-work hook. # # Used to inform `self` that operations will start. # Specific streams can use this to prepare some resources. # # Is automatically invoked at the beginning of `with` structures. # # Do nothing by default. fun start do end # Post-work hook. # # Used to inform `self` that the operations are over. # Specific streams can use this to free some resources. # # Is automatically invoked at the end of `with` structures. # # call `close` by default. fun finish do close end # A `Stream` that can be read from abstract class Reader super Stream # Read a byte directly from the underlying stream, without # considering any eventual buffer protected fun raw_read_byte: Int is abstract # Read at most `max` bytes from the underlying stream into `buf`, # without considering any eventual buffer # # Returns how many bytes were read protected fun raw_read_bytes(buf: CString, max: Int): Int do var rd = 0 for i in [0 .. max[ do var b = raw_read_byte if b < 0 then break buf[i] = b rd += 1 end return rd end # Reads a character. Returns `null` on EOF or timeout # # Returns unicode replacement character '�' if an # invalid byte sequence is read. # # `read_char` may block if: # # * No byte could be read from the current buffer # * An incomplete char is partially read, and more bytes are # required for full decoding. fun read_char: nullable Char do if eof then return null var cod = codec var codet_sz = cod.codet_size var lk = lookahead var llen = lookahead_length if llen < codet_sz then llen += raw_read_bytes(lk.fast_cstring(llen), codet_sz - llen) end if llen < codet_sz then lookahead_length = 0 return 0xFFFD.code_point end var ret = cod.is_valid_char(lk, codet_sz) var max_llen = cod.max_lookahead while ret == 1 and llen < max_llen do var rd = raw_read_bytes(lk.fast_cstring(llen), codet_sz) if rd < codet_sz then llen -= codet_sz if llen > 0 then lookahead.lshift(codet_sz, llen, codet_sz) end lookahead_length = llen.max(0) return 0xFFFD.code_point end llen += codet_sz ret = cod.is_valid_char(lk, llen) end if ret == 0 then var c = cod.decode_char(lk) var clen = c.u8char_len llen -= clen if llen > 0 then lookahead.lshift(clen, llen, clen) end lookahead_length = llen return c end if ret == 2 or ret == 1 then llen -= codet_sz if llen > 0 then lookahead.lshift(codet_sz, llen, codet_sz) end lookahead_length = llen return 0xFFFD.code_point end # Should not happen if the decoder works properly var arr = new Array[Object] arr.push "Decoder error: could not decode nor recover from byte sequence [" for i in [0 .. llen[ do arr.push lk[i] arr.push ", " end arr.push "]" var err = new IOError(arr.plain_to_s) err.cause = last_error last_error = err return 0xFFFD.code_point end # Reads a byte. Returns a negative value on error fun read_byte: Int do var llen = lookahead_length if llen == 0 then return raw_read_byte var lk = lookahead var b = lk[0].to_i if llen == 1 then lookahead_length = 0 else lk.lshift(1, llen - 1, 1) lookahead_length -= 1 end return b end # Reads a String of at most `i` length fun read(i: Int): String do assert i >= 0 var cs = new CString(i) var rd = read_bytes_to_cstring(cs, i) if rd < 0 then return "" return codec.decode_string(cs, rd) end # Reads up to `max` bytes from source fun read_bytes(max: Int): Bytes do assert max >= 0 var cs = new CString(max) var rd = read_bytes_to_cstring(cs, max) return new Bytes(cs, rd, max) end # Reads up to `max` bytes from source and stores them in `bytes` fun read_bytes_to_cstring(bytes: CString, max: Int): Int do var llen = lookahead_length if llen == 0 then return raw_read_bytes(bytes, max) var rd = max.min(llen) var lk = lookahead lk.copy_to(bytes, rd, 0, 0) if rd < llen then lk.lshift(rd, llen - rd, rd) lookahead_length -= rd else lookahead_length = 0 end return rd + raw_read_bytes(bytes.fast_cstring(rd), max - rd) end # Read a string until the end of the line. # # The line terminator '\n' and '\r\n', if any, is removed in each line. # # ~~~ # var txt = "Hello\n\nWorld\n" # var i = new StringReader(txt) # assert i.read_line == "Hello" # assert i.read_line == "" # assert i.read_line == "World" # assert i.eof # ~~~ # # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and # the end or file (EOF) is considered to delimit the end of lines. # CARRIAGE RETURN (`\r`) alone is not used for the end of line. # # ~~~ # var txt2 = "Hello\r\n\n\rWorld" # var i2 = new StringReader(txt2) # assert i2.read_line == "Hello" # assert i2.read_line == "" # assert i2.read_line == "\rWorld" # assert i2.eof # ~~~ # # NOTE: Use `append_line_to` if the line terminator needs to be preserved. fun read_line: String do if last_error != null then return "" if eof then return "" var s = new FlatBuffer append_line_to(s) return s.to_s.chomp end # Read all the lines until the eof. # # The line terminator '\n' and `\r\n` is removed in each line, # # ~~~ # var txt = "Hello\n\nWorld\n" # var i = new StringReader(txt) # assert i.read_lines == ["Hello", "", "World"] # ~~~ # # This method is more efficient that splitting # the result of `read_all`. # # NOTE: SEE `read_line` for details. fun read_lines: Array[String] do var res = new Array[String] while not eof do res.add read_line end return res end # Return an iterator that read each line. # # The line terminator '\n' and `\r\n` is removed in each line, # The line are read with `read_line`. See this method for details. # # ~~~ # var txt = "Hello\n\nWorld\n" # var i = new StringReader(txt) # assert i.each_line.to_a == ["Hello", "", "World"] # ~~~ # # Unlike `read_lines` that read all lines at the call, `each_line` is lazy. # Therefore, the stream should no be closed until the end of the stream. # # ~~~ # i = new StringReader(txt) # var el = i.each_line # # assert el.item == "Hello" # el.next # assert el.item == "" # el.next # # i.close # # assert not el.is_ok # # closed before "world" is read # ~~~ fun each_line: LineIterator do return new LineIterator(self) # Read all the stream until the eof. # # The content of the file is returned as a String. # # ~~~ # var txt = "Hello\n\nWorld\n" # var i = new StringReader(txt) # assert i.read_all == txt # ~~~ fun read_all: String do var s = read_all_bytes var slen = s.length if slen == 0 then return "" return codec.decode_string(s.items, s.length) end # Read all the stream until the eof. # # The content of the file is returned verbatim. fun read_all_bytes: Bytes do if last_error != null then return new Bytes.empty var s = new Bytes.empty var buf = new CString(4096) while not eof do var rd = read_bytes_to_cstring(buf, 4096) s.append_ns(buf, rd) end return s end # Read a string until the end of the line and append it to `s`. # # Unlike `read_line` and other related methods, # the line terminator '\n', if any, is preserved in each line. # Use the method `Text::chomp` to safely remove it. # # ~~~ # var txt = "Hello\n\nWorld\n" # var i = new StringReader(txt) # var b = new FlatBuffer # i.append_line_to(b) # assert b == "Hello\n" # i.append_line_to(b) # assert b == "Hello\n\n" # i.append_line_to(b) # assert b == txt # assert i.eof # ~~~ # # If `\n` is not present at the end of the result, it means that # a non-eol terminated last line was returned. # # ~~~ # var i2 = new StringReader("hello") # assert not i2.eof # var b2 = new FlatBuffer # i2.append_line_to(b2) # assert b2 == "hello" # assert i2.eof # ~~~ # # NOTE: The single character LINE FEED (`\n`) delimits the end of lines. # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized. fun append_line_to(s: Buffer) do if last_error != null then return loop var x = read_char if x == null then if eof then return else s.chars.push(x) if x == '\n' then return end end end # Is there something to read. # This function returns 'false' if there is something to read. fun eof: Bool do if lookahead_length > 0 then return false lookahead_length = raw_read_bytes(lookahead, 1) return lookahead_length <= 0 end # Read the next sequence of non whitespace characters. # # Leading whitespace characters are skipped. # The first whitespace character that follows the result is consumed. # # An empty string is returned if the end of the file or an error is encounter. # # ~~~ # var w = new StringReader(" Hello, \n\t World!") # assert w.read_word == "Hello," # assert w.read_char == '\n' # assert w.read_word == "World!" # assert w.read_word == "" # ~~~ # # `Char::is_whitespace` determines what is a whitespace. fun read_word: String do var buf = new FlatBuffer var c = read_nonwhitespace if c != null then buf.add(c) while not eof do c = read_char if c == null then break if c.is_whitespace then break buf.add(c) end end var res = buf.to_s return res end # Skip whitespace characters (if any) then return the following non-whitespace character. # # Returns the code point of the character. # Returns `null` on end of file or error. # # In fact, this method works like `read_char` except it skips whitespace. # # ~~~ # var w = new StringReader(" \nab\tc") # assert w.read_nonwhitespace == 'a' # assert w.read_nonwhitespace == 'b' # assert w.read_nonwhitespace == 'c' # assert w.read_nonwhitespace == null # ~~~ # # `Char::is_whitespace` determines what is a whitespace. fun read_nonwhitespace: nullable Char do var c: nullable Char = null while not eof do c = read_char if c == null or not c.is_whitespace then break end return c end end # Iterator returned by `Reader::each_line`. # See the aforementioned method for details. class LineIterator super CachedIterator[String] # The original stream var stream: Reader redef fun next_item do if stream.eof then if close_on_finish then stream.close return null end return stream.read_line end # Close the stream when the stream is at the EOF. # # Default is false. var close_on_finish = false is writable redef fun finish do if close_on_finish then stream.close end end # `Reader` capable of declaring if readable without blocking abstract class PollableReader super Reader # Is there something to read? (without blocking) fun poll_in: Bool is abstract end # A `Stream` that can be written to abstract class Writer super Stream # Write bytes from `s` fun write_bytes(s: Bytes) do write_bytes_from_cstring(s.items, s.length) # Write `len` bytes from `ns` fun write_bytes_from_cstring(ns: CString, len: Int) is abstract # Write a string fun write(s: Text) is abstract # Write a single byte fun write_byte(value: Int) is abstract # Write a single char fun write_char(c: Char) do var ln = codec.add_char_to(c, write_buffer) write_bytes_from_cstring(write_buffer, ln) end # Can the stream be used to write fun is_writable: Bool is abstract end # Things that can be efficienlty written to a `Writer` # # The point of this interface is to allow the instance to be efficiently # written into a `Writer`. # # Ready-to-save documents usually provide this interface. interface Writable # Write itself to a `stream` # The specific logic it let to the concrete subclasses fun write_to(stream: Writer) is abstract # Like `write_to` but return a new String (may be quite large). # # This functionality is anecdotal, since the point # of a streamable object is to be efficiently written to a # stream without having to allocate and concatenate strings. fun write_to_string: String do var stream = new StringWriter write_to(stream) return stream.to_s end # Like `write_to` but return a new Bytes (may be quite large) # # This functionality is anecdotal, since the point # of a streamable object is to be efficiently written to a # stream without having to allocate and concatenate buffers. # # Nevertheless, you might need this method if you want to know # the byte size of a writable object. fun write_to_bytes: Bytes do var stream = new BytesWriter write_to(stream) return stream.bytes end end redef class Bytes super Writable redef fun write_to(s) do s.write_bytes(self) redef fun write_to_string do return to_s end redef class Text super Writable redef fun write_to(stream) do stream.write(self) end # A `Stream` that can be written to and read from abstract class Duplex super Reader super Writer end # Write to `bytes` in memory # # ~~~ # var writer = new BytesWriter # # writer.write "Strings " # writer.write_char '&' # writer.write_byte 0x20 # writer.write_bytes "bytes".to_bytes # # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73" # assert writer.bytes.to_s == "Strings & bytes" # ~~~ # # As with any binary data, UTF-8 code points encoded on two bytes or more # can be constructed byte by byte. # # ~~~ # writer = new BytesWriter # # # Write just the character first half # writer.write_byte 0xC2 # assert writer.to_s == "\\xC2" # assert writer.bytes.to_s == "�" # # # Complete the character # writer.write_byte 0xA2 # assert writer.to_s == "\\xC2\\xA2" # assert writer.bytes.to_s == "¢" # ~~~ class BytesWriter super Writer # Written memory var bytes = new Bytes.empty redef fun to_s do return bytes.chexdigest redef fun write(str) do if closed then return str.append_to_bytes bytes end redef fun write_char(c) do if closed then return bytes.add_char c end redef fun write_byte(value) do if closed then return bytes.add value end redef fun write_bytes_from_cstring(ns, len) do if closed then return bytes.append_ns(ns, len) end # Is the stream closed? protected var closed = false redef fun close do closed = true redef fun is_writable do return not closed end # `Stream` writing to a `String` # # This class has the same behavior as `BytesWriter` # except for `to_s` which decodes `bytes` to a string. # # ~~~ # var writer = new StringWriter # # writer.write "Strings " # writer.write_char '&' # writer.write_byte 0x20 # writer.write_bytes "bytes".to_bytes # # assert writer.to_s == "Strings & bytes" # ~~~ class StringWriter super BytesWriter redef fun to_s do return bytes.to_s end # Read from `bytes` in memory # # ~~~ # var reader = new BytesReader(b"a…b") # assert reader.read_char == 'a' # assert reader.read_byte == 0xE2 # 1st byte of '…' # assert reader.read_byte == 0x80 # 2nd byte of '…' # assert reader.read_char == '�' # Reads the last byte as an invalid char # assert reader.read_all_bytes == b"b" # ~~~ class BytesReader super Reader # Source data to read var bytes: Bytes # The current position in `bytes` private var cursor = 0 redef fun raw_read_byte do if cursor >= bytes.length then return -1 var c = bytes[cursor] cursor += 1 return c.to_i end redef fun close do bytes = new Bytes.empty redef fun read_all_bytes do var res = bytes.slice_from(cursor) cursor = bytes.length return res end redef fun raw_read_bytes(ns, max) do if cursor >= bytes.length then return 0 var copy = max.min(bytes.length - cursor) bytes.items.copy_to(ns, copy, cursor, 0) cursor += copy return copy end redef fun eof do return cursor >= bytes.length end # `Stream` reading from a `String` source # # This class has the same behavior as `BytesReader` # except for its constructor accepting a `String`. # # ~~~ # var reader = new StringReader("a…b") # assert reader.read_char == 'a' # assert reader.read_byte == 0xE2 # 1st byte of '…' # assert reader.read_byte == 0x80 # 2nd byte of '…' # assert reader.read_char == '�' # Reads the last byte as an invalid char # assert reader.read_all == "b" # ~~~ class StringReader super BytesReader autoinit source # Source data to read var source: String init do bytes = source.to_bytes redef fun close do source = "" super end end