1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
14 intrude import text
::ropes
25 # Any kind of error that could be produced by an operation on Streams
30 # Any kind of stream to read/write/both to or from a source
32 # Codec used to transform raw data to text
34 # Note: defaults to UTF-8
35 var codec
: Codec = utf8_codec
is protected writable(set_codec
)
37 # Lookahead buffer for codecs
39 # Since some codecs are multibyte, a lookahead may be required
40 # to store the next bytes and consume them only if a valid character
42 protected var lookahead
: CString is noinit
44 # Capacity of the lookahead
45 protected var lookahead_capacity
= 0
47 # Current occupation of the lookahead
48 protected var lookahead_length
= 0
50 # Buffer for writing data to a stream
51 protected var write_buffer
: CString is noinit
54 var lcap
= codec
.max_lookahead
55 lookahead
= new CString(lcap
)
56 write_buffer
= new CString(lcap
)
58 lookahead_capacity
= lcap
61 # Change the codec for this stream.
# When `c` needs more lookahead than `lookahead_capacity`, a larger buffer
# is allocated, the `lookahead_length` pending bytes are copied into it,
# and `write_buffer` is reallocated with the same capacity.
62 fun codec
=(c
: Codec) do
63 if c
.max_lookahead
> lookahead_capacity
then
# Size the new buffers from the incoming codec `c`, not from the current
# `codec`: sizing from the current one would never grow the buffers.
64 var lcap
= c
.max_lookahead
65 var lk
= new CString(lcap
)
66 var llen
= lookahead_length
# Preserve the bytes already waiting in the lookahead.
68 lookahead
.copy_to
(lk
, llen
, 0, 0)
71 lookahead_capacity
= lcap
72 write_buffer
= new CString(lcap
)
77 # Error produced by the file stream
79 # var ifs = new FileReader.open("donotmakethisfile.binx")
82 # assert ifs.last_error != null
83 var last_error
: nullable IOError = null
90 # Used to inform `self` that operations will start.
91 # Specific streams can use this to prepare some resources.
93 # Is automatically invoked at the beginning of `with` structures.
95 # Do nothing by default.
100 # Used to inform `self` that the operations are over.
101 # Specific streams can use this to free some resources.
103 # Is automatically invoked at the end of `with` structures.
105 # call `close` by default.
109 # A `Stream` that can be read from
110 abstract class Reader
113 # Reads a character. Returns `null` on EOF or timeout
114 fun read_char
: nullable Char is abstract
116 # Reads a byte. Returns `null` on EOF or timeout
117 fun read_byte
: nullable Byte is abstract
119 # Reads a String made of at most `i` bytes
# The result is `read_bytes(i)` converted with `to_s`.
120 fun read
(i
: Int): String do return read_bytes
(i
).to_s
122 # Read at most i bytes
123 fun read_bytes
(i
: Int): Bytes
125 if last_error
!= null then return new Bytes.empty
126 var s
= new CString(i
)
127 var buf
= new Bytes(s
, 0, 0)
128 while i
> 0 and not eof
do
138 # Read a string until the end of the line.
140 # The line terminator '\n' and '\r\n', if any, is removed in each line.
143 # var txt = "Hello\n\nWorld\n"
144 # var i = new StringReader(txt)
145 # assert i.read_line == "Hello"
146 # assert i.read_line == ""
147 # assert i.read_line == "World"
151 # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
152 # the end of file (EOF) are considered to delimit the end of lines.
153 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
156 # var txt2 = "Hello\r\n\n\rWorld"
157 # var i2 = new StringReader(txt2)
158 # assert i2.read_line == "Hello"
159 # assert i2.read_line == ""
160 # assert i2.read_line == "\rWorld"
164 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
165 fun read_line
: String
167 if last_error
!= null then return ""
168 if eof
then return ""
169 var s
= new FlatBuffer
174 # Read all the lines until the eof.
176 # The line terminator '\n' and `\r\n` is removed in each line,
179 # var txt = "Hello\n\nWorld\n"
180 # var i = new StringReader(txt)
181 # assert i.read_lines == ["Hello", "", "World"]
184 # This method is more efficient than splitting
185 # the result of `read_all`.
187 # NOTE: SEE `read_line` for details.
188 fun read_lines
: Array[String]
190 var res
= new Array[String]
197 # Return an iterator that reads each line.
199 # The line terminator '\n' and `\r\n` is removed in each line,
200 # The lines are read with `read_line`. See this method for details.
203 # var txt = "Hello\n\nWorld\n"
204 # var i = new StringReader(txt)
205 # assert i.each_line.to_a == ["Hello", "", "World"]
208 # Unlike `read_lines`, which reads all lines at the call, `each_line` is lazy.
209 # Therefore, the stream should not be closed until the end of the stream is reached.
212 # i = new StringReader(txt)
213 # var el = i.each_line
215 # assert el.item == "Hello"
217 # assert el.item == ""
222 # assert not el.is_ok
223 # # closed before "world" is read
225 fun each_line
: LineIterator do return new LineIterator(self)
227 # Read all the stream until the eof.
229 # The content of the file is returned as a String.
232 # var txt = "Hello\n\nWorld\n"
233 # var i = new StringReader(txt)
234 # assert i.read_all == txt
236 fun read_all
: String do
237 var s
= read_all_bytes
239 if slen
== 0 then return ""
242 var str
= s
.items
.clean_utf8
(slen
)
243 slen
= str
.byte_length
247 # The 129 size was decided more or less arbitrarily
248 # It will require some more benchmarking to compute
249 # if this is the best size or not
251 if chunksz
> remsp
then
252 rets
+= new FlatString.with_infos
(sits
, remsp
, pos
)
255 var st
= sits
.find_beginning_of_char_at
(pos
+ chunksz
- 1)
256 var byte_length
= st
- pos
257 rets
+= new FlatString.with_infos
(sits
, byte_length
, pos
)
261 if rets
isa Concat then return rets
.balance
265 # Read all the stream until the eof.
267 # The content of the file is returned verbatim.
268 fun read_all_bytes
: Bytes
270 if last_error
!= null then return new Bytes.empty
271 var s
= new Bytes.empty
274 if c
!= null then s
.add
(c
)
279 # Read a string until the end of the line and append it to `s`.
281 # Unlike `read_line` and other related methods,
282 # the line terminator '\n', if any, is preserved in each line.
283 # Use the method `Text::chomp` to safely remove it.
286 # var txt = "Hello\n\nWorld\n"
287 # var i = new StringReader(txt)
288 # var b = new FlatBuffer
289 # i.append_line_to(b)
290 # assert b == "Hello\n"
291 # i.append_line_to(b)
292 # assert b == "Hello\n\n"
293 # i.append_line_to(b)
298 # If `\n` is not present at the end of the result, it means that
299 # a non-eol terminated last line was returned.
302 # var i2 = new StringReader("hello")
304 # var b2 = new FlatBuffer
305 # i2.append_line_to(b2)
306 # assert b2 == "hello"
310 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
311 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
312 fun append_line_to
(s
: Buffer)
314 if last_error
!= null then return
321 if x
== '\n' then return
326 # Has the end of the stream been reached?
327 # Returns `false` while there is still something to read.
328 fun eof
: Bool is abstract
330 # Read the next sequence of non whitespace characters.
332 # Leading whitespace characters are skipped.
333 # The first whitespace character that follows the result is consumed.
335 # An empty string is returned if the end of the file or an error is encountered.
338 # var w = new StringReader(" Hello, \n\t World!")
339 # assert w.read_word == "Hello,"
340 # assert w.read_char == '\n'
341 # assert w.read_word == "World!"
342 # assert w.read_word == ""
345 # `Char::is_whitespace` determines what is a whitespace.
346 fun read_word
: String
348 var buf
= new FlatBuffer
349 var c
= read_nonwhitespace
354 if c
== null then break
355 if c
.is_whitespace
then break
363 # Skip whitespace characters (if any) then return the following non-whitespace character.
365 # Returns the code point of the character.
366 # Returns `null` on end of file or error.
368 # In fact, this method works like `read_char` except it skips whitespace.
371 # var w = new StringReader(" \nab\tc")
372 # assert w.read_nonwhitespace == 'a'
373 # assert w.read_nonwhitespace == 'b'
374 # assert w.read_nonwhitespace == 'c'
375 # assert w.read_nonwhitespace == null
378 # `Char::is_whitespace` determines what is a whitespace.
379 fun read_nonwhitespace
: nullable Char
381 var c
: nullable Char = null
384 if c
== null or not c
.is_whitespace
then break
390 # Iterator returned by `Reader::each_line`.
391 # See the aforementioned method for details.
393 super Iterator[String]
395 # The original stream
400 var res
= not stream
.eof
401 if not res
and close_on_finish
then stream
.close
409 line
= stream
.read_line
415 # The last line read (cache)
416 private var line
: nullable String = null
421 if line
== null then item
426 # Close the stream when the stream is at the EOF.
429 var close_on_finish
= false is writable
433 if close_on_finish
then stream
.close
437 # `Reader` capable of declaring if readable without blocking
438 abstract class PollableReader
441 # Is there something to read? (without blocking)
442 fun poll_in
: Bool is abstract
446 # A `Stream` that can be written to
447 abstract class Writer
450 # Writes bytes from `s`
451 fun write_bytes
(s
: Bytes) is abstract
# Writes the text `s`
# `write_char` relies on this method by default.
454 fun write
(s
: Text) is abstract
456 # Write a single byte
457 fun write_byte
(value
: Byte) is abstract
459 # Writes a single char
# By default, `c` is converted with `to_s` and passed to `write`.
460 fun write_char
(c
: Char) do write
(c
.to_s
)
462 # Can the stream be used to write
463 fun is_writable
: Bool is abstract
466 # Things that can be efficiently written to a `Writer`
468 # The point of this interface is to allow the instance to be efficiently
469 # written into a `Writer`.
471 # Ready-to-save documents usually provide this interface.
473 # Write itself to a `stream`
474 # The specific logic is left to the concrete subclasses
475 fun write_to
(stream
: Writer) is abstract
477 # Like `write_to` but return a new String (may be quite large)
479 # This functionality is anecdotal, since the point
480 # of a streamable object is to be efficiently written to a
481 # stream without having to allocate and concatenate strings
482 fun write_to_string
: String
484 var stream
= new StringWriter
# Writes `self` to `s` through `write_bytes`
492 redef fun write_to
(s
) do s
.write_bytes
(self)
# Returns `to_s` directly
494 redef fun write_to_string
do return to_s
# Writes `self` to `stream` through `write`
499 redef fun write_to
(stream
) do stream
.write
(self)
502 # Input streams with a buffered input for efficiency purposes
503 abstract class BufferedReader
507 if last_error
!= null then return null
509 last_error
= new IOError("Stream has reached eof")
512 # TODO: Fix when supporting UTF-8
513 var c
= _buffer
[_buffer_pos
].to_i
.code_point
520 if last_error
!= null then return null
522 last_error
= new IOError("Stream has reached eof")
525 var c
= _buffer
[_buffer_pos
]
530 # Resets the internal buffer
536 # Peeks up to `n` bytes in the buffer
538 # The operation does not consume the buffer
541 # var x = new FileReader.open("File.txt")
542 # assert x.peek(5) == x.read(5)
544 fun peek
(i
: Int): Bytes do
545 if eof
then return new Bytes.empty
546 var remsp
= _buffer_length
- _buffer_pos
548 var bf
= new Bytes.with_capacity
(i
)
549 bf
.append_ns_from
(_buffer
, i
, _buffer_pos
)
552 var bf
= new Bytes.with_capacity
(i
)
553 bf
.append_ns_from
(_buffer
, remsp
, _buffer_pos
)
554 _buffer_pos
= _buffer_length
555 read_intern
(i
- bf
.length
, bf
)
556 remsp
= _buffer_length
- _buffer_pos
557 var full_len
= bf
.length
+ remsp
558 if full_len
> _buffer_capacity
then
559 var c
= _buffer_capacity
560 while c
< full_len
do c
= c
* 2 + 2
563 var nns
= new CString(_buffer_capacity
)
564 bf
.items
.copy_to
(nns
, bf
.length
, 0, 0)
565 _buffer
.copy_to
(nns
, remsp
, _buffer_pos
, bf
.length
)
568 _buffer_length
= full_len
572 redef fun read_bytes
(i
)
574 if last_error
!= null then return new Bytes.empty
575 var buf
= new Bytes.with_capacity
(i
)
580 # Fills `buf` with at most `i` bytes read from `self`
581 private fun read_intern
(i
: Int, buf
: Bytes): Int do
584 var bufsp
= _buffer_length
- p
587 buf
.append_ns_from
(_buffer
, i
, p
)
590 _buffer_pos
= _buffer_length
591 var readln
= _buffer_length
- p
592 buf
.append_ns_from
(_buffer
, readln
, p
)
593 var rd
= read_intern
(i
- readln
, buf
)
597 redef fun read_all_bytes
599 if last_error
!= null then return new Bytes.empty
600 var s
= new Bytes.with_capacity
(10)
604 var k
= _buffer_length
606 s
.append_ns_from
(b
, rd_sz
, j
)
613 redef fun append_line_to
(s
)
615 var lb
= new Bytes.with_capacity
(10)
617 # First phase: look for a '\n'
619 while i
< _buffer_length
and _buffer
[i
] != 0xAu
8 do
624 if i
< _buffer_length
then
625 assert _buffer
[i
] == 0xAu
8
632 # if there is something to append
633 if i
> _buffer_pos
then
634 # Copy from the buffer to the string
664 if _buffer_pos
< _buffer_length
then return false
665 if end_reached
then return true
667 return _buffer_pos
>= _buffer_length
and end_reached
671 private var buffer
: CString = new CString(0)
673 # The current position in the buffer
674 private var buffer_pos
= 0
676 # Length of the current buffer (i.e. number of bytes in the buffer)
677 private var buffer_length
= 0
679 # Capacity of the buffer
680 private var buffer_capacity
= 0
# Fills the buffer
# `end_reached` tells whether the last call reached the end.
# NOTE(review): the exact contract on `_buffer_pos`/`_buffer_length` is not visible here — confirm against the concrete subclasses.
683 protected fun fill_buffer
is abstract
685 # Has the last fill_buffer reached the end
686 protected fun end_reached
: Bool is abstract
688 # Allocate a `_buffer` for a given `capacity`.
689 protected fun prepare_buffer
(capacity
: Int)
691 _buffer
= new CString(capacity
)
692 _buffer_pos
= 0 # need to read
694 _buffer_capacity
= capacity
698 # A `Stream` that can be written to and read from
699 abstract class Duplex
704 # Write to `bytes` in memory
707 # var writer = new BytesWriter
709 # writer.write "Strings "
710 # writer.write_char '&'
711 # writer.write_byte 0x20u8
712 # writer.write_bytes "bytes".to_bytes
714 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
715 # assert writer.bytes.to_s == "Strings & bytes"
718 # As with any binary data, UTF-8 code points encoded on two bytes or more
719 # can be constructed byte by byte.
722 # writer = new BytesWriter
724 # # Write just the character first half
725 # writer.write_byte 0xC2u8
726 # assert writer.to_s == "\\xC2"
727 # assert writer.bytes.to_s == "�"
729 # # Complete the character
730 # writer.write_byte 0xA2u8
731 # assert writer.to_s == "\\xC2\\xA2"
732 # assert writer.bytes.to_s == "¢"
738 var bytes
= new Bytes.empty
# Returns `bytes.chexdigest`: each byte is shown escaped, as in `\x53\x74`
740 redef fun to_s
do return bytes
.chexdigest
744 if closed
then return
745 str
.append_to_bytes bytes
748 redef fun write_char
(c
)
750 if closed
then return
754 redef fun write_byte
(value
)
756 if closed
then return
760 redef fun write_bytes
(b
)
762 if closed
then return
766 # Is the stream closed?
767 protected var closed
= false
# Marks the writer as `closed`; the write methods return early once it is set
769 redef fun close
do closed
= true
# Writable as long as `close` has not set `closed`
770 redef fun is_writable
do return not closed
773 # `Stream` writing to a `String`
775 # This class has the same behavior as `BytesWriter`
776 # except for `to_s` which decodes `bytes` to a string.
779 # var writer = new StringWriter
781 # writer.write "Strings "
782 # writer.write_char '&'
783 # writer.write_byte 0x20u8
784 # writer.write_bytes "bytes".to_bytes
786 # assert writer.to_s == "Strings & bytes"
# Returns the written `bytes` decoded to a string
791 redef fun to_s
do return bytes
.to_s
794 # Read from `bytes` in memory
797 # var reader = new BytesReader(b"a…b")
798 # assert reader.read_char == 'a'
799 # assert reader.read_byte == 0xE2u8 # 1st byte of '…'
800 # assert reader.read_byte == 0x80u8 # 2nd byte of '…'
801 # assert reader.read_char == '�' # Reads the last byte as an invalid char
802 # assert reader.read_all_bytes == b"b"
807 # Source data to read
810 # The current position in `bytes`
811 private var cursor
= 0
815 if cursor
>= bytes
.length
then return null
817 var len
= bytes
.items
.length_of_char_at
(cursor
)
818 var char
= bytes
.items
.char_at
(cursor
)
825 if cursor
>= bytes
.length
then return null
827 var c
= bytes
[cursor
]
# Discards the source data by replacing `bytes` with an empty `Bytes`
832 redef fun close
do bytes
= new Bytes.empty
834 redef fun read_all_bytes
836 var res
= bytes
.slice_from
(cursor
)
837 cursor
= bytes
.length
# True once `cursor` has reached the end of `bytes`
841 redef fun eof
do return cursor
>= bytes
.length
844 # `Stream` reading from a `String` source
846 # This class has the same behavior as `BytesReader`
847 # except for its constructor accepting a `String`.
850 # var reader = new StringReader("a…b")
851 # assert reader.read_char == 'a'
852 # assert reader.read_byte == 0xE2u8 # 1st byte of '…'
853 # assert reader.read_byte == 0x80u8 # 2nd byte of '…'
854 # assert reader.read_char == '�' # Reads the last byte as an invalid char
855 # assert reader.read_all == "b"
862 # Source data to read
# Initializes `bytes` with the bytes of `source`
865 init do bytes
= source
.to_bytes