1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
14 intrude import text
::ropes
25 # Any kind of error that could be produced by an operation on Streams
30 # Any kind of stream to read/write/both to or from a source
32 # Codec used to transform raw data to text
34 # Note: defaults to UTF-8
35 var codec
: Codec = utf8_codec
is protected writable(set_codec
)
37 # Lookahead buffer for codecs
39 # Since some codecs are multibyte, a lookahead may be required
40 # to store the next bytes and consume them only if a valid character
42 protected var lookahead
: CString is noinit
44 # Capacity of the lookahead
45 protected var lookahead_capacity
= 0
47 # Current occupation of the lookahead
48 protected var lookahead_length
= 0
50 # Buffer for writing data to a stream
51 protected var write_buffer
: CString is noinit
54 var lcap
= codec
.max_lookahead
55 lookahead
= new CString(lcap
)
56 write_buffer
= new CString(lcap
)
58 lookahead_capacity
= lcap
61 # Change the codec for this stream.
fun codec=(c: Codec) do
	# Size the buffers for the codec being installed (`c`), not for the
	# codec currently in place: the current one is what the existing
	# capacity was already derived from.
	var needed = c.max_lookahead
	if needed > lookahead_capacity then
		var grown = new CString(needed)
		var pending = lookahead_length
		# Carry over the bytes still waiting in the old lookahead.
		if pending > 0 then
			lookahead.copy_to(grown, pending, 0, 0)
		end
		lookahead = grown
		lookahead_capacity = needed
		write_buffer = new CString(needed)
	end
	# Store the codec through the protected setter declared on the attribute.
	set_codec(c)
end
77 # Error produced by the file stream
79 # var ifs = new FileReader.open("donotmakethisfile.binx")
82 # assert ifs.last_error != null
83 var last_error
: nullable IOError = null
90 # Used to inform `self` that operations will start.
91 # Specific streams can use this to prepare some resources.
93 # Is automatically invoked at the beginning of `with` structures.
95 # Do nothing by default.
100 # Used to inform `self` that the operations are over.
101 # Specific streams can use this to free some resources.
103 # Is automatically invoked at the end of `with` structures.
105 # call `close` by default.
109 # A `Stream` that can be read from
110 abstract class Reader
113 # Reads a character. Returns `null` on EOF or timeout
114 fun read_char
: nullable Char is abstract
116 # Reads a byte. Returns a negative value on error
117 fun read_byte
: Int is abstract
119 # Reads a String of at most `i` length
fun read(i: Int): String
do
	# Delegate to the byte-level reader, then convert its result with `to_s`.
	var raw = read_bytes(i)
	return raw.to_s
end
122 # Read at most i bytes
124 # If i <= 0, an empty buffer will be returned
125 fun read_bytes
(i
: Int): Bytes
127 if last_error
!= null or i
<= 0 then return new Bytes.empty
128 var s
= new CString(i
)
129 var buf
= new Bytes(s
, 0, i
)
130 while i
> 0 and not eof
do
141 # Read a string until the end of the line.
143 # The line terminator '\n' and '\r\n', if any, is removed in each line.
146 # var txt = "Hello\n\nWorld\n"
147 # var i = new StringReader(txt)
148 # assert i.read_line == "Hello"
149 # assert i.read_line == ""
150 # assert i.read_line == "World"
154 # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
155 # the end of file (EOF) are considered to delimit the end of lines.
156 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
159 # var txt2 = "Hello\r\n\n\rWorld"
160 # var i2 = new StringReader(txt2)
161 # assert i2.read_line == "Hello"
162 # assert i2.read_line == ""
163 # assert i2.read_line == "\rWorld"
167 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
168 fun read_line
: String
170 if last_error
!= null then return ""
171 if eof
then return ""
172 var s
= new FlatBuffer
177 # Read all the lines until the eof.
179 # The line terminator '\n' and `\r\n` is removed in each line,
182 # var txt = "Hello\n\nWorld\n"
183 # var i = new StringReader(txt)
184 # assert i.read_lines == ["Hello", "", "World"]
187 # This method is more efficient than splitting
188 # the result of `read_all`.
190 # NOTE: SEE `read_line` for details.
191 fun read_lines
: Array[String]
193 var res
= new Array[String]
200 # Return an iterator that reads each line.
202 # The line terminator '\n' and `\r\n` is removed in each line,
203 # The lines are read with `read_line`. See this method for details.
206 # var txt = "Hello\n\nWorld\n"
207 # var i = new StringReader(txt)
208 # assert i.each_line.to_a == ["Hello", "", "World"]
211 # Unlike `read_lines` that read all lines at the call, `each_line` is lazy.
212 # Therefore, the stream should not be closed until the end of the stream.
215 # i = new StringReader(txt)
216 # var el = i.each_line
218 # assert el.item == "Hello"
220 # assert el.item == ""
225 # assert not el.is_ok
226 # # closed before "world" is read
fun each_line: LineIterator
do
	# Wrap `self` in a fresh iterator.
	var lines = new LineIterator(self)
	return lines
end
230 # Read all the stream until the eof.
232 # The content of the file is returned as a String.
235 # var txt = "Hello\n\nWorld\n"
236 # var i = new StringReader(txt)
237 # assert i.read_all == txt
239 fun read_all
: String do
240 var s
= read_all_bytes
242 if slen
== 0 then return ""
245 var str
= s
.items
.clean_utf8
(slen
)
246 slen
= str
.byte_length
250 # The 129 size was decided more or less arbitrarily
251 # It will require some more benchmarking to compute
252 # if this is the best size or not
254 if chunksz
> remsp
then
255 rets
+= new FlatString.with_infos
(sits
, remsp
, pos
)
258 var st
= sits
.find_beginning_of_char_at
(pos
+ chunksz
- 1)
259 var byte_length
= st
- pos
260 rets
+= new FlatString.with_infos
(sits
, byte_length
, pos
)
264 if rets
isa Concat then return rets
.balance
268 # Read all the stream until the eof.
270 # The content of the file is returned verbatim.
271 fun read_all_bytes
: Bytes
273 if last_error
!= null then return new Bytes.empty
274 var s
= new Bytes.empty
277 if c
< 0 then continue
283 # Read a string until the end of the line and append it to `s`.
285 # Unlike `read_line` and other related methods,
286 # the line terminator '\n', if any, is preserved in each line.
287 # Use the method `Text::chomp` to safely remove it.
290 # var txt = "Hello\n\nWorld\n"
291 # var i = new StringReader(txt)
292 # var b = new FlatBuffer
293 # i.append_line_to(b)
294 # assert b == "Hello\n"
295 # i.append_line_to(b)
296 # assert b == "Hello\n\n"
297 # i.append_line_to(b)
302 # If `\n` is not present at the end of the result, it means that
303 # a non-eol terminated last line was returned.
306 # var i2 = new StringReader("hello")
308 # var b2 = new FlatBuffer
309 # i2.append_line_to(b2)
310 # assert b2 == "hello"
314 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
315 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
316 fun append_line_to
(s
: Buffer)
318 if last_error
!= null then return
325 if x
== '\n' then return
330 # Has the end of the stream been reached?
331 # This function returns 'false' if there is something to read.
332 fun eof
: Bool is abstract
334 # Read the next sequence of non whitespace characters.
336 # Leading whitespace characters are skipped.
337 # The first whitespace character that follows the result is consumed.
339 # An empty string is returned if the end of the file or an error is encountered.
342 # var w = new StringReader(" Hello, \n\t World!")
343 # assert w.read_word == "Hello,"
344 # assert w.read_char == '\n'
345 # assert w.read_word == "World!"
346 # assert w.read_word == ""
349 # `Char::is_whitespace` determines what is a whitespace.
350 fun read_word
: String
352 var buf
= new FlatBuffer
353 var c
= read_nonwhitespace
358 if c
== null then break
359 if c
.is_whitespace
then break
367 # Skip whitespace characters (if any) then return the following non-whitespace character.
369 # Returns the code point of the character.
370 # Returns `null` on end of file or error.
372 # In fact, this method works like `read_char` except it skips whitespace.
375 # var w = new StringReader(" \nab\tc")
376 # assert w.read_nonwhitespace == 'a'
377 # assert w.read_nonwhitespace == 'b'
378 # assert w.read_nonwhitespace == 'c'
379 # assert w.read_nonwhitespace == null
382 # `Char::is_whitespace` determines what is a whitespace.
383 fun read_nonwhitespace
: nullable Char
385 var c
: nullable Char = null
388 if c
== null or not c
.is_whitespace
then break
394 # Iterator returned by `Reader::each_line`.
395 # See the aforementioned method for details.
397 super Iterator[String]
399 # The original stream
404 var res
= not stream
.eof
405 if not res
and close_on_finish
then stream
.close
413 line
= stream
.read_line
419 # The last line read (cache)
420 private var line
: nullable String = null
425 if line
== null then item
430 # Close the stream when the stream is at the EOF.
433 var close_on_finish
= false is writable
437 if close_on_finish
then stream
.close
441 # `Reader` capable of declaring if readable without blocking
442 abstract class PollableReader
445 # Is there something to read? (without blocking)
446 fun poll_in
: Bool is abstract
450 # A `Stream` that can be written to
451 abstract class Writer
454 # Writes bytes from `s`
455 fun write_bytes
(s
: Bytes) is abstract
458 fun write
(s
: Text) is abstract
460 # Write a single byte
461 fun write_byte
(value
: Byte) is abstract
463 # Writes a single char
fun write_char(c: Char)
do
	# Convert the character to text and write it through `write`.
	var text = c.to_s
	write(text)
end
466 # Can the stream be used to write
467 fun is_writable
: Bool is abstract
470 # Things that can be efficiently written to a `Writer`
472 # The point of this interface is to allow the instance to be efficiently
473 # written into a `Writer`.
475 # Ready-to-save documents usually provide this interface.
477 # Write itself to a `stream`
478 # The specific logic is left to the concrete subclasses
479 fun write_to
(stream
: Writer) is abstract
481 # Like `write_to` but return a new String (may be quite large)
483 # This functionality is anecdotal, since the point
484 # of a streamable object is to be efficiently written to a
485 # stream without having to allocate and concatenate strings
486 fun write_to_string
: String
488 var stream
= new StringWriter
redef fun write_to(s)
do
	# Forward `self` to the byte-level entry point of the writer `s`.
	s.write_bytes(self)
end
redef fun write_to_string
do
	# The result of `to_s` is returned directly.
	return to_s
end
redef fun write_to(stream)
do
	# Forward `self` to the `write` entry point of `stream`.
	stream.write(self)
end
506 # Input streams with a buffered input for efficiency purposes
507 abstract class BufferedReader
511 if last_error
!= null then return null
513 last_error
= new IOError("Stream has reached eof")
516 # TODO: Fix when supporting UTF-8
517 var c
= _buffer
[_buffer_pos
].to_i
.code_point
524 if last_error
!= null then return -1
526 last_error
= new IOError("Stream has reached eof")
529 var c
= _buffer
[_buffer_pos
]
534 # Resets the internal buffer
540 # Peeks up to `n` bytes in the buffer
542 # The operation does not consume the buffer
545 # var x = new FileReader.open("File.txt")
546 # assert x.peek(5) == x.read(5)
548 fun peek
(i
: Int): Bytes do
549 if eof
then return new Bytes.empty
550 var remsp
= _buffer_length
- _buffer_pos
552 var bf
= new Bytes.with_capacity
(i
)
553 bf
.append_ns_from
(_buffer
, i
, _buffer_pos
)
556 var bf
= new Bytes.with_capacity
(i
)
557 bf
.append_ns_from
(_buffer
, remsp
, _buffer_pos
)
558 _buffer_pos
= _buffer_length
559 read_intern
(i
- bf
.length
, bf
)
560 remsp
= _buffer_length
- _buffer_pos
561 var full_len
= bf
.length
+ remsp
562 if full_len
> _buffer_capacity
then
563 var c
= _buffer_capacity
564 while c
< full_len
do c
= c
* 2 + 2
567 var nns
= new CString(_buffer_capacity
)
568 bf
.items
.copy_to
(nns
, bf
.length
, 0, 0)
569 _buffer
.copy_to
(nns
, remsp
, _buffer_pos
, bf
.length
)
572 _buffer_length
= full_len
576 redef fun read_bytes
(i
)
578 if last_error
!= null then return new Bytes.empty
579 var buf
= new Bytes.with_capacity
(i
)
584 # Fills `buf` with at most `i` bytes read from `self`
585 private fun read_intern
(i
: Int, buf
: Bytes): Int do
588 var bufsp
= _buffer_length
- p
591 buf
.append_ns_from
(_buffer
, i
, p
)
594 _buffer_pos
= _buffer_length
595 var readln
= _buffer_length
- p
596 buf
.append_ns_from
(_buffer
, readln
, p
)
597 var rd
= read_intern
(i
- readln
, buf
)
601 redef fun read_all_bytes
603 if last_error
!= null then return new Bytes.empty
604 var s
= new Bytes.with_capacity
(10)
608 var k
= _buffer_length
610 s
.append_ns_from
(b
, rd_sz
, j
)
617 redef fun append_line_to
(s
)
619 var lb
= new Bytes.with_capacity
(10)
621 # First phase: look for a '\n'
623 while i
< _buffer_length
and _buffer
[i
] != 0xAu
8 do
628 if i
< _buffer_length
then
629 assert _buffer
[i
] == 0xAu
8
636 # if there is something to append
637 if i
> _buffer_pos
then
638 # Copy from the buffer to the string
668 if _buffer_pos
< _buffer_length
then return false
669 if end_reached
then return true
671 return _buffer_pos
>= _buffer_length
and end_reached
675 private var buffer
: CString = new CString(0)
677 # The current position in the buffer
678 private var buffer_pos
= 0
680 # Length of the current buffer (i.e. number of bytes in the buffer)
681 private var buffer_length
= 0
683 # Capacity of the buffer
684 private var buffer_capacity
= 0
687 protected fun fill_buffer
is abstract
689 # Has the last fill_buffer reached the end
690 protected fun end_reached
: Bool is abstract
692 # Allocate a `_buffer` for a given `capacity`.
693 protected fun prepare_buffer
(capacity
: Int)
695 _buffer
= new CString(capacity
)
696 _buffer_pos
= 0 # need to read
698 _buffer_capacity
= capacity
702 # A `Stream` that can be written to and read from
703 abstract class Duplex
708 # Write to `bytes` in memory
711 # var writer = new BytesWriter
713 # writer.write "Strings "
714 # writer.write_char '&'
715 # writer.write_byte 0x20u8
716 # writer.write_bytes "bytes".to_bytes
718 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
719 # assert writer.bytes.to_s == "Strings & bytes"
722 # As with any binary data, UTF-8 code points encoded on two bytes or more
723 # can be constructed byte by byte.
726 # writer = new BytesWriter
728 # # Write just the character first half
729 # writer.write_byte 0xC2u8
730 # assert writer.to_s == "\\xC2"
731 # assert writer.bytes.to_s == "�"
733 # # Complete the character
734 # writer.write_byte 0xA2u8
735 # assert writer.to_s == "\\xC2\\xA2"
736 # assert writer.bytes.to_s == "¢"
742 var bytes
= new Bytes.empty
redef fun to_s
do
	# Returns the result of `chexdigest` on the accumulated `bytes`.
	var digest = bytes.chexdigest
	return digest
end
748 if closed
then return
749 str
.append_to_bytes bytes
752 redef fun write_char
(c
)
754 if closed
then return
758 redef fun write_byte
(value
)
760 if closed
then return
764 redef fun write_bytes
(b
)
766 if closed
then return
770 # Is the stream closed?
771 protected var closed
= false
redef fun close
do
	# Only flips the flag; the write methods check `closed` before acting.
	closed = true
end
redef fun is_writable
do
	# Writable exactly as long as `close` has not set the `closed` flag.
	if closed then return false
	return true
end
777 # `Stream` writing to a `String`
779 # This class has the same behavior as `BytesWriter`
780 # except for `to_s` which decodes `bytes` to a string.
783 # var writer = new StringWriter
785 # writer.write "Strings "
786 # writer.write_char '&'
787 # writer.write_byte 0x20u8
788 # writer.write_bytes "bytes".to_bytes
790 # assert writer.to_s == "Strings & bytes"
redef fun to_s
do
	# Convert the accumulated `bytes` to a string with `to_s`.
	var text = bytes.to_s
	return text
end
798 # Read from `bytes` in memory
801 # var reader = new BytesReader(b"a…b")
802 # assert reader.read_char == 'a'
803 # assert reader.read_byte == 0xE2 # 1st byte of '…'
804 # assert reader.read_byte == 0x80 # 2nd byte of '…'
805 # assert reader.read_char == '�' # Reads the last byte as an invalid char
806 # assert reader.read_all_bytes == b"b"
811 # Source data to read
814 # The current position in `bytes`
815 private var cursor
= 0
819 if cursor
>= bytes
.length
then return null
821 var len
= bytes
.items
.length_of_char_at
(cursor
)
822 var char
= bytes
.items
.char_at
(cursor
)
829 if cursor
>= bytes
.length
then return -1
831 var c
= bytes
[cursor
]
redef fun close
do
	# Replace the source with an empty buffer; `cursor` is left as is.
	var nothing = new Bytes.empty
	bytes = nothing
end
838 redef fun read_all_bytes
840 var res
= bytes
.slice_from
(cursor
)
841 cursor
= bytes
.length
redef fun eof
do
	# End of stream once the cursor has reached the length of `bytes`.
	var size = bytes.length
	return cursor >= size
end
848 # `Stream` reading from a `String` source
850 # This class has the same behavior as `BytesReader`
851 # except for its constructor accepting a `String`.
854 # var reader = new StringReader("a…b")
855 # assert reader.read_char == 'a'
856 # assert reader.read_byte == 0xE2 # 1st byte of '…'
857 # assert reader.read_byte == 0x80 # 2nd byte of '…'
858 # assert reader.read_char == '�' # Reads the last byte as an invalid char
859 # assert reader.read_all == "b"
866 # Source data to read
init do
	# Initialise `bytes` from `source.to_bytes`.
	var raw = source.to_bytes
	bytes = raw
end