1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
24 # Any kind of error that could be produced by an operation on Streams
29 # Any kind of stream to read/write/both to or from a source
31 # Codec used to transform raw data to text
33 # Note: defaults to UTF-8
34 var codec
: Codec = utf8_codec
is protected writable(set_codec
)
36 # Lookahead buffer for codecs
38 # Since some codecs are multibyte, a lookahead may be required
39 # to store the next bytes and consume them only if a valid character
41 protected var lookahead
: CString is noinit
43 # Capacity of the lookahead
44 protected var lookahead_capacity
= 0
46 # Current occupation of the lookahead
47 protected var lookahead_length
= 0
49 # Buffer for writing data to a stream
50 protected var write_buffer
: CString is noinit
53 var lcap
= codec
.max_lookahead
54 lookahead
= new CString(lcap
)
55 write_buffer
= new CString(lcap
)
57 lookahead_capacity
= lcap
60 # Change the codec for this stream.
61 fun codec
=(c
: Codec) do
# Grow the buffers only when the incoming codec `c` needs more lookahead
# than is currently allocated.
62 if c
.max_lookahead
> lookahead_capacity
then
# Size the new buffers from the incoming codec `c`, not from the current
# `codec`: using the current one would never enlarge the lookahead.
63 var lcap
= c
.max_lookahead
64 var lk
= new CString(lcap
)
65 var llen
= lookahead_length
# Carry over the bytes already pending in the old lookahead.
67 lookahead
.copy_to
(lk
, llen
, 0, 0)
70 lookahead_capacity
= lcap
71 write_buffer
= new CString(lcap
)
76 # Error produced by the file stream
78 # var ifs = new FileReader.open("donotmakethisfile.binx")
81 # assert ifs.last_error != null
82 var last_error
: nullable IOError = null
89 # Used to inform `self` that operations will start.
90 # Specific streams can use this to prepare some resources.
92 # Is automatically invoked at the beginning of `with` structures.
94 # Do nothing by default.
99 # Used to inform `self` that the operations are over.
100 # Specific streams can use this to free some resources.
102 # Is automatically invoked at the end of `with` structures.
104 # call `close` by default.
108 # A `Stream` that can be read from
109 abstract class Reader
112 # Reads a character. Returns `null` on EOF or timeout
113 fun read_char
: nullable Char is abstract
115 # Reads a byte. Returns a negative value on error
116 fun read_byte
: Int is abstract
# Reads at most `i` bytes with `read_bytes` and returns them as a String
fun read(i: Int): String
do
	var bytes = read_bytes(i)
	return bytes.to_s
end
121 # Read at most i bytes
123 # If i <= 0, an empty buffer will be returned
124 fun read_bytes
(i
: Int): Bytes
126 if last_error
!= null or i
<= 0 then return new Bytes.empty
127 var s
= new CString(i
)
128 var buf
= new Bytes(s
, 0, i
)
129 while i
> 0 and not eof
do
140 # Read a string until the end of the line.
142 # The line terminator '\n' and '\r\n', if any, is removed in each line.
145 # var txt = "Hello\n\nWorld\n"
146 # var i = new StringReader(txt)
147 # assert i.read_line == "Hello"
148 # assert i.read_line == ""
149 # assert i.read_line == "World"
153 # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
154 # the end of file (EOF) are considered to delimit the end of lines.
155 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
158 # var txt2 = "Hello\r\n\n\rWorld"
159 # var i2 = new StringReader(txt2)
160 # assert i2.read_line == "Hello"
161 # assert i2.read_line == ""
162 # assert i2.read_line == "\rWorld"
166 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
167 fun read_line
: String
169 if last_error
!= null then return ""
170 if eof
then return ""
171 var s
= new FlatBuffer
176 # Read all the lines until the eof.
178 # The line terminator '\n' and `\r\n` is removed in each line,
181 # var txt = "Hello\n\nWorld\n"
182 # var i = new StringReader(txt)
183 # assert i.read_lines == ["Hello", "", "World"]
186 # This method is more efficient than splitting
187 # the result of `read_all`.
189 # NOTE: SEE `read_line` for details.
190 fun read_lines
: Array[String]
192 var res
= new Array[String]
199 # Return an iterator that read each line.
201 # The line terminator '\n' and `\r\n` is removed in each line,
202 # The lines are read with `read_line`. See this method for details.
205 # var txt = "Hello\n\nWorld\n"
206 # var i = new StringReader(txt)
207 # assert i.each_line.to_a == ["Hello", "", "World"]
210 # Unlike `read_lines`, which reads all lines when called, `each_line` is lazy.
211 # Therefore, the stream should not be closed until the end of the stream is reached.
214 # i = new StringReader(txt)
215 # var el = i.each_line
217 # assert el.item == "Hello"
219 # assert el.item == ""
224 # assert not el.is_ok
225 # # closed before "world" is read
fun each_line: LineIterator
do
	# Wrap `self` in a fresh line iterator.
	var iter = new LineIterator(self)
	return iter
end
229 # Read all the stream until the eof.
231 # The content of the file is returned as a String.
234 # var txt = "Hello\n\nWorld\n"
235 # var i = new StringReader(txt)
236 # assert i.read_all == txt
238 fun read_all
: String do
239 var s
= read_all_bytes
241 if slen
== 0 then return ""
242 return codec
.decode_string
(s
.items
, s
.length
)
245 # Read all the stream until the eof.
247 # The content of the file is returned verbatim.
248 fun read_all_bytes
: Bytes
250 if last_error
!= null then return new Bytes.empty
251 var s
= new Bytes.empty
254 if c
< 0 then continue
260 # Read a string until the end of the line and append it to `s`.
262 # Unlike `read_line` and other related methods,
263 # the line terminator '\n', if any, is preserved in each line.
264 # Use the method `Text::chomp` to safely remove it.
267 # var txt = "Hello\n\nWorld\n"
268 # var i = new StringReader(txt)
269 # var b = new FlatBuffer
270 # i.append_line_to(b)
271 # assert b == "Hello\n"
272 # i.append_line_to(b)
273 # assert b == "Hello\n\n"
274 # i.append_line_to(b)
279 # If `\n` is not present at the end of the result, it means that
280 # a non-eol terminated last line was returned.
283 # var i2 = new StringReader("hello")
285 # var b2 = new FlatBuffer
286 # i2.append_line_to(b2)
287 # assert b2 == "hello"
291 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
292 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
293 fun append_line_to
(s
: Buffer)
295 if last_error
!= null then return
302 if x
== '\n' then return
307 # Is the end of the stream reached (i.e. is there nothing left to read)?
308 # This function returns 'false' if there is something to read.
309 fun eof
: Bool is abstract
311 # Read the next sequence of non whitespace characters.
313 # Leading whitespace characters are skipped.
314 # The first whitespace character that follows the result is consumed.
316 # An empty string is returned if the end of the file or an error is encountered.
319 # var w = new StringReader(" Hello, \n\t World!")
320 # assert w.read_word == "Hello,"
321 # assert w.read_char == '\n'
322 # assert w.read_word == "World!"
323 # assert w.read_word == ""
326 # `Char::is_whitespace` determines what is a whitespace.
327 fun read_word
: String
329 var buf
= new FlatBuffer
330 var c
= read_nonwhitespace
335 if c
== null then break
336 if c
.is_whitespace
then break
344 # Skip whitespace characters (if any) then return the following non-whitespace character.
346 # Returns the character read.
347 # Returns `null` on end of file or error.
349 # In fact, this method works like `read_char` except it skips whitespace.
352 # var w = new StringReader(" \nab\tc")
353 # assert w.read_nonwhitespace == 'a'
354 # assert w.read_nonwhitespace == 'b'
355 # assert w.read_nonwhitespace == 'c'
356 # assert w.read_nonwhitespace == null
359 # `Char::is_whitespace` determines what is a whitespace.
360 fun read_nonwhitespace
: nullable Char
362 var c
: nullable Char = null
365 if c
== null or not c
.is_whitespace
then break
371 # Iterator returned by `Reader::each_line`.
372 # See the aforementioned method for details.
374 super Iterator[String]
376 # The original stream
381 var res
= not stream
.eof
382 if not res
and close_on_finish
then stream
.close
390 line
= stream
.read_line
396 # The last line read (cache)
397 private var line
: nullable String = null
402 if line
== null then item
407 # Close the stream when the stream is at the EOF.
410 var close_on_finish
= false is writable
414 if close_on_finish
then stream
.close
418 # `Reader` capable of declaring if readable without blocking
419 abstract class PollableReader
422 # Is there something to read? (without blocking)
423 fun poll_in
: Bool is abstract
427 # A `Stream` that can be written to
428 abstract class Writer
431 # Writes bytes from `s`
432 fun write_bytes
(s
: Bytes) is abstract
# Writes the text `s`
#
# Abstract: concrete writers provide the implementation.
435 fun write
(s
: Text) is abstract
437 # Write a single byte
438 fun write_byte
(value
: Byte) is abstract
# Writes the single character `c` by converting it to a string and calling `write`
fun write_char(c: Char)
do
	var text = c.to_s
	write(text)
end
443 # Can the stream be used to write
444 fun is_writable
: Bool is abstract
447 # Things that can be efficiently written to a `Writer`
449 # The point of this interface is to allow the instance to be efficiently
450 # written into a `Writer`.
452 # Ready-to-save documents usually provide this interface.
454 # Write itself to a `stream`
455 # The specific logic is left to the concrete subclasses
456 fun write_to
(stream
: Writer) is abstract
458 # Like `write_to` but return a new String (may be quite large)
460 # This functionality is anecdotal, since the point
461 # of a streamable object is to be efficiently written to a
462 # stream without having to allocate and concatenate strings
463 fun write_to_string
: String
465 var stream
= new StringWriter
redef fun write_to(s)
do
	# Delegate to `write_bytes`, passing `self` as the payload.
	s.write_bytes(self)
end
redef fun write_to_string
do
	# Shortcut: return `to_s` directly.
	return to_s
end
redef fun write_to(stream)
do
	# Delegate to `write`, passing `self` as the text to emit.
	stream.write(self)
end
483 # Input streams with a buffered input for efficiency purposes
484 abstract class BufferedReader
488 if last_error
!= null then return null
490 last_error
= new IOError("Stream has reached eof")
493 # TODO: Fix when supporting UTF-8
494 var c
= _buffer
[_buffer_pos
].to_i
.code_point
501 if last_error
!= null then return -1
503 last_error
= new IOError("Stream has reached eof")
506 var c
= _buffer
[_buffer_pos
]
511 # Resets the internal buffer
517 # Peeks up to `i` bytes in the buffer
519 # The operation does not consume the buffer
522 # var x = new FileReader.open("File.txt")
523 # assert x.peek(5) == x.read(5)
525 fun peek
(i
: Int): Bytes do
526 if eof
then return new Bytes.empty
527 var remsp
= _buffer_length
- _buffer_pos
529 var bf
= new Bytes.with_capacity
(i
)
530 bf
.append_ns_from
(_buffer
, i
, _buffer_pos
)
533 var bf
= new Bytes.with_capacity
(i
)
534 bf
.append_ns_from
(_buffer
, remsp
, _buffer_pos
)
535 _buffer_pos
= _buffer_length
536 read_intern
(i
- bf
.length
, bf
)
537 remsp
= _buffer_length
- _buffer_pos
538 var full_len
= bf
.length
+ remsp
539 if full_len
> _buffer_capacity
then
540 var c
= _buffer_capacity
541 while c
< full_len
do c
= c
* 2 + 2
544 var nns
= new CString(_buffer_capacity
)
545 bf
.items
.copy_to
(nns
, bf
.length
, 0, 0)
546 _buffer
.copy_to
(nns
, remsp
, _buffer_pos
, bf
.length
)
549 _buffer_length
= full_len
553 redef fun read_bytes
(i
)
555 if last_error
!= null then return new Bytes.empty
556 var buf
= new Bytes.with_capacity
(i
)
561 # Fills `buf` with at most `i` bytes read from `self`
562 private fun read_intern
(i
: Int, buf
: Bytes): Int do
565 var bufsp
= _buffer_length
- p
568 buf
.append_ns_from
(_buffer
, i
, p
)
571 _buffer_pos
= _buffer_length
572 var readln
= _buffer_length
- p
573 buf
.append_ns_from
(_buffer
, readln
, p
)
574 var rd
= read_intern
(i
- readln
, buf
)
578 redef fun read_all_bytes
580 if last_error
!= null then return new Bytes.empty
581 var s
= new Bytes.with_capacity
(10)
585 var k
= _buffer_length
587 s
.append_ns_from
(b
, rd_sz
, j
)
594 redef fun append_line_to
(s
)
596 var lb
= new Bytes.with_capacity
(10)
598 # First phase: look for a '\n'
600 while i
< _buffer_length
and _buffer
[i
] != 0xAu
8 do
605 if i
< _buffer_length
then
606 assert _buffer
[i
] == 0xAu
8
613 # if there is something to append
614 if i
> _buffer_pos
then
615 # Copy from the buffer to the string
645 if _buffer_pos
< _buffer_length
then return false
646 if end_reached
then return true
648 return _buffer_pos
>= _buffer_length
and end_reached
652 private var buffer
: CString = new CString(0)
654 # The current position in the buffer
655 private var buffer_pos
= 0
657 # Length of the current buffer (i.e. number of bytes in the buffer)
658 private var buffer_length
= 0
660 # Capacity of the buffer
661 private var buffer_capacity
= 0
# Fills the buffer
#
# NOTE(review): abstract here; presumably refills `_buffer` from the
# underlying source and updates `end_reached` — confirm in subclasses.
664 protected fun fill_buffer
is abstract
666 # Has the last fill_buffer reached the end
667 protected fun end_reached
: Bool is abstract
669 # Allocate a `_buffer` for a given `capacity`.
670 protected fun prepare_buffer
(capacity
: Int)
672 _buffer
= new CString(capacity
)
673 _buffer_pos
= 0 # need to read
675 _buffer_capacity
= capacity
679 # A `Stream` that can be written to and read from
680 abstract class Duplex
685 # Write to `bytes` in memory
688 # var writer = new BytesWriter
690 # writer.write "Strings "
691 # writer.write_char '&'
692 # writer.write_byte 0x20u8
693 # writer.write_bytes "bytes".to_bytes
695 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
696 # assert writer.bytes.to_s == "Strings & bytes"
699 # As with any binary data, UTF-8 code points encoded on two bytes or more
700 # can be constructed byte by byte.
703 # writer = new BytesWriter
705 # # Write just the character first half
706 # writer.write_byte 0xC2u8
707 # assert writer.to_s == "\\xC2"
708 # assert writer.bytes.to_s == "�"
710 # # Complete the character
711 # writer.write_byte 0xA2u8
712 # assert writer.to_s == "\\xC2\\xA2"
713 # assert writer.bytes.to_s == "¢"
719 var bytes
= new Bytes.empty
redef fun to_s
do
	# Render the accumulated `bytes` through `chexdigest`.
	var digest = bytes.chexdigest
	return digest
end
725 if closed
then return
726 str
.append_to_bytes bytes
729 redef fun write_char
(c
)
731 if closed
then return
735 redef fun write_byte
(value
)
737 if closed
then return
741 redef fun write_bytes
(b
)
743 if closed
then return
747 # Is the stream closed?
748 protected var closed
= false
redef fun close
do
	# Only flips the flag; write operations check `closed` before acting.
	closed = true
end
redef fun is_writable
do
	# Writable for as long as `close` has not been called.
	var writable = not closed
	return writable
end
754 # `Stream` writing to a `String`
756 # This class has the same behavior as `BytesWriter`
757 # except for `to_s` which decodes `bytes` to a string.
760 # var writer = new StringWriter
762 # writer.write "Strings "
763 # writer.write_char '&'
764 # writer.write_byte 0x20u8
765 # writer.write_bytes "bytes".to_bytes
767 # assert writer.to_s == "Strings & bytes"
redef fun to_s
do
	# Convert the accumulated `bytes` with their own `to_s`.
	var text = bytes.to_s
	return text
end
775 # Read from `bytes` in memory
778 # var reader = new BytesReader(b"a…b")
779 # assert reader.read_char == 'a'
780 # assert reader.read_byte == 0xE2 # 1st byte of '…'
781 # assert reader.read_byte == 0x80 # 2nd byte of '…'
782 # assert reader.read_char == '�' # Reads the last byte as an invalid char
783 # assert reader.read_all_bytes == b"b"
788 # Source data to read
791 # The current position in `bytes`
792 private var cursor
= 0
796 if cursor
>= bytes
.length
then return null
798 var len
= bytes
.items
.length_of_char_at
(cursor
)
799 var char
= bytes
.items
.char_at
(cursor
)
806 if cursor
>= bytes
.length
then return -1
808 var c
= bytes
[cursor
]
redef fun close
do
	# Replace the source data with an empty `Bytes`.
	var nothing = new Bytes.empty
	bytes = nothing
end
815 redef fun read_all_bytes
817 var res
= bytes
.slice_from
(cursor
)
818 cursor
= bytes
.length
redef fun eof
do
	# At end of stream once `cursor` has reached the length of `bytes`.
	var total = bytes.length
	return cursor >= total
end
825 # `Stream` reading from a `String` source
827 # This class has the same behavior as `BytesReader`
828 # except for its constructor accepting a `String`.
831 # var reader = new StringReader("a…b")
832 # assert reader.read_char == 'a'
833 # assert reader.read_byte == 0xE2 # 1st byte of '…'
834 # assert reader.read_byte == 0x80 # 2nd byte of '…'
835 # assert reader.read_char == '�' # Reads the last byte as an invalid char
836 # assert reader.read_all == "b"
843 # Source data to read
init do
	# Convert `source` with `to_bytes` and use the result as the data to read.
	var encoded = source.to_bytes
	bytes = encoded
end