1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
24 # Any kind of error that could be produced by an operation on Streams
29 # Any kind of stream to read/write/both to or from a source
31 # Codec used to transform raw data to text
33 # Note: defaults to UTF-8
34 var codec
: Codec = utf8_codec
is protected writable(set_codec
)
36 # Lookahead buffer for codecs
38 # Since some codecs are multibyte, a lookahead may be required
39 # to store the next bytes and consume them only if a valid character
41 protected var lookahead
: CString is noinit
43 # Capacity of the lookahead
44 protected var lookahead_capacity
= 0
46 # Current occupation of the lookahead
47 protected var lookahead_length
= 0
49 # Buffer for writing data to a stream
50 protected var write_buffer
: CString is noinit
53 var lcap
= codec
.max_lookahead
54 lookahead
= new CString(lcap
)
55 write_buffer
= new CString(lcap
)
57 lookahead_capacity
= lcap
# Change the codec for this stream.
#
# If `c` needs a larger lookahead than the current `lookahead_capacity`,
# `lookahead` and `write_buffer` are reallocated with a capacity of
# `c.max_lookahead`; the `lookahead_length` bytes already stored in the
# lookahead are copied to the new buffer.
#
# NOTE(review): the tail of this method (`lookahead = lk`, `set_codec(c)`)
# is reconstructed from the visible fragments and from the
# `writable(set_codec)` annotation of `codec` — confirm against the full file.
fun codec=(c: Codec) do
	# Size the buffers for the codec being installed (`c`), not for the
	# one currently in place: using `codec.max_lookahead` here would
	# allocate buffers that are still too small for `c`.
	var lcap = c.max_lookahead
	if lcap > lookahead_capacity then
		var lk = new CString(lcap)
		var llen = lookahead_length
		# Carry over the bytes that were looked ahead but not consumed yet
		if llen > 0 then lookahead.copy_to(lk, llen, 0, 0)
		lookahead = lk
		lookahead_capacity = lcap
		write_buffer = new CString(lcap)
	end
	set_codec(c)
end
76 # Error produced by the file stream
78 # var ifs = new FileReader.open("donotmakethisfile.binx")
81 # assert ifs.last_error != null
82 var last_error
: nullable IOError = null
89 # Used to inform `self` that operations will start.
90 # Specific streams can use this to prepare some resources.
92 # Is automatically invoked at the beginning of `with` structures.
94 # Do nothing by default.
99 # Used to inform `self` that the operations are over.
100 # Specific streams can use this to free some resources.
102 # Is automatically invoked at the end of `with` structures.
104 # Calls `close` by default.
108 # A `Stream` that can be read from
109 abstract class Reader
112 # Read a byte directly from the underlying stream, without
113 # considering any eventual buffer
114 protected fun raw_read_byte
: Int is abstract
116 # Read at most `max` bytes from the underlying stream into `buf`,
117 # without considering any eventual buffer
119 # Returns how many bytes were read
120 protected fun raw_read_bytes
(buf
: CString, max
: Int): Int do
122 for i
in [0 .. max
[ do
123 var b
= raw_read_byte
131 # Reads a character. Returns `null` on EOF or timeout
132 fun read_char
: nullable Char is abstract
134 # Reads a byte. Returns a negative value on error
135 fun read_byte
: Int do
136 var llen
= lookahead_length
137 if llen
== 0 then return raw_read_byte
143 lk
.lshift
(1, llen
- 1, 1)
144 lookahead_length
-= 1
149 # Reads a String of at most `i` length
150 fun read
(i
: Int): String do
152 var cs
= new CString(i
)
153 var rd
= read_bytes_to_cstring
(cs
, i
)
154 if rd
< 0 then return ""
155 return codec
.decode_string
(cs
, rd
)
158 # Reads up to `max` bytes from source
159 fun read_bytes
(max
: Int): Bytes do
161 var cs
= new CString(max
)
162 var rd
= read_bytes_to_cstring
(cs
, max
)
163 return new Bytes(cs
, rd
, max
)
166 # Reads up to `max` bytes from source and stores them in `bytes`
167 fun read_bytes_to_cstring
(bytes
: CString, max
: Int): Int do
168 var llen
= lookahead_length
169 if llen
== 0 then return raw_read_bytes
(bytes
, max
)
170 var rd
= max
.min
(llen
)
172 lk
.copy_to
(bytes
, rd
, 0, 0)
174 lk
.lshift
(rd
, llen
- rd
, rd
)
175 lookahead_length
-= rd
179 return rd
+ raw_read_bytes
(bytes
, max
- rd
)
182 # Read a string until the end of the line.
184 # The line terminators `\n` and `\r\n`, if any, are removed from each line.
187 # var txt = "Hello\n\nWorld\n"
188 # var i = new StringReader(txt)
189 # assert i.read_line == "Hello"
190 # assert i.read_line == ""
191 # assert i.read_line == "World"
195 # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
196 # the end of file (EOF) are considered to delimit the end of lines.
197 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
200 # var txt2 = "Hello\r\n\n\rWorld"
201 # var i2 = new StringReader(txt2)
202 # assert i2.read_line == "Hello"
203 # assert i2.read_line == ""
204 # assert i2.read_line == "\rWorld"
208 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
209 fun read_line
: String
211 if last_error
!= null then return ""
212 if eof
then return ""
213 var s
= new FlatBuffer
218 # Read all the lines until the eof.
220 # The line terminators `\n` and `\r\n` are removed from each line.
223 # var txt = "Hello\n\nWorld\n"
224 # var i = new StringReader(txt)
225 # assert i.read_lines == ["Hello", "", "World"]
228 # This method is more efficient than splitting
229 # the result of `read_all`.
231 # NOTE: SEE `read_line` for details.
232 fun read_lines
: Array[String]
234 var res
= new Array[String]
241 # Returns an iterator that reads each line.
243 # The line terminators `\n` and `\r\n` are removed from each line.
244 # The lines are read with `read_line`. See this method for details.
247 # var txt = "Hello\n\nWorld\n"
248 # var i = new StringReader(txt)
249 # assert i.each_line.to_a == ["Hello", "", "World"]
252 # Unlike `read_lines`, which reads all lines at the call, `each_line` is lazy.
253 # Therefore, the stream should not be closed until the end of the stream is reached.
256 # i = new StringReader(txt)
257 # var el = i.each_line
259 # assert el.item == "Hello"
261 # assert el.item == ""
266 # assert not el.is_ok
267 # # closed before "world" is read
fun each_line: LineIterator
do
	# Build a `LineIterator` over `self` and hand it to the caller
	var iterator = new LineIterator(self)
	return iterator
end
271 # Read all the stream until the eof.
273 # The content of the file is returned as a String.
276 # var txt = "Hello\n\nWorld\n"
277 # var i = new StringReader(txt)
278 # assert i.read_all == txt
280 fun read_all
: String do
281 var s
= read_all_bytes
283 if slen
== 0 then return ""
284 return codec
.decode_string
(s
.items
, s
.length
)
287 # Read all the stream until the eof.
289 # The content of the file is returned verbatim.
290 fun read_all_bytes
: Bytes
292 if last_error
!= null then return new Bytes.empty
293 var s
= new Bytes.empty
294 var buf
= new CString(4096)
296 var rd
= read_bytes_to_cstring
(buf
, 4096)
302 # Read a string until the end of the line and append it to `s`.
304 # Unlike `read_line` and other related methods,
305 # the line terminator '\n', if any, is preserved in each line.
306 # Use the method `Text::chomp` to safely remove it.
309 # var txt = "Hello\n\nWorld\n"
310 # var i = new StringReader(txt)
311 # var b = new FlatBuffer
312 # i.append_line_to(b)
313 # assert b == "Hello\n"
314 # i.append_line_to(b)
315 # assert b == "Hello\n\n"
316 # i.append_line_to(b)
321 # If `\n` is not present at the end of the result, it means that
322 # a non-eol terminated last line was returned.
325 # var i2 = new StringReader("hello")
327 # var b2 = new FlatBuffer
328 # i2.append_line_to(b2)
329 # assert b2 == "hello"
333 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
334 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
335 fun append_line_to
(s
: Buffer)
337 if last_error
!= null then return
344 if x
== '\n' then return
349 # Is the end of the stream reached?
350 # This function returns `false` if there is something to read.
351 fun eof
: Bool is abstract
353 # Read the next sequence of non whitespace characters.
355 # Leading whitespace characters are skipped.
356 # The first whitespace character that follows the result is consumed.
358 # An empty string is returned if the end of the file or an error is encountered.
361 # var w = new StringReader(" Hello, \n\t World!")
362 # assert w.read_word == "Hello,"
363 # assert w.read_char == '\n'
364 # assert w.read_word == "World!"
365 # assert w.read_word == ""
368 # `Char::is_whitespace` determines what is a whitespace.
369 fun read_word
: String
371 var buf
= new FlatBuffer
372 var c
= read_nonwhitespace
377 if c
== null then break
378 if c
.is_whitespace
then break
386 # Skip whitespace characters (if any) then return the following non-whitespace character.
388 # Returns the code point of the character.
389 # Returns `null` on end of file or error.
391 # In fact, this method works like `read_char` except it skips whitespace.
394 # var w = new StringReader(" \nab\tc")
395 # assert w.read_nonwhitespace == 'a'
396 # assert w.read_nonwhitespace == 'b'
397 # assert w.read_nonwhitespace == 'c'
398 # assert w.read_nonwhitespace == null
401 # `Char::is_whitespace` determines what is a whitespace.
402 fun read_nonwhitespace
: nullable Char
404 var c
: nullable Char = null
407 if c
== null or not c
.is_whitespace
then break
413 # Iterator returned by `Reader::each_line`.
414 # See the aforementioned method for details.
416 super Iterator[String]
418 # The original stream
423 var res
= not stream
.eof
424 if not res
and close_on_finish
then stream
.close
432 line
= stream
.read_line
438 # The last line read (cache)
439 private var line
: nullable String = null
444 if line
== null then item
449 # Close the stream when the stream is at the EOF.
452 var close_on_finish
= false is writable
456 if close_on_finish
then stream
.close
460 # `Reader` capable of declaring if readable without blocking
461 abstract class PollableReader
464 # Is there something to read? (without blocking)
465 fun poll_in
: Bool is abstract
469 # A `Stream` that can be written to
470 abstract class Writer
# Write the bytes of `s`
#
# Forwards `s.items` and `s.length` to `write_bytes_from_cstring`.
fun write_bytes(s: Bytes)
do
	var raw = s.items
	var count = s.length
	write_bytes_from_cstring(raw, count)
end
476 # Write `len` bytes from `ns`
477 fun write_bytes_from_cstring
(ns
: CString, len
: Int) is abstract
480 fun write
(s
: Text) is abstract
482 # Write a single byte
483 fun write_byte
(value
: Byte) is abstract
485 # Write a single char
486 fun write_char
(c
: Char) do
487 var ln
= codec
.add_char_to
(c
, write_buffer
)
488 write_bytes_from_cstring
(write_buffer
, ln
)
491 # Can the stream be used to write
492 fun is_writable
: Bool is abstract
495 # Things that can be efficiently written to a `Writer`
497 # The point of this interface is to allow the instance to be efficiently
498 # written into a `Writer`.
500 # Ready-to-save documents usually provide this interface.
502 # Write itself to a `stream`
503 # The specific logic is left to the concrete subclasses
504 fun write_to
(stream
: Writer) is abstract
506 # Like `write_to` but return a new String (may be quite large)
508 # This functionality is anecdotal, since the point
509 # of a streamable object is to be efficiently written to a
510 # stream without having to allocate and concatenate strings
511 fun write_to_string
: String
513 var stream
= new StringWriter
# Write `self` to `s` with `write_bytes`
redef fun write_to(s)
do
	s.write_bytes(self)
end
# Return `to_s` directly instead of writing `self` to a stream
redef fun write_to_string
do
	return to_s
end
# Write `self` to `stream` with `write`
redef fun write_to(stream)
do
	stream.write(self)
end
531 # Input streams with a buffered input for efficiency purposes
532 abstract class BufferedReader
536 if last_error
!= null then return null
538 last_error
= new IOError("Stream has reached eof")
541 # TODO: Fix when supporting UTF-8
542 var c
= _buffer
[_buffer_pos
].to_i
.code_point
549 if last_error
!= null then return -1
551 last_error
= new IOError("Stream has reached eof")
554 var c
= _buffer
[_buffer_pos
]
559 # Resets the internal buffer
565 # Peeks up to `i` bytes in the buffer
567 # The operation does not consume the buffer
570 # var x = new FileReader.open("File.txt")
571 # assert x.peek(5) == x.read(5)
573 fun peek
(i
: Int): Bytes do
574 if eof
then return new Bytes.empty
575 var remsp
= _buffer_length
- _buffer_pos
577 var bf
= new Bytes.with_capacity
(i
)
578 bf
.append_ns_from
(_buffer
, i
, _buffer_pos
)
581 var bf
= new Bytes.with_capacity
(i
)
582 bf
.append_ns_from
(_buffer
, remsp
, _buffer_pos
)
583 _buffer_pos
= _buffer_length
584 read_intern
(i
- bf
.length
, bf
)
585 remsp
= _buffer_length
- _buffer_pos
586 var full_len
= bf
.length
+ remsp
587 if full_len
> _buffer_capacity
then
588 var c
= _buffer_capacity
589 while c
< full_len
do c
= c
* 2 + 2
592 var nns
= new CString(_buffer_capacity
)
593 bf
.items
.copy_to
(nns
, bf
.length
, 0, 0)
594 _buffer
.copy_to
(nns
, remsp
, _buffer_pos
, bf
.length
)
597 _buffer_length
= full_len
601 redef fun read_bytes_to_cstring
(buf
, i
)
603 if last_error
!= null then return 0
604 var bbf
= new Bytes(buf
, 0, i
)
605 return read_intern
(i
, bbf
)
608 # Fills `buf` with at most `i` bytes read from `self`
609 private fun read_intern
(i
: Int, buf
: Bytes): Int do
612 var bufsp
= _buffer_length
- p
615 buf
.append_ns_from
(_buffer
, i
, p
)
618 _buffer_pos
= _buffer_length
619 var readln
= _buffer_length
- p
620 buf
.append_ns_from
(_buffer
, readln
, p
)
621 var rd
= read_intern
(i
- readln
, buf
)
625 redef fun read_all_bytes
627 if last_error
!= null then return new Bytes.empty
628 var s
= new Bytes.with_capacity
(10)
632 var k
= _buffer_length
634 s
.append_ns_from
(b
, rd_sz
, j
)
641 redef fun append_line_to
(s
)
643 var lb
= new Bytes.with_capacity
(10)
645 # First phase: look for a '\n'
647 while i
< _buffer_length
and _buffer
[i
] != 0xAu
8 do
652 if i
< _buffer_length
then
653 assert _buffer
[i
] == 0xAu
8
660 # if there is something to append
661 if i
> _buffer_pos
then
662 # Copy from the buffer to the string
692 if _buffer_pos
< _buffer_length
then return false
693 if end_reached
then return true
695 return _buffer_pos
>= _buffer_length
and end_reached
699 private var buffer
: CString = new CString(0)
701 # The current position in the buffer
702 private var buffer_pos
= 0
704 # Length of the current buffer (i.e. number of bytes in the buffer)
705 private var buffer_length
= 0
707 # Capacity of the buffer
708 private var buffer_capacity
= 0
711 protected fun fill_buffer
is abstract
713 # Has the last fill_buffer reached the end
714 protected fun end_reached
: Bool is abstract
716 # Allocate a `_buffer` for a given `capacity`.
717 protected fun prepare_buffer
(capacity
: Int)
719 _buffer
= new CString(capacity
)
720 _buffer_pos
= 0 # need to read
722 _buffer_capacity
= capacity
726 # A `Stream` that can be written to and read from
727 abstract class Duplex
732 # Write to `bytes` in memory
735 # var writer = new BytesWriter
737 # writer.write "Strings "
738 # writer.write_char '&'
739 # writer.write_byte 0x20u8
740 # writer.write_bytes "bytes".to_bytes
742 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
743 # assert writer.bytes.to_s == "Strings & bytes"
746 # As with any binary data, UTF-8 code points encoded on two bytes or more
747 # can be constructed byte by byte.
750 # writer = new BytesWriter
752 # # Write just the character first half
753 # writer.write_byte 0xC2u8
754 # assert writer.to_s == "\\xC2"
755 # assert writer.bytes.to_s == "�"
757 # # Complete the character
758 # writer.write_byte 0xA2u8
759 # assert writer.to_s == "\\xC2\\xA2"
760 # assert writer.bytes.to_s == "¢"
766 var bytes
= new Bytes.empty
# The `chexdigest` of the bytes written so far
redef fun to_s
do
	var written = bytes
	return written.chexdigest
end
772 if closed
then return
773 str
.append_to_bytes bytes
776 redef fun write_char
(c
)
778 if closed
then return
782 redef fun write_byte
(value
)
784 if closed
then return
788 redef fun write_bytes_from_cstring
(ns
, len
) do
789 if closed
then return
790 bytes
.append_ns
(ns
, len
)
793 # Is the stream closed?
794 protected var closed
= false
# Mark the stream as closed by setting `closed`
redef fun close
do
	closed = true
end
# The stream is writable as long as it is not `closed`
redef fun is_writable
do
	if closed then return false
	return true
end
800 # `Stream` writing to a `String`
802 # This class has the same behavior as `BytesWriter`
803 # except for `to_s` which decodes `bytes` to a string.
806 # var writer = new StringWriter
808 # writer.write "Strings "
809 # writer.write_char '&'
810 # writer.write_byte 0x20u8
811 # writer.write_bytes "bytes".to_bytes
813 # assert writer.to_s == "Strings & bytes"
# The written `bytes` converted with `Bytes::to_s`
redef fun to_s
do
	var written = bytes
	return written.to_s
end
821 # Read from `bytes` in memory
824 # var reader = new BytesReader(b"a…b")
825 # assert reader.read_char == 'a'
826 # assert reader.read_byte == 0xE2 # 1st byte of '…'
827 # assert reader.read_byte == 0x80 # 2nd byte of '…'
828 # assert reader.read_char == '�' # Reads the last byte as an invalid char
829 # assert reader.read_all_bytes == b"b"
834 # Source data to read
837 # The current position in `bytes`
838 private var cursor
= 0
842 if cursor
>= bytes
.length
then return null
844 var len
= bytes
.items
.length_of_char_at
(cursor
)
845 var char
= bytes
.items
.char_at
(cursor
)
852 if cursor
>= bytes
.length
then return -1
854 var c
= bytes
[cursor
]
# Replace `bytes` with an empty `Bytes`
redef fun close
do
	bytes = new Bytes.empty
end
861 redef fun read_all_bytes
863 var res
= bytes
.slice_from
(cursor
)
864 cursor
= bytes
.length
868 redef fun raw_read_bytes
(ns
, max
) do
869 if cursor
>= bytes
.length
then return 0
871 var copy
= max
.min
(bytes
.length
- cursor
)
872 bytes
.items
.copy_to
(ns
, copy
, cursor
, 0)
# True once `cursor` has reached (or passed) the length of `bytes`
redef fun eof
do
	var total = bytes.length
	return total <= cursor
end
880 # `Stream` reading from a `String` source
882 # This class has the same behavior as `BytesReader`
883 # except for its constructor accepting a `String`.
886 # var reader = new StringReader("a…b")
887 # assert reader.read_char == 'a'
888 # assert reader.read_byte == 0xE2 # 1st byte of '…'
889 # assert reader.read_byte == 0x80 # 2nd byte of '…'
890 # assert reader.read_char == '�' # Reads the last byte as an invalid char
891 # assert reader.read_all == "b"
898 # Source data to read
init do
	# Fill `bytes` from the bytes of `source`
	var raw = source.to_bytes
	bytes = raw
end