1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
14 intrude import text
::ropes
25 # Any kind of error that could be produced by an operation on Streams
30 # Any kind of stream to read/write/both to or from a source
32 # Error produced by the file stream
34 # var ifs = new FileReader.open("donotmakethisfile.binx")
37 # assert ifs.last_error != null
38 var last_error
: nullable IOError = null
45 # Used to inform `self` that operations will start.
46 # Specific streams can use this to prepare some resources.
48 # Is automatically invoked at the beginning of `with` structures.
50 # Do nothing by default.
55 # Used to inform `self` that the operations are over.
56 # Specific streams can use this to free some resources.
58 # Is automatically invoked at the end of `with` structures.
60 # call `close` by default.
64 # A `Stream` that can be read from
68 # Decoder used to transform input bytes to UTF-8
69 var decoder
: Codec = utf8_codec
is writable
71 # Reads a character. Returns `null` on EOF or timeout
72 fun read_char
: nullable Char is abstract
# Reads a byte. Returns a negative value on error or when the end of the stream is reached
75 fun read_byte
: Int is abstract
# Reads at most `i` bytes with `read_bytes` and returns them converted to a String
fun read(i: Int): String
do
	var raw = read_bytes(i)
	return raw.to_s
end
80 # Read at most i bytes
82 # If i <= 0, an empty buffer will be returned
83 fun read_bytes
(i
: Int): Bytes
85 if last_error
!= null or i
<= 0 then return new Bytes.empty
86 var s
= new CString(i
)
87 var buf
= new Bytes(s
, 0, i
)
88 while i
> 0 and not eof
do
99 # Read a string until the end of the line.
101 # The line terminator '\n' and '\r\n', if any, is removed in each line.
104 # var txt = "Hello\n\nWorld\n"
105 # var i = new StringReader(txt)
106 # assert i.read_line == "Hello"
107 # assert i.read_line == ""
108 # assert i.read_line == "World"
# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
# the end of file (EOF) are considered to delimit the end of lines.
114 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
117 # var txt2 = "Hello\r\n\n\rWorld"
118 # var i2 = new StringReader(txt2)
119 # assert i2.read_line == "Hello"
120 # assert i2.read_line == ""
121 # assert i2.read_line == "\rWorld"
125 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
126 fun read_line
: String
128 if last_error
!= null then return ""
129 if eof
then return ""
130 var s
= new FlatBuffer
135 # Read all the lines until the eof.
137 # The line terminator '\n' and `\r\n` is removed in each line,
140 # var txt = "Hello\n\nWorld\n"
141 # var i = new StringReader(txt)
142 # assert i.read_lines == ["Hello", "", "World"]
# This method is more efficient than splitting
146 # the result of `read_all`.
148 # NOTE: SEE `read_line` for details.
149 fun read_lines
: Array[String]
151 var res
= new Array[String]
# Return an iterator that reads each line.
# The line terminators '\n' and `\r\n` are removed from each line.
# The lines are read with `read_line`. See this method for details.
164 # var txt = "Hello\n\nWorld\n"
165 # var i = new StringReader(txt)
166 # assert i.each_line.to_a == ["Hello", "", "World"]
# Unlike `read_lines`, which reads all lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the stream.
173 # i = new StringReader(txt)
174 # var el = i.each_line
176 # assert el.item == "Hello"
178 # assert el.item == ""
183 # assert not el.is_ok
184 # # closed before "world" is read
fun each_line: LineIterator
do
	# The iterator wraps `self`; lines are only read when it is advanced.
	var lines = new LineIterator(self)
	return lines
end
188 # Read all the stream until the eof.
190 # The content of the file is returned as a String.
193 # var txt = "Hello\n\nWorld\n"
194 # var i = new StringReader(txt)
195 # assert i.read_all == txt
197 fun read_all
: String do
198 var s
= read_all_bytes
200 if slen
== 0 then return ""
203 var str
= s
.items
.clean_utf8
(slen
)
204 slen
= str
.byte_length
208 # The 129 size was decided more or less arbitrarily
209 # It will require some more benchmarking to compute
210 # if this is the best size or not
212 if chunksz
> remsp
then
213 rets
+= new FlatString.with_infos
(sits
, remsp
, pos
)
216 var st
= sits
.find_beginning_of_char_at
(pos
+ chunksz
- 1)
217 var byte_length
= st
- pos
218 rets
+= new FlatString.with_infos
(sits
, byte_length
, pos
)
222 if rets
isa Concat then return rets
.balance
226 # Read all the stream until the eof.
228 # The content of the file is returned verbatim.
229 fun read_all_bytes
: Bytes
231 if last_error
!= null then return new Bytes.empty
232 var s
= new Bytes.empty
235 if c
< 0 then continue
241 # Read a string until the end of the line and append it to `s`.
243 # Unlike `read_line` and other related methods,
244 # the line terminator '\n', if any, is preserved in each line.
245 # Use the method `Text::chomp` to safely remove it.
248 # var txt = "Hello\n\nWorld\n"
249 # var i = new StringReader(txt)
250 # var b = new FlatBuffer
251 # i.append_line_to(b)
252 # assert b == "Hello\n"
253 # i.append_line_to(b)
254 # assert b == "Hello\n\n"
255 # i.append_line_to(b)
260 # If `\n` is not present at the end of the result, it means that
261 # a non-eol terminated last line was returned.
264 # var i2 = new StringReader("hello")
266 # var b2 = new FlatBuffer
267 # i2.append_line_to(b2)
268 # assert b2 == "hello"
272 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
273 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
274 fun append_line_to
(s
: Buffer)
276 if last_error
!= null then return
283 if x
== '\n' then return
# Is the end of the stream reached?
# Returns `false` while there is still something to read.
290 fun eof
: Bool is abstract
292 # Read the next sequence of non whitespace characters.
294 # Leading whitespace characters are skipped.
295 # The first whitespace character that follows the result is consumed.
# An empty string is returned if the end of the file or an error is encountered.
300 # var w = new StringReader(" Hello, \n\t World!")
301 # assert w.read_word == "Hello,"
302 # assert w.read_char == '\n'
303 # assert w.read_word == "World!"
304 # assert w.read_word == ""
307 # `Char::is_whitespace` determines what is a whitespace.
308 fun read_word
: String
310 var buf
= new FlatBuffer
311 var c
= read_nonwhitespace
316 if c
== null then break
317 if c
.is_whitespace
then break
325 # Skip whitespace characters (if any) then return the following non-whitespace character.
327 # Returns the code point of the character.
328 # Returns `null` on end of file or error.
330 # In fact, this method works like `read_char` except it skips whitespace.
333 # var w = new StringReader(" \nab\tc")
334 # assert w.read_nonwhitespace == 'a'
335 # assert w.read_nonwhitespace == 'b'
336 # assert w.read_nonwhitespace == 'c'
337 # assert w.read_nonwhitespace == null
340 # `Char::is_whitespace` determines what is a whitespace.
341 fun read_nonwhitespace
: nullable Char
343 var c
: nullable Char = null
346 if c
== null or not c
.is_whitespace
then break
352 # Iterator returned by `Reader::each_line`.
353 # See the aforementioned method for details.
355 super Iterator[String]
357 # The original stream
362 var res
= not stream
.eof
363 if not res
and close_on_finish
then stream
.close
371 line
= stream
.read_line
377 # The last line read (cache)
378 private var line
: nullable String = null
383 if line
== null then item
388 # Close the stream when the stream is at the EOF.
var close_on_finish: Bool = false is writable
395 if close_on_finish
then stream
.close
399 # `Reader` capable of declaring if readable without blocking
400 abstract class PollableReader
403 # Is there something to read? (without blocking)
404 fun poll_in
: Bool is abstract
408 # A `Stream` that can be written to
409 abstract class Writer
412 # The coder from a nit UTF-8 String to the output file
413 var coder
: Codec = utf8_codec
is writable
415 # Writes bytes from `s`
416 fun write_bytes
(s
: Bytes) is abstract
419 fun write
(s
: Text) is abstract
421 # Write a single byte
422 fun write_byte
(value
: Byte) is abstract
# Writes the single character `c`, as its string form, through `write`
fun write_char(c: Char)
do
	var text = c.to_s
	write(text)
end
427 # Can the stream be used to write
428 fun is_writable
: Bool is abstract
# Things that can be efficiently written to a `Writer`
433 # The point of this interface is to allow the instance to be efficiently
434 # written into a `Writer`.
436 # Ready-to-save documents usually provide this interface.
438 # Write itself to a `stream`
# The specific logic is left to the concrete subclasses
440 fun write_to
(stream
: Writer) is abstract
442 # Like `write_to` but return a new String (may be quite large)
# This functionality is anecdotal, since the point
# of a streamable object is to be efficiently written to a
# stream without having to allocate and concatenate strings
447 fun write_to_string
: String
449 var stream
= new StringWriter
# Writes the bytes of `self` to `s`
redef fun write_to(s)
do
	s.write_bytes(self)
end
# No intermediate stream is needed: the result is simply `to_s`
redef fun write_to_string
do
	return self.to_s
end
# Writes `self` to `stream` with `Writer::write`
redef fun write_to(stream)
do
	stream.write(self)
end
467 # Input streams with a buffered input for efficiency purposes
468 abstract class BufferedReader
472 if last_error
!= null then return null
474 last_error
= new IOError("Stream has reached eof")
477 # TODO: Fix when supporting UTF-8
478 var c
= _buffer
[_buffer_pos
].to_i
.code_point
485 if last_error
!= null then return -1
487 last_error
= new IOError("Stream has reached eof")
490 var c
= _buffer
[_buffer_pos
]
495 # Resets the internal buffer
501 # Peeks up to `n` bytes in the buffer
503 # The operation does not consume the buffer
506 # var x = new FileReader.open("File.txt")
507 # assert x.peek(5) == x.read(5)
509 fun peek
(i
: Int): Bytes do
510 if eof
then return new Bytes.empty
511 var remsp
= _buffer_length
- _buffer_pos
513 var bf
= new Bytes.with_capacity
(i
)
514 bf
.append_ns_from
(_buffer
, i
, _buffer_pos
)
517 var bf
= new Bytes.with_capacity
(i
)
518 bf
.append_ns_from
(_buffer
, remsp
, _buffer_pos
)
519 _buffer_pos
= _buffer_length
520 read_intern
(i
- bf
.length
, bf
)
521 remsp
= _buffer_length
- _buffer_pos
522 var full_len
= bf
.length
+ remsp
523 if full_len
> _buffer_capacity
then
524 var c
= _buffer_capacity
525 while c
< full_len
do c
= c
* 2 + 2
528 var nns
= new CString(_buffer_capacity
)
529 bf
.items
.copy_to
(nns
, bf
.length
, 0, 0)
530 _buffer
.copy_to
(nns
, remsp
, _buffer_pos
, bf
.length
)
533 _buffer_length
= full_len
537 redef fun read_bytes
(i
)
539 if last_error
!= null then return new Bytes.empty
540 var buf
= new Bytes.with_capacity
(i
)
545 # Fills `buf` with at most `i` bytes read from `self`
546 private fun read_intern
(i
: Int, buf
: Bytes): Int do
549 var bufsp
= _buffer_length
- p
552 buf
.append_ns_from
(_buffer
, i
, p
)
555 _buffer_pos
= _buffer_length
556 var readln
= _buffer_length
- p
557 buf
.append_ns_from
(_buffer
, readln
, p
)
558 var rd
= read_intern
(i
- readln
, buf
)
562 redef fun read_all_bytes
564 if last_error
!= null then return new Bytes.empty
565 var s
= new Bytes.with_capacity
(10)
569 var k
= _buffer_length
571 s
.append_ns_from
(b
, rd_sz
, j
)
578 redef fun append_line_to
(s
)
580 var lb
= new Bytes.with_capacity
(10)
582 # First phase: look for a '\n'
584 while i
< _buffer_length
and _buffer
[i
] != 0xAu
8 do
589 if i
< _buffer_length
then
590 assert _buffer
[i
] == 0xAu
8
597 # if there is something to append
598 if i
> _buffer_pos
then
599 # Copy from the buffer to the string
629 if _buffer_pos
< _buffer_length
then return false
630 if end_reached
then return true
632 return _buffer_pos
>= _buffer_length
and end_reached
636 private var buffer
: CString = new CString(0)
# Index of the next byte to consume in the buffer
private var buffer_pos: Int = 0

# Length of the current buffer (i.e. number of bytes in the buffer)
private var buffer_length: Int = 0

# Allocated capacity of the buffer
private var buffer_capacity: Int = 0
648 protected fun fill_buffer
is abstract
650 # Has the last fill_buffer reached the end
651 protected fun end_reached
: Bool is abstract
653 # Allocate a `_buffer` for a given `capacity`.
654 protected fun prepare_buffer
(capacity
: Int)
656 _buffer
= new CString(capacity
)
657 _buffer_pos
= 0 # need to read
659 _buffer_capacity
= capacity
663 # A `Stream` that can be written to and read from
664 abstract class Duplex
669 # Write to `bytes` in memory
672 # var writer = new BytesWriter
674 # writer.write "Strings "
675 # writer.write_char '&'
676 # writer.write_byte 0x20u8
677 # writer.write_bytes "bytes".to_bytes
679 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
680 # assert writer.bytes.to_s == "Strings & bytes"
683 # As with any binary data, UTF-8 code points encoded on two bytes or more
684 # can be constructed byte by byte.
687 # writer = new BytesWriter
689 # # Write just the character first half
690 # writer.write_byte 0xC2u8
691 # assert writer.to_s == "\\xC2"
692 # assert writer.bytes.to_s == "�"
694 # # Complete the character
695 # writer.write_byte 0xA2u8
696 # assert writer.to_s == "\\xC2\\xA2"
697 # assert writer.bytes.to_s == "¢"
var bytes: Bytes = new Bytes.empty
# Hexadecimal digest of `bytes`, as produced by `chexdigest`
redef fun to_s
do
	var hex = bytes.chexdigest
	return hex
end
709 if closed
then return
710 str
.append_to_bytes bytes
713 redef fun write_char
(c
)
715 if closed
then return
719 redef fun write_byte
(value
)
721 if closed
then return
725 redef fun write_bytes
(b
)
727 if closed
then return
# Has `close` been called on this stream?
protected var closed: Bool = false
# Marks the stream as closed
redef fun close
do
	closed = true
end
# The stream is writable as long as it has not been closed
redef fun is_writable
do
	if closed then return false
	return true
end
738 # `Stream` writing to a `String`
740 # This class has the same behavior as `BytesWriter`
741 # except for `to_s` which decodes `bytes` to a string.
744 # var writer = new StringWriter
746 # writer.write "Strings "
747 # writer.write_char '&'
748 # writer.write_byte 0x20u8
749 # writer.write_bytes "bytes".to_bytes
751 # assert writer.to_s == "Strings & bytes"
# String form of `bytes`, obtained with `Bytes::to_s`
redef fun to_s
do
	var text = bytes.to_s
	return text
end
759 # Read from `bytes` in memory
762 # var reader = new BytesReader(b"a…b")
763 # assert reader.read_char == 'a'
764 # assert reader.read_byte == 0xE2 # 1st byte of '…'
765 # assert reader.read_byte == 0x80 # 2nd byte of '…'
766 # assert reader.read_char == '�' # Reads the last byte as an invalid char
767 # assert reader.read_all_bytes == b"b"
772 # Source data to read
# Index in `bytes` of the next byte to read
private var cursor: Int = 0
780 if cursor
>= bytes
.length
then return null
782 var len
= bytes
.items
.length_of_char_at
(cursor
)
783 var char
= bytes
.items
.char_at
(cursor
)
790 if cursor
>= bytes
.length
then return -1
792 var c
= bytes
[cursor
]
# Replaces `bytes` with a new empty `Bytes`
redef fun close
do
	bytes = new Bytes.empty
end
799 redef fun read_all_bytes
801 var res
= bytes
.slice_from
(cursor
)
802 cursor
= bytes
.length
# The end is reached once `cursor` is at or past the length of `bytes`
redef fun eof
do
	var total = bytes.length
	return cursor >= total
end
809 # `Stream` reading from a `String` source
811 # This class has the same behavior as `BytesReader`
812 # except for its constructor accepting a `String`.
815 # var reader = new StringReader("a…b")
816 # assert reader.read_char == 'a'
817 # assert reader.read_byte == 0xE2 # 1st byte of '…'
818 # assert reader.read_byte == 0x80 # 2nd byte of '…'
819 # assert reader.read_char == '�' # Reads the last byte as an invalid char
820 # assert reader.read_all == "b"
827 # Source data to read
init
do
	# Set `bytes` from the bytes of `source`
	bytes = source.to_bytes
end