1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
11 # Input and output streams of characters
24 # Any kind of error that could be produced by an operation on Streams
29 # Any kind of stream to read/write/both to or from a source
31 # Codec used to transform raw data to text
33 # Note: defaults to UTF-8
34 var codec
: Codec = utf8_codec
is protected writable(set_codec
)
36 # Lookahead buffer for codecs
38 # Since some codecs are multibyte, a lookahead may be required
39 # to store the next bytes and consume them only if a valid character
41 protected var lookahead
: CString is noinit
43 # Capacity of the lookahead
44 protected var lookahead_capacity
= 0
46 # Current occupation of the lookahead
47 protected var lookahead_length
= 0
49 # Buffer for writing data to a stream
50 protected var write_buffer
: CString is noinit
53 var lcap
= codec
.max_lookahead
54 lookahead
= new CString(lcap
)
55 write_buffer
= new CString(lcap
)
57 lookahead_capacity
= lcap
# Change the codec used by this stream to `c`.
#
# When `c` needs a longer lookahead than the current buffers can hold,
# the lookahead and the write buffer are reallocated with a capacity of
# `c.max_lookahead`; bytes already pending in the lookahead are kept.
fun codec=(c: Codec) do
	# Size the new buffers from the incoming codec `c`.
	# The previous code read `codec.max_lookahead`, i.e. the codec that is
	# being replaced, so the buffers were recreated at the old size.
	var lcap = c.max_lookahead
	if lcap > lookahead_capacity then
		var lk = new CString(lcap)
		var llen = lookahead_length
		# Carry over the bytes read ahead but not yet consumed.
		if llen > 0 then
			lookahead.copy_to(lk, llen, 0, 0)
		end
		# NOTE(review): this assignment and the final `set_codec(c)` sit on
		# lines not visible in the reviewed excerpt; confirm against the file.
		lookahead = lk
		lookahead_capacity = lcap
		write_buffer = new CString(lcap)
	end
	# Store the codec through the setter declared with `writable(set_codec)`.
	set_codec(c)
end
76 # Error produced by the file stream
78 # var ifs = new FileReader.open("donotmakethisfile.binx")
81 # assert ifs.last_error != null
82 var last_error
: nullable IOError = null
89 # Used to inform `self` that operations will start.
90 # Specific streams can use this to prepare some resources.
92 # Is automatically invoked at the beginning of `with` structures.
94 # Do nothing by default.
99 # Used to inform `self` that the operations are over.
100 # Specific streams can use this to free some resources.
102 # Is automatically invoked at the end of `with` structures.
104 # call `close` by default.
108 # A `Stream` that can be read from
109 abstract class Reader
112 # Read a byte directly from the underlying stream, without
113 # considering any eventual buffer
114 protected fun raw_read_byte
: Int is abstract
116 # Read at most `max` bytes from the underlying stream into `buf`,
117 # without considering any eventual buffer
119 # Returns how many bytes were read
120 protected fun raw_read_bytes
(buf
: CString, max
: Int): Int do
122 for i
in [0 .. max
[ do
123 var b
= raw_read_byte
131 # Reads a character. Returns `null` on EOF or timeout
133 # Returns unicode replacement character '�' if an
134 # invalid byte sequence is read.
136 # `read_char` may block if:
138 # * No byte could be read from the current buffer
139 # * An incomplete char is partially read, and more bytes are
140 # required for full decoding.
141 fun read_char
: nullable Char do
142 if eof
then return null
144 var codet_sz
= cod
.codet_size
146 var llen
= lookahead_length
147 if llen
< codet_sz
then
148 llen
+= raw_read_bytes
(lk
.fast_cstring
(llen
), codet_sz
- llen
)
150 if llen
< codet_sz
then
152 return 0xFFFD.code_point
154 var ret
= cod
.is_valid_char
(lk
, codet_sz
)
155 var max_llen
= cod
.max_lookahead
156 while ret
== 1 and llen
< max_llen
do
157 var rd
= raw_read_bytes
(lk
.fast_cstring
(llen
), codet_sz
)
158 if rd
< codet_sz
then
161 lookahead
.lshift
(codet_sz
, llen
, codet_sz
)
163 lookahead_length
= llen
.max
(0)
164 return 0xFFFD.code_point
167 ret
= cod
.is_valid_char
(lk
, llen
)
170 var c
= cod
.decode_char
(lk
)
171 var clen
= c
.u8char_len
174 lookahead
.lshift
(clen
, llen
, clen
)
176 lookahead_length
= llen
179 if ret
== 2 or ret
== 1 then
182 lookahead
.lshift
(codet_sz
, llen
, codet_sz
)
184 lookahead_length
= llen
185 return 0xFFFD.code_point
187 # Should not happen if the decoder works properly
188 var arr
= new Array[Object]
189 arr
.push
"Decoder error: could not decode nor recover from byte sequence ["
190 for i
in [0 .. llen
[ do
195 var err
= new IOError(arr
.plain_to_s
)
196 err
.cause
= last_error
198 return 0xFFFD.code_point
201 # Reads a byte. Returns a negative value on error
202 fun read_byte
: Int do
203 var llen
= lookahead_length
204 if llen
== 0 then return raw_read_byte
210 lk
.lshift
(1, llen
- 1, 1)
211 lookahead_length
-= 1
216 # Reads a String of at most `i` length
217 fun read
(i
: Int): String do
219 var cs
= new CString(i
)
220 var rd
= read_bytes_to_cstring
(cs
, i
)
221 if rd
< 0 then return ""
222 return codec
.decode_string
(cs
, rd
)
225 # Reads up to `max` bytes from source
226 fun read_bytes
(max
: Int): Bytes do
228 var cs
= new CString(max
)
229 var rd
= read_bytes_to_cstring
(cs
, max
)
230 return new Bytes(cs
, rd
, max
)
233 # Reads up to `max` bytes from source and stores them in `bytes`
234 fun read_bytes_to_cstring
(bytes
: CString, max
: Int): Int do
235 var llen
= lookahead_length
236 if llen
== 0 then return raw_read_bytes
(bytes
, max
)
237 var rd
= max
.min
(llen
)
239 lk
.copy_to
(bytes
, rd
, 0, 0)
241 lk
.lshift
(rd
, llen
- rd
, rd
)
242 lookahead_length
-= rd
246 return rd
+ raw_read_bytes
(bytes
.fast_cstring
(rd
), max
- rd
)
249 # Read a string until the end of the line.
251 # The line terminator '\n' and '\r\n', if any, is removed in each line.
254 # var txt = "Hello\n\nWorld\n"
255 # var i = new StringReader(txt)
256 # assert i.read_line == "Hello"
257 # assert i.read_line == ""
258 # assert i.read_line == "World"
262 # Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
263 # the end of file (EOF) are considered to delimit the end of lines.
264 # CARRIAGE RETURN (`\r`) alone is not used for the end of line.
267 # var txt2 = "Hello\r\n\n\rWorld"
268 # var i2 = new StringReader(txt2)
269 # assert i2.read_line == "Hello"
270 # assert i2.read_line == ""
271 # assert i2.read_line == "\rWorld"
275 # NOTE: Use `append_line_to` if the line terminator needs to be preserved.
276 fun read_line
: String
278 if last_error
!= null then return ""
279 if eof
then return ""
280 var s
= new FlatBuffer
285 # Read all the lines until the eof.
287 # The line terminator '\n' and `\r\n` is removed in each line,
290 # var txt = "Hello\n\nWorld\n"
291 # var i = new StringReader(txt)
292 # assert i.read_lines == ["Hello", "", "World"]
295 # This method is more efficient than splitting
296 # the result of `read_all`.
298 # NOTE: SEE `read_line` for details.
299 fun read_lines
: Array[String]
301 var res
= new Array[String]
308 # Return an iterator that read each line.
310 # The line terminator '\n' and `\r\n` is removed in each line,
311 # The lines are read with `read_line`. See this method for details.
314 # var txt = "Hello\n\nWorld\n"
315 # var i = new StringReader(txt)
316 # assert i.each_line.to_a == ["Hello", "", "World"]
319 # Unlike `read_lines`, which reads all lines at the call, `each_line` is lazy.
320 # Therefore, the stream should not be closed until the end of the stream.
323 # i = new StringReader(txt)
324 # var el = i.each_line
326 # assert el.item == "Hello"
328 # assert el.item == ""
333 # assert not el.is_ok
334 # # closed before "world" is read
fun each_line: LineIterator
do
	# Wrap `self` in a fresh `LineIterator`.
	var lines = new LineIterator(self)
	return lines
end
338 # Read all the stream until the eof.
340 # The content of the file is returned as a String.
343 # var txt = "Hello\n\nWorld\n"
344 # var i = new StringReader(txt)
345 # assert i.read_all == txt
347 fun read_all
: String do
348 var s
= read_all_bytes
350 if slen
== 0 then return ""
351 return codec
.decode_string
(s
.items
, s
.length
)
354 # Read all the stream until the eof.
356 # The content of the file is returned verbatim.
357 fun read_all_bytes
: Bytes
359 if last_error
!= null then return new Bytes.empty
360 var s
= new Bytes.empty
361 var buf
= new CString(4096)
363 var rd
= read_bytes_to_cstring
(buf
, 4096)
369 # Read a string until the end of the line and append it to `s`.
371 # Unlike `read_line` and other related methods,
372 # the line terminator '\n', if any, is preserved in each line.
373 # Use the method `Text::chomp` to safely remove it.
376 # var txt = "Hello\n\nWorld\n"
377 # var i = new StringReader(txt)
378 # var b = new FlatBuffer
379 # i.append_line_to(b)
380 # assert b == "Hello\n"
381 # i.append_line_to(b)
382 # assert b == "Hello\n\n"
383 # i.append_line_to(b)
388 # If `\n` is not present at the end of the result, it means that
389 # a non-eol terminated last line was returned.
392 # var i2 = new StringReader("hello")
394 # var b2 = new FlatBuffer
395 # i2.append_line_to(b2)
396 # assert b2 == "hello"
400 # NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
401 # Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
402 fun append_line_to
(s
: Buffer)
404 if last_error
!= null then return
411 if x
== '\n' then return
416 # Is the end of the stream reached, i.e. is there nothing left to read?
417 # This function returns `false` if there is something to read.
419 if lookahead_length
> 0 then return false
420 lookahead_length
= raw_read_bytes
(lookahead
, 1)
421 return lookahead_length
<= 0
424 # Read the next sequence of non whitespace characters.
426 # Leading whitespace characters are skipped.
427 # The first whitespace character that follows the result is consumed.
429 # An empty string is returned if the end of the file or an error is encountered.
432 # var w = new StringReader(" Hello, \n\t World!")
433 # assert w.read_word == "Hello,"
434 # assert w.read_char == '\n'
435 # assert w.read_word == "World!"
436 # assert w.read_word == ""
439 # `Char::is_whitespace` determines what is a whitespace.
440 fun read_word
: String
442 var buf
= new FlatBuffer
443 var c
= read_nonwhitespace
448 if c
== null then break
449 if c
.is_whitespace
then break
457 # Skip whitespace characters (if any) then return the following non-whitespace character.
459 # Returns the code point of the character.
460 # Returns `null` on end of file or error.
462 # In fact, this method works like `read_char` except it skips whitespace.
465 # var w = new StringReader(" \nab\tc")
466 # assert w.read_nonwhitespace == 'a'
467 # assert w.read_nonwhitespace == 'b'
468 # assert w.read_nonwhitespace == 'c'
469 # assert w.read_nonwhitespace == null
472 # `Char::is_whitespace` determines what is a whitespace.
473 fun read_nonwhitespace
: nullable Char
475 var c
: nullable Char = null
478 if c
== null or not c
.is_whitespace
then break
484 # Iterator returned by `Reader::each_line`.
485 # See the aforementioned method for details.
487 super Iterator[String]
489 # The original stream
494 var res
= not stream
.eof
495 if not res
and close_on_finish
then stream
.close
503 line
= stream
.read_line
509 # The last line read (cache)
510 private var line
: nullable String = null
515 if line
== null then item
520 # Close the stream when the stream is at the EOF.
523 var close_on_finish
= false is writable
527 if close_on_finish
then stream
.close
531 # `Reader` capable of declaring if readable without blocking
532 abstract class PollableReader
535 # Is there something to read? (without blocking)
536 fun poll_in
: Bool is abstract
540 # A `Stream` that can be written to
541 abstract class Writer
# Write all the bytes held by `s`
fun write_bytes(s: Bytes)
do
	# Delegate to the raw writer using the backing buffer and its length.
	var items = s.items
	var count = s.length
	write_bytes_from_cstring(items, count)
end
547 # Write `len` bytes from `ns`
548 fun write_bytes_from_cstring
(ns
: CString, len
: Int) is abstract
551 fun write
(s
: Text) is abstract
553 # Write a single byte
554 fun write_byte
(value
: Int) is abstract
556 # Write a single char
557 fun write_char
(c
: Char) do
558 var ln
= codec
.add_char_to
(c
, write_buffer
)
559 write_bytes_from_cstring
(write_buffer
, ln
)
562 # Can the stream be used to write
563 fun is_writable
: Bool is abstract
566 # Things that can be efficiently written to a `Writer`
568 # The point of this interface is to allow the instance to be efficiently
569 # written into a `Writer`.
571 # Ready-to-save documents usually provide this interface.
573 # Write itself to a `stream`
574 # The specific logic is left to the concrete subclasses
575 fun write_to
(stream
: Writer) is abstract
577 # Like `write_to` but return a new String (may be quite large).
579 # This functionality is anecdotal, since the point
580 # of a streamable object is to be efficiently written to a
581 # stream without having to allocate and concatenate strings.
582 fun write_to_string
: String
584 var stream
= new StringWriter
589 # Like `write_to` but return a new Bytes (may be quite large)
591 # This functionality is anecdotal, since the point
592 # of a streamable object is to be efficiently written to a
593 # stream without having to allocate and concatenate buffers.
595 # Nevertheless, you might need this method if you want to know
596 # the byte size of a writable object.
597 fun write_to_bytes
: Bytes
599 var stream
= new BytesWriter
redef fun write_to(s)
do
	# Emit `self` on the writer `s` as raw bytes.
	s.write_bytes(self)
end
redef fun write_to_string
do
	# The string form is directly available through `to_s`.
	var text = to_s
	return text
end
redef fun write_to(stream)
do
	# Hand `self` to the text-writing service of `stream`.
	stream.write(self)
end
617 # A `Stream` that can be written to and read from
618 abstract class Duplex
623 # Write to `bytes` in memory
626 # var writer = new BytesWriter
628 # writer.write "Strings "
629 # writer.write_char '&'
630 # writer.write_byte 0x20
631 # writer.write_bytes "bytes".to_bytes
633 # assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
634 # assert writer.bytes.to_s == "Strings & bytes"
637 # As with any binary data, UTF-8 code points encoded on two bytes or more
638 # can be constructed byte by byte.
641 # writer = new BytesWriter
643 # # Write just the character first half
644 # writer.write_byte 0xC2
645 # assert writer.to_s == "\\xC2"
646 # assert writer.bytes.to_s == "�"
648 # # Complete the character
649 # writer.write_byte 0xA2
650 # assert writer.to_s == "\\xC2\\xA2"
651 # assert writer.bytes.to_s == "¢"
657 var bytes
= new Bytes.empty
redef fun to_s
do
	# Render the accumulated `bytes` through `chexdigest`.
	var digest = bytes.chexdigest
	return digest
end
663 if closed
then return
664 str
.append_to_bytes bytes
667 redef fun write_char
(c
)
669 if closed
then return
673 redef fun write_byte
(value
)
675 if closed
then return
679 redef fun write_bytes_from_cstring
(ns
, len
) do
680 if closed
then return
681 bytes
.append_ns
(ns
, len
)
684 # Is the stream closed?
685 protected var closed
= false
redef fun close
do
	# Only raise the `closed` flag.
	closed = true
end
redef fun is_writable
do
	# Writable as long as the `closed` flag has not been raised.
	if closed then return false
	return true
end
691 # `Stream` writing to a `String`
693 # This class has the same behavior as `BytesWriter`
694 # except for `to_s` which decodes `bytes` to a string.
697 # var writer = new StringWriter
699 # writer.write "Strings "
700 # writer.write_char '&'
701 # writer.write_byte 0x20
702 # writer.write_bytes "bytes".to_bytes
704 # assert writer.to_s == "Strings & bytes"
redef fun to_s
do
	# Return the string form of the accumulated `bytes`.
	var text = bytes.to_s
	return text
end
712 # Read from `bytes` in memory
715 # var reader = new BytesReader(b"a…b")
716 # assert reader.read_char == 'a'
717 # assert reader.read_byte == 0xE2 # 1st byte of '…'
718 # assert reader.read_byte == 0x80 # 2nd byte of '…'
719 # assert reader.read_char == '�' # Reads the last byte as an invalid char
720 # assert reader.read_all_bytes == b"b"
725 # Source data to read
728 # The current position in `bytes`
729 private var cursor
= 0
731 redef fun raw_read_byte
733 if cursor
>= bytes
.length
then return -1
735 var c
= bytes
[cursor
]
redef fun close
do
	# Drop the source data by swapping in an empty `Bytes`.
	var nothing = new Bytes.empty
	bytes = nothing
end
742 redef fun read_all_bytes
744 var res
= bytes
.slice_from
(cursor
)
745 cursor
= bytes
.length
749 redef fun raw_read_bytes
(ns
, max
) do
750 if cursor
>= bytes
.length
then return 0
752 var copy
= max
.min
(bytes
.length
- cursor
)
753 bytes
.items
.copy_to
(ns
, copy
, cursor
, 0)
redef fun eof
do
	# Nothing is left once `cursor` has reached the length of `bytes`.
	var remaining = bytes.length - cursor
	return remaining <= 0
end
761 # `Stream` reading from a `String` source
763 # This class has the same behavior as `BytesReader`
764 # except for its constructor accepting a `String`.
767 # var reader = new StringReader("a…b")
768 # assert reader.read_char == 'a'
769 # assert reader.read_byte == 0xE2 # 1st byte of '…'
770 # assert reader.read_byte == 0x80 # 2nd byte of '…'
771 # assert reader.read_char == '�' # Reads the last byte as an invalid char
772 # assert reader.read_all == "b"
779 # Source data to read
init
do
	# Populate `bytes` with the byte form of `source`.
	var raw = source.to_bytes
	bytes = raw
end