core/stream: change read_byte return type to Int
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Error reported by a failed operation on a `Stream`
#
# Instances are stored in `Stream::last_error` by the streams of this module.
class IOError
	super Error
end
29
# Common ancestor of every stream, whether it reads, writes, or does both
abstract class Stream
	# Last error recorded on this stream, `null` while none occurred
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract

	# Hook called before the work begins.
	#
	# Tells `self` that operations are about to start, so that
	# specific streams can prepare the resources they need.
	#
	# Invoked automatically when entering a `with` structure.
	#
	# The default implementation does nothing.
	fun start do end

	# Hook called once the work is done.
	#
	# Tells `self` that operations are over, so that
	# specific streams can release their resources.
	#
	# Invoked automatically when leaving a `with` structure.
	#
	# The default implementation calls `close`.
	fun finish
	do
		close
	end
end
63
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Decoder used to transform input bytes to UTF-8
	var decoder: Codec = utf8_codec is writable

	# Reads a character. Returns `null` on EOF or timeout
	fun read_char: nullable Char is abstract

	# Reads a byte. Returns a negative value on error
	#
	# The readers of this module also return a negative value when no byte
	# is left to read.
	fun read_byte: Int is abstract

	# Reads a String of at most `i` length
	fun read(i: Int): String do return read_bytes(i).to_s

	# Read at most `i` bytes
	#
	# If `i <= 0`, or if an error was already recorded in `last_error`,
	# an empty buffer is returned.
	#
	# Reading stops early when the end of the stream is reached or when
	# a read failure records an error in `last_error`.
	fun read_bytes(i: Int): Bytes
	do
		if last_error != null or i <= 0 then return new Bytes.empty
		var s = new CString(i)
		var buf = new Bytes(s, 0, i)
		while i > 0 and not eof do
			var c = read_byte
			if c < 0 then
				# A recorded error will not go away: retrying would loop forever.
				if last_error != null then break
				# No error recorded: nothing was available this time, try again.
				continue
			end
			buf.add c.to_b
			i -= 1
		end
		return buf
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# An empty string is returned if an error was recorded or if the
	# end of the stream is already reached.
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the stream.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned as a String.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String do
		var s = read_all_bytes
		var slen = s.length
		if slen == 0 then return ""
		var rets = ""
		var pos = 0
		# Work on a cleaned copy of the bytes, whose length may differ
		var str = s.items.clean_utf8(slen)
		slen = str.byte_length
		var sits = str.items
		var remsp = slen
		while pos < slen do
			# The 129 size was decided more or less arbitrarily
			# It will require some more benchmarking to compute
			# if this is the best size or not
			var chunksz = 129
			if chunksz > remsp then
				# Last chunk: take everything that remains
				rets += new FlatString.with_infos(sits, remsp, pos)
				break
			end
			# Cut the chunk at the beginning of a character
			var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
			var byte_length = st - pos
			rets += new FlatString.with_infos(sits, byte_length, pos)
			pos = st
			remsp -= byte_length
		end
		if rets isa Concat then return rets.balance
		return rets
	end

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	#
	# An empty buffer is returned if an error was already recorded.
	# Reading stops early when a read failure records an error in `last_error`.
	fun read_all_bytes: Bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.empty
		while not eof do
			var c = read_byte
			if c < 0 then
				# A recorded error will not go away: retrying would loop forever.
				if last_error != null then break
				continue
			end
			s.add(c.to_b)
		end
		return s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == null then
				# Nothing was read: give up at the end of the stream or once
				# an error is recorded, otherwise try again.
				if eof or last_error != null then return
			else
				s.chars.push(x)
				if x == '\n' then return
			end
		end
	end

	# Is the end of the stream reached?
	#
	# Returns `false` if there is something left to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		if c != null then
			buf.add(c)
			while not eof do
				c = read_char
				if c == null then break
				if c.is_whitespace then break
				buf.add(c)
			end
		end
		return buf.to_s
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the character read.
	# Returns `null` on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'
	# assert w.read_nonwhitespace == 'b'
	# assert w.read_nonwhitespace == 'c'
	# assert w.read_nonwhitespace == null
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: nullable Char
	do
		var c: nullable Char = null
		while not eof do
			c = read_char
			if c == null or not c.is_whitespace then break
		end
		return c
	end
end
351
# Lazy iterator over the lines of a `Reader`, as returned by `Reader::each_line`.
# Refer to that method for the details of the iteration.
class LineIterator
	super Iterator[String]

	# The stream the lines are read from
	var stream: Reader

	# Line already read from `stream` but not yet dropped by `next` (cache)
	private var line: nullable String = null

	# Should `stream` be closed once its EOF is reached?
	#
	# Default is false.
	var close_on_finish = false is writable

	redef fun is_ok
	do
		if not stream.eof then return true
		# Nothing left to read: release the stream if asked to
		if close_on_finish then stream.close
		return false
	end

	redef fun item
	do
		var cached = line
		if cached != null then return cached
		# First access since the last `next`: read and remember the line
		cached = stream.read_line
		line = cached
		return cached
	end

	redef fun next
	do
		# Consume the current line from the stream if `item` was not called
		if line == null then item
		# Forget it, so that the next `item` reads a new line
		line = null
	end

	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
398
# `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
407
# A `Stream` that accepts written data
abstract class Writer
	super Stream

	# Codec used to encode a Nit UTF-8 String for the output
	var coder: Codec = utf8_codec is writable

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the text `s`
	fun write(s: Text) is abstract

	# Write the single byte `value`
	fun write_byte(value: Byte) is abstract

	# Write the single character `c`
	#
	# By default, `c` is converted to a string and passed to `write`.
	fun write_char(c: Char)
	do
		write(c.to_s)
	end

	# Is it possible to write to the stream?
	fun is_writable: Bool is abstract
end
430
# Things that can be efficiently written to a `Writer`
#
# The purpose of this interface is to let an instance write itself
# directly into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write `self` to `stream`
	#
	# The actual logic is left to the concrete subclasses.
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This service is anecdotal: the point of a writable object is to be
	# efficiently written to a stream without having to allocate and
	# concatenate strings.
	fun write_to_string: String
	do
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
454
redef class Bytes
	super Writable

	# Write `self` with `Writer::write_bytes`
	redef fun write_to(s)
	do
		s.write_bytes(self)
	end

	# Directly use `to_s`, no intermediate stream is needed
	redef fun write_to_string
	do
		return to_s
	end
end
461
redef class Text
	super Writable

	# Write `self` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
466
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`, and allocate the
# buffer with `prepare_buffer`.
abstract class BufferedReader
	super Reader

	# Read the next byte of the buffer as a character
	#
	# Returns `null` if an error was recorded; reading at the end of the
	# stream records an `IOError`.
	redef fun read_char
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		# TODO: Fix when supporting UTF-8
		var c = _buffer[_buffer_pos].to_i.code_point
		_buffer_pos += 1
		return c
	end

	# Read the next byte of the buffer
	#
	# Returns `-1` if an error was recorded; reading at the end of the
	# stream records an `IOError`.
	redef fun read_byte
	do
		if last_error != null then return -1
		if eof then
			last_error = new IOError("Stream has reached eof")
			return -1
		end
		var c = _buffer[_buffer_pos]
		_buffer_pos += 1
		return c.to_i
	end

	# Resets the internal buffer
	fun buffer_reset do
		_buffer_length = 0
		_buffer_pos = 0
	end

	# Peeks up to `n` bytes in the buffer
	#
	# The operation does not consume the buffer
	#
	# An empty buffer is returned at the end of the stream or if `i <= 0`.
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read(5)
	# ~~~
	fun peek(i: Int): Bytes do
		if eof then return new Bytes.empty
		# A negative size must neither be used as a capacity nor as a length
		if i <= 0 then return new Bytes.empty
		var remsp = _buffer_length - _buffer_pos
		if i <= remsp then
			# Everything requested is already buffered: just copy it
			var bf = new Bytes.with_capacity(i)
			bf.append_ns_from(_buffer, i, _buffer_pos)
			return bf
		end
		# Take what is buffered, then read the missing part from the stream
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, remsp, _buffer_pos)
		_buffer_pos = _buffer_length
		read_intern(i - bf.length, bf)
		# Rebuild the buffer: the peeked bytes first, then what is still
		# unread, so that following reads see the peeked bytes again
		remsp = _buffer_length - _buffer_pos
		var full_len = bf.length + remsp
		if full_len > _buffer_capacity then
			var c = _buffer_capacity
			while c < full_len do c = c * 2 + 2
			_buffer_capacity = c
		end
		var nns = new CString(_buffer_capacity)
		bf.items.copy_to(nns, bf.length, 0, 0)
		_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
		_buffer = nns
		_buffer_pos = 0
		_buffer_length = full_len
		return bf
	end

	# Read at most `i` bytes
	#
	# As with `Reader::read_bytes`, an empty buffer is returned if
	# `i <= 0` or if an error was already recorded.
	redef fun read_bytes(i)
	do
		# A negative `i` would otherwise move `_buffer_pos` backwards
		if last_error != null or i <= 0 then return new Bytes.empty
		var buf = new Bytes.with_capacity(i)
		read_intern(i, buf)
		return buf
	end

	# Fills `buf` with at most `i` bytes read from `self`
	#
	# Returns the number of bytes appended to `buf`.
	private fun read_intern(i: Int, buf: Bytes): Int do
		# Iterative on purpose: a large request spanning many buffer
		# refills must not grow the call stack.
		var total = 0
		while not eof do
			var p = _buffer_pos
			var avail = _buffer_length - p
			if avail >= i then
				# The buffer holds enough to finish the request
				_buffer_pos += i
				buf.append_ns_from(_buffer, i, p)
				return total + i
			end
			# Drain the buffer; the next `eof` check refills it
			_buffer_pos = _buffer_length
			buf.append_ns_from(_buffer, avail, p)
			total += avail
			i -= avail
		end
		return total
	end

	# Read everything up to the end of the stream, one buffer at a time
	#
	# An empty buffer is returned if an error was already recorded.
	redef fun read_all_bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.with_capacity(10)
		while not eof do
			var j = _buffer_pos
			var k = _buffer_length
			var rd_sz = k - j
			# Use the current `_buffer`, in case `fill_buffer` replaced it
			s.append_ns_from(_buffer, rd_sz, j)
			_buffer_pos = k
			fill_buffer
		end
		return s
	end

	# Append the next line, terminator included, to `s`
	#
	# The bytes are scanned directly in the buffer, which is refilled
	# until a `\n` is found or the end is reached.
	#
	# When the buffer holds nothing to append, `end_reached` is asserted.
	redef fun append_line_to(s)
	do
		var lb = new Bytes.with_capacity(10)
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer_length and _buffer[i] != 0xAu8 do
				i += 1
			end

			var eol
			if i < _buffer_length then
				assert _buffer[i] == 0xAu8
				# Include the '\n' in the copied range
				i += 1
				eol = true
			else
				eol = false
			end

			# if there is something to append
			if i > _buffer_pos then
				# Copy from the buffer to the string
				var j = _buffer_pos
				while j < i do
					lb.add(_buffer[j])
					j += 1
				end
				_buffer_pos = i
			else
				assert end_reached
				s.append lb.to_s
				return
			end

			if eol then
				# so \n is found
				s.append lb.to_s
				return
			else
				# so \n is not found
				if end_reached then
					s.append lb.to_s
					return
				end
				fill_buffer
			end
		end
	end

	# Is the end of the stream reached?
	#
	# The buffer is refilled when it is exhausted and the end was not
	# reached yet.
	redef fun eof
	do
		if _buffer_pos < _buffer_length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end

	# The buffer
	private var buffer: CString = new CString(0)

	# The current position in the buffer
	private var buffer_pos = 0

	# Length of the current buffer (i.e. number of bytes in the buffer)
	private var buffer_length = 0

	# Capacity of the buffer
	private var buffer_capacity = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Has the last fill_buffer reached the end
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	#
	# The new buffer starts empty.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new CString(capacity)
		_buffer_pos = 0 # need to read
		_buffer_length = 0
		_buffer_capacity = capacity
	end
end
662
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
668
# In-memory `Writer` that accumulates everything in `bytes`
#
# ~~~
# var writer = new BytesWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
# assert writer.bytes.to_s == "Strings & bytes"
# ~~~
#
# As with any binary data, UTF-8 code points encoded on two bytes or more
# can be constructed byte by byte.
#
# ~~~
# writer = new BytesWriter
#
# # Write just the character first half
# writer.write_byte 0xC2u8
# assert writer.to_s == "\\xC2"
# assert writer.bytes.to_s == "�"
#
# # Complete the character
# writer.write_byte 0xA2u8
# assert writer.to_s == "\\xC2\\xA2"
# assert writer.bytes.to_s == "¢"
# ~~~
#
# Once closed, the writer silently ignores further writes.
class BytesWriter
	super Writer

	# Everything written so far
	var bytes = new Bytes.empty

	# Has `close` been called?
	protected var closed = false

	# Escaped hexadecimal form of `bytes`
	redef fun to_s
	do
		return bytes.chexdigest
	end

	redef fun write(str)
	do
		if not closed then str.append_to_bytes bytes
	end

	redef fun write_char(c)
	do
		if not closed then bytes.add_char c
	end

	redef fun write_byte(value)
	do
		if not closed then bytes.add value
	end

	redef fun write_bytes(b)
	do
		if not closed then bytes.append b
	end

	redef fun close
	do
		closed = true
	end

	redef fun is_writable
	do
		return not closed
	end
end
737
# In-memory `Writer` whose content is retrieved as a `String`
#
# It behaves like `BytesWriter`, except that `to_s` decodes
# `bytes` to a string.
#
# ~~~
# var writer = new StringWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "Strings & bytes"
# ~~~
class StringWriter
	super BytesWriter

	# The written bytes, decoded with `Bytes::to_s`
	redef fun to_s
	do
		return bytes.to_s
	end
end
758
# In-memory `Reader` over the content of `bytes`
#
# ~~~
# var reader = new BytesReader(b"a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2 # 1st byte of '…'
# assert reader.read_byte == 0x80 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all_bytes == b"b"
# ~~~
class BytesReader
	super Reader

	# The data being read
	var bytes: Bytes

	# Index in `bytes` of the next byte to read
	private var cursor = 0

	# Read the character starting at `cursor`, `null` when nothing is left
	redef fun read_char
	do
		var pos = cursor
		if pos >= bytes.length then return null

		var ns = bytes.items
		# Skip as many bytes as the character occupies
		cursor = pos + ns.length_of_char_at(pos)
		return ns.char_at(pos)
	end

	# Read the byte at `cursor`, `-1` when nothing is left
	redef fun read_byte
	do
		var pos = cursor
		if pos >= bytes.length then return -1

		cursor = pos + 1
		return bytes[pos].to_i
	end

	# Drop the source data
	redef fun close
	do
		bytes = new Bytes.empty
	end

	# Return everything from `cursor` to the end of `bytes`
	redef fun read_all_bytes
	do
		var rest = bytes.slice_from(cursor)
		cursor = bytes.length
		return rest
	end

	redef fun eof
	do
		return cursor >= bytes.length
	end
end
808
# In-memory `Reader` over the content of a `String`
#
# It behaves like `BytesReader`, except that its constructor
# accepts a `String`.
#
# ~~~
# var reader = new StringReader("a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2 # 1st byte of '…'
# assert reader.read_byte == 0x80 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all == "b"
# ~~~
class StringReader
	super BytesReader

	autoinit source

	# The string being read
	var source: String

	# Read from the bytes of `source`
	init
	do
		bytes = source.to_bytes
	end

	# Drop `source`, then close as a `BytesReader`
	redef fun close
	do
		source = ""
		super
	end
end