lib/core: remove ropes intrude import in stream
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 import error
15 intrude import bytes
16 import codecs
17
18 in "C" `{
19 #include <unistd.h>
20 #include <string.h>
21 #include <signal.h>
22 `}
23
# Error raised by an operation on a `Stream`
#
# Streams record the latest one in `Stream::last_error`.
class IOError
	super Error
end
28
# Any kind of stream to read/write/both to or from a source
abstract class Stream
	# Codec used to transform raw data to text
	#
	# Note: defaults to UTF-8
	var codec: Codec = utf8_codec is protected writable(set_codec)

	# Lookahead buffer for codecs
	#
	# Since some codecs are multibyte, a lookahead may be required
	# to store the next bytes and consume them only if a valid character
	# is read.
	protected var lookahead: CString is noinit

	# Capacity of the lookahead
	protected var lookahead_capacity = 0

	# Current occupation of the lookahead
	protected var lookahead_length = 0

	# Buffer for writing data to a stream
	protected var write_buffer: CString is noinit

	# Allocate `lookahead` and `write_buffer` according to the initial `codec`.
	init do
		var lcap = codec.max_lookahead
		lookahead = new CString(lcap)
		write_buffer = new CString(lcap)
		lookahead_length = 0
		lookahead_capacity = lcap
	end

	# Change the codec for this stream.
	#
	# If `c` needs a larger lookahead than the one currently allocated,
	# `lookahead` is reallocated (pending bytes are preserved) and
	# `write_buffer` is replaced by a fresh buffer of the new capacity.
	fun codec=(c: Codec) do
		# The new capacity must come from the incoming codec `c`:
		# `codec` is still the previous codec at this point.
		var lcap = c.max_lookahead
		if lcap > lookahead_capacity then
			var lk = new CString(lcap)
			var llen = lookahead_length
			if llen > 0 then
				# Keep the bytes that were looked ahead but not consumed yet
				lookahead.copy_to(lk, llen, 0, 0)
			end
			lookahead = lk
			lookahead_capacity = lcap
			write_buffer = new CString(lcap)
		end
		set_codec(c)
	end

	# Error produced by the file stream
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# close the stream
	fun close is abstract

	# Pre-work hook.
	#
	# Used to inform `self` that operations will start.
	# Specific streams can use this to prepare some resources.
	#
	# Is automatically invoked at the beginning of `with` structures.
	#
	# Do nothing by default.
	fun start do end

	# Post-work hook.
	#
	# Used to inform `self` that the operations are over.
	# Specific streams can use this to free some resources.
	#
	# Is automatically invoked at the end of `with` structures.
	#
	# call `close` by default.
	fun finish do close
end
107
108 # A `Stream` that can be read from
109 abstract class Reader
110 super Stream
111
# Read the next character, or `null` on EOF or timeout
fun read_char: nullable Char is abstract

# Read the next byte; a negative value signals an error
fun read_byte: Int is abstract

# Read at most `i` bytes and return them as a String
fun read(i: Int): String
do
	var raw = read_bytes(i)
	return raw.to_s
end
120
# Read at most i bytes
#
# If i <= 0, an empty buffer will be returned
#
# Reading stops when `i` bytes were collected, when `eof` is reached,
# or when an error gets recorded in `last_error`.
fun read_bytes(i: Int): Bytes
do
	if last_error != null or i <= 0 then return new Bytes.empty
	var s = new CString(i)
	var buf = new Bytes(s, 0, i)
	while i > 0 and not eof do
		var c = read_byte
		if c < 0 then
			# A negative byte with no recorded error is retried.
			# With a recorded error, stop instead of spinning: e.g.
			# `BufferedReader::read_byte` keeps answering -1 once
			# `last_error` is set.
			if last_error != null then break
			continue
		end
		buf.add c.to_b
		i -= 1
	end
	return buf
end
139
# Read a string until the end of the line.
#
# The line terminator '\n' and '\r\n', if any, is removed in each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_line == "Hello"
# assert i.read_line == ""
# assert i.read_line == "World"
# assert i.eof
# ~~~
#
# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
# the end of file (EOF) are considered to delimit the end of lines.
# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
#
# ~~~
# var txt2 = "Hello\r\n\n\rWorld"
# var i2 = new StringReader(txt2)
# assert i2.read_line == "Hello"
# assert i2.read_line == ""
# assert i2.read_line == "\rWorld"
# assert i2.eof
# ~~~
#
# An empty string is returned when an error is recorded or on EOF.
#
# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
fun read_line: String
do
	# `or` is lazy: `eof` is not queried when an error is already recorded
	if last_error != null or eof then return ""
	var line = new FlatBuffer
	append_line_to(line)
	var text = line.to_s
	return text.chomp
end
175
# Read all the lines until the eof.
#
# The line terminator '\n' and `\r\n` is removed in each line,
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_lines == ["Hello", "", "World"]
# ~~~
#
# This method is more efficient than splitting
# the result of `read_all`.
#
# NOTE: SEE `read_line` for details.
fun read_lines: Array[String]
do
	var lines = new Array[String]
	loop
		if eof then break
		lines.add read_line
	end
	return lines
end
198
# Return an iterator that reads each line.
#
# The line terminator '\n' and `\r\n` is removed in each line,
# The lines are read with `read_line`. See this method for details.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.each_line.to_a == ["Hello", "", "World"]
# ~~~
#
# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the iteration.
#
# ~~~
# i = new StringReader(txt)
# var el = i.each_line
#
# assert el.item == "Hello"
# el.next
# assert el.item == ""
# el.next
#
# i.close
#
# assert not el.is_ok
# # closed before "world" is read
# ~~~
fun each_line: LineIterator
do
	var iterator = new LineIterator(self)
	return iterator
end
228
# Read all the stream until the eof.
#
# The content is decoded with `codec` and returned as a String.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
fun read_all: String
do
	var raw = read_all_bytes
	var len = raw.length
	if len != 0 then
		return codec.decode_string(raw.items, len)
	end
	return ""
end
244
# Read all the stream until the eof.
#
# The content of the file is returned verbatim.
#
# An empty `Bytes` is returned if an error is already recorded.
# Reading stops early if an error gets recorded while reading.
fun read_all_bytes: Bytes
do
	if last_error != null then return new Bytes.empty
	var s = new Bytes.empty
	while not eof do
		var c = read_byte
		if c < 0 then
			# A negative byte with no recorded error is retried.
			# With a recorded error, stop instead of looping forever
			# on a stream that never reports `eof`.
			if last_error != null then break
			continue
		end
		s.add(c.to_b)
	end
	return s
end
259
# Read a string until the end of the line and append it to `s`.
#
# Unlike `read_line` and other related methods,
# the line terminator '\n', if any, is preserved in each line.
# Use the method `Text::chomp` to safely remove it.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# var b = new FlatBuffer
# i.append_line_to(b)
# assert b == "Hello\n"
# i.append_line_to(b)
# assert b == "Hello\n\n"
# i.append_line_to(b)
# assert b == txt
# assert i.eof
# ~~~
#
# If `\n` is not present at the end of the result, it means that
# a non-eol terminated last line was returned.
#
# ~~~
# var i2 = new StringReader("hello")
# assert not i2.eof
# var b2 = new FlatBuffer
# i2.append_line_to(b2)
# assert b2 == "hello"
# assert i2.eof
# ~~~
#
# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
fun append_line_to(s: Buffer)
do
	if last_error != null then return
	loop
		var c = read_char
		if c != null then
			s.chars.push(c)
			if c == '\n' then return
		else if eof then
			# No character and nothing left: the last line had no '\n'
			return
		end
		# Otherwise no character was available but the stream is not
		# at its end yet: try again.
	end
end

# Has the end of the stream been reached?
#
# Returns `false` if there is something to read.
fun eof: Bool is abstract
310
# Read the next sequence of non whitespace characters.
#
# Leading whitespace characters are skipped.
# The first whitespace character that follows the result is consumed.
#
# An empty string is returned if the end of the file or an error is encountered.
#
# ~~~
# var w = new StringReader(" Hello, \n\t World!")
# assert w.read_word == "Hello,"
# assert w.read_char == '\n'
# assert w.read_word == "World!"
# assert w.read_word == ""
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_word: String
do
	var word = new FlatBuffer
	var first = read_nonwhitespace
	if first == null then return word.to_s
	word.add(first)
	loop
		if eof then break
		var c = read_char
		# Stop on a missing character or on the (consumed) whitespace
		if c == null or c.is_whitespace then break
		word.add(c)
	end
	return word.to_s
end
343
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
# Returns the character that was read.
# Returns `null` on end of file or error.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
# assert w.read_nonwhitespace == 'a'
# assert w.read_nonwhitespace == 'b'
# assert w.read_nonwhitespace == 'c'
# assert w.read_nonwhitespace == null
# ~~~
#
# Trailing whitespace is skipped too, it is never returned:
#
# ~~~
# var w2 = new StringReader("a  ")
# assert w2.read_nonwhitespace == 'a'
# assert w2.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_nonwhitespace: nullable Char
do
	while not eof do
		var c = read_char
		if c == null then return null
		if not c.is_whitespace then return c
	end
	# Only whitespace (or nothing) was left before the end of the stream.
	# The last whitespace character read must not leak to the caller.
	return null
end
369 end
370
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# The last line read (cache)
	private var line: nullable String = null

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# There is a line to get as long as `stream` is not at its end.
	#
	# When the end is reached, `stream` is closed if `close_on_finish` is set.
	redef fun is_ok
	do
		if not stream.eof then return true
		if close_on_finish then stream.close
		return false
	end

	# The current line, read from `stream` on first access then cached.
	redef fun item
	do
		var cached = self.line
		if cached != null then return cached
		var fresh = stream.read_line
		self.line = fresh
		return fresh
	end

	redef fun next
	do
		# Make sure the current line was actually consumed from the stream...
		if line == null then
			item
		end
		# ...then forget it, so that `item` reads the following one.
		line = null
	end

	redef fun finish
	do
		if not close_on_finish then return
		stream.close
	end
end
417
# `Reader` able to tell whether it can be read without blocking
abstract class PollableReader
	super Reader

	# Is there something to read? (without blocking)
	fun poll_in: Bool is abstract
end
426
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the text `s`
	fun write(s: Text) is abstract

	# Write the single byte `value`
	fun write_byte(value: Byte) is abstract

	# Write the single character `c`
	#
	# By default, `c` is converted to a string and sent to `write`.
	fun write_char(c: Char)
	do
		var text = c.to_s
		write(text)
	end

	# Can the stream be used to write
	fun is_writable: Bool is abstract
end
446
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a streamable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		var sink = new StringWriter
		write_to(sink)
		var res = sink.to_s
		return res
	end
end
470
redef class Bytes
	super Writable

	# Send all the bytes of `self` to `s`
	redef fun write_to(s)
	do
		s.write_bytes(self)
	end

	# No intermediate stream is needed: `to_s` is returned directly
	redef fun write_to_string
	do
		return to_s
	end
end
477
redef class Text
	super Writable

	# Send `self` to `stream` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
482
483 # Input streams with a buffered input for efficiency purposes
484 abstract class BufferedReader
485 super Reader
# Return the next buffered byte as a character, or `null` on error or EOF.
#
# Reading at EOF records an `IOError` in `last_error`.
redef fun read_char
do
	if last_error != null then return null
	if eof then
		last_error = new IOError("Stream has reached eof")
		return null
	end
	# TODO: Fix when supporting UTF-8
	# The position is read after `eof`, which may have refilled the buffer
	var pos = _buffer_pos
	var byte = _buffer[pos]
	_buffer_pos = pos + 1
	return byte.to_i.code_point
end
498
# Return the next buffered byte, or -1 on error or EOF.
#
# Reading at EOF records an `IOError` in `last_error`.
redef fun read_byte
do
	if last_error != null then return -1
	if eof then
		last_error = new IOError("Stream has reached eof")
		return -1
	end
	# The position is read after `eof`, which may have refilled the buffer
	var pos = _buffer_pos
	var byte = _buffer[pos]
	_buffer_pos = pos + 1
	return byte.to_i
end
510
# Resets the internal buffer
#
# The buffer is marked as empty; its memory is kept as is.
fun buffer_reset
do
	_buffer_pos = 0
	_buffer_length = 0
end
516
# Peeks up to `n` bytes in the buffer
#
# The operation does not consume the buffer
#
# An empty `Bytes` is returned at EOF or if `i <= 0`.
#
# ~~~nitish
# var x = new FileReader.open("File.txt")
# assert x.peek(5) == x.read(5)
# ~~~
fun peek(i: Int): Bytes do
	if eof then return new Bytes.empty
	# A negative count must not reach the allocation and the copies below.
	# Checked after `eof` so that `peek(0)` behaves as before.
	if i <= 0 then return new Bytes.empty
	var remsp = _buffer_length - _buffer_pos
	var bf = new Bytes.with_capacity(i)
	if i <= remsp then
		# Everything requested is already buffered: just copy it
		bf.append_ns_from(_buffer, i, _buffer_pos)
		return bf
	end
	# Take what is left in the buffer, then mark the buffer as consumed...
	bf.append_ns_from(_buffer, remsp, _buffer_pos)
	_buffer_pos = _buffer_length
	# ...so that `read_intern` fetches the missing bytes from the source
	read_intern(i - bf.length, bf)
	# Rebuild the buffer as: peeked bytes + bytes still unread,
	# so that the next reads get the peeked bytes again.
	remsp = _buffer_length - _buffer_pos
	var full_len = bf.length + remsp
	if full_len > _buffer_capacity then
		var c = _buffer_capacity
		while c < full_len do c = c * 2 + 2
		_buffer_capacity = c
	end
	var nns = new CString(_buffer_capacity)
	bf.items.copy_to(nns, bf.length, 0, 0)
	_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
	_buffer = nns
	_buffer_pos = 0
	_buffer_length = full_len
	return bf
end
552
# Read at most `i` bytes, taking them from the internal buffer.
#
# As stated by `Reader::read_bytes`, an empty `Bytes` is returned
# if `i <= 0`. It is also the case if an error is recorded.
redef fun read_bytes(i)
do
	# `i <= 0` must be rejected here: a negative count would otherwise
	# reach `Bytes.with_capacity` and move `_buffer_pos` backward
	# in `read_intern`.
	if last_error != null or i <= 0 then return new Bytes.empty
	var buf = new Bytes.with_capacity(i)
	read_intern(i, buf)
	return buf
end
560
# Fills `buf` with at most `i` bytes read from `self`
#
# Returns the number of bytes appended to `buf`.
private fun read_intern(i: Int, buf: Bytes): Int
do
	if eof then return 0
	var start = _buffer_pos
	var available = _buffer_length - start
	if available < i then
		# Not enough buffered bytes: take them all, then recurse for
		# the rest (`eof` refills the buffer when needed)
		_buffer_pos = _buffer_length
		buf.append_ns_from(_buffer, available, start)
		var rest = read_intern(i - available, buf)
		return rest + available
	end
	# The buffer holds everything that is requested
	_buffer_pos = start + i
	buf.append_ns_from(_buffer, i, start)
	return i
end
577
# Read all the remaining bytes, one full buffer at a time.
#
# An empty `Bytes` is returned if an error is already recorded.
redef fun read_all_bytes
do
	if last_error != null then return new Bytes.empty
	var s = new Bytes.with_capacity(10)
	while not eof do
		var j = _buffer_pos
		var k = _buffer_length
		# `_buffer` is read at each iteration, not aliased once before
		# the loop: a `fill_buffer` implementation may install a new
		# buffer (e.g. through `prepare_buffer`).
		s.append_ns_from(_buffer, k - j, j)
		_buffer_pos = k
		fill_buffer
	end
	return s
end
593
# Append the next line to `s`, scanning the buffer for a '\n' byte.
#
# The '\n', if any, is kept. The buffer is refilled as long as no '\n'
# is found and the end is not reached.
redef fun append_line_to(s)
do
	var line = new Bytes.with_capacity(10)
	loop
		# Look for a '\n' in what is left in the buffer.
		# `stop` ends just after the '\n', or at the end of the buffer.
		var start = _buffer_pos
		var stop = start
		var found_eol = false
		while stop < _buffer_length do
			var byte = _buffer[stop]
			stop += 1
			if byte == 0xAu8 then
				found_eol = true
				break
			end
		end

		# Nothing to take from the buffer
		if stop == start then
			assert end_reached
			break
		end

		# Move the scanned bytes from the buffer to the line
		for j in [start..stop[ do line.add(_buffer[j])
		_buffer_pos = stop

		# Done on '\n' or at the end of the stream, else get more data
		if found_eol or end_reached then break
		fill_buffer
	end
	s.append line.to_s
end
642
# Is the buffer exhausted with no more data to get?
#
# The buffer is refilled with `fill_buffer` if it is exhausted
# while the end is not reached yet.
redef fun eof
do
	if _buffer_pos >= _buffer_length then
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end
	return false
end
650
# The buffer
private var buffer: CString = new CString(0)

# The current position in the buffer
private var buffer_pos: Int = 0

# Length of the current buffer (i.e. number of bytes in the buffer)
private var buffer_length: Int = 0

# Capacity of the buffer
private var buffer_capacity: Int = 0
662
# Fill the buffer
protected fun fill_buffer is abstract

# Has the last fill_buffer reached the end
protected fun end_reached: Bool is abstract

# Allocate a `_buffer` for a given `capacity`.
#
# The new buffer starts empty.
protected fun prepare_buffer(capacity: Int)
do
	_buffer_capacity = capacity
	_buffer = new CString(capacity)
	_buffer_length = 0
	_buffer_pos = 0 # need to read
end
677 end
678
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
684
# Write to `bytes` in memory
#
# ~~~
# var writer = new BytesWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
# assert writer.bytes.to_s == "Strings & bytes"
# ~~~
#
# As with any binary data, UTF-8 code points encoded on two bytes or more
# can be constructed byte by byte.
#
# ~~~
# writer = new BytesWriter
#
# # Write just the character first half
# writer.write_byte 0xC2u8
# assert writer.to_s == "\\xC2"
# assert writer.bytes.to_s == "�"
#
# # Complete the character
# writer.write_byte 0xA2u8
# assert writer.to_s == "\\xC2\\xA2"
# assert writer.bytes.to_s == "¢"
# ~~~
#
# Once closed, the writer silently ignores what is written to it.
class BytesWriter
	super Writer

	# Written memory
	var bytes = new Bytes.empty

	# Is the stream closed?
	protected var closed = false

	redef fun to_s do return bytes.chexdigest

	redef fun write(str)
	do
		if not closed then str.append_to_bytes bytes
	end

	redef fun write_char(c)
	do
		if not closed then bytes.add_char c
	end

	redef fun write_byte(value)
	do
		if not closed then bytes.add value
	end

	redef fun write_bytes(b)
	do
		if not closed then bytes.append b
	end

	redef fun close
	do
		closed = true
	end

	redef fun is_writable
	do
		return not closed
	end
end
753
# `Stream` writing to a `String`
#
# This class has the same behavior as `BytesWriter`
# except for `to_s` which decodes `bytes` to a string.
#
# ~~~
# var writer = new StringWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "Strings & bytes"
# ~~~
class StringWriter
	super BytesWriter

	# The written bytes, as a string
	redef fun to_s
	do
		return bytes.to_s
	end
end
774
# Read from `bytes` in memory
#
# ~~~
# var reader = new BytesReader(b"a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2 # 1st byte of '…'
# assert reader.read_byte == 0x80 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all_bytes == b"b"
# ~~~
class BytesReader
	super Reader

	# Source data to read
	var bytes: Bytes

	# The current position in `bytes`
	private var cursor = 0

	redef fun eof do return cursor >= bytes.length

	# Read the character at `cursor`, then move after it
	redef fun read_char
	do
		var pos = cursor
		if pos >= bytes.length then return null

		var items = bytes.items
		var c = items.char_at(pos)
		cursor = pos + items.length_of_char_at(pos)
		return c
	end

	# Read the byte at `cursor`, then move after it
	redef fun read_byte
	do
		var pos = cursor
		if pos >= bytes.length then return -1

		var byte = bytes[pos]
		cursor = pos + 1
		return byte.to_i
	end

	# Return everything from `cursor`, then move to the end of `bytes`
	redef fun read_all_bytes
	do
		var rest = bytes.slice_from(cursor)
		cursor = bytes.length
		return rest
	end

	# Closing replaces `bytes` by an empty `Bytes`
	redef fun close
	do
		bytes = new Bytes.empty
	end
end
824
# `Stream` reading from a `String` source
#
# This class has the same behavior as `BytesReader`
# except for its constructor accepting a `String`.
#
# ~~~
# var reader = new StringReader("a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2 # 1st byte of '…'
# assert reader.read_byte == 0x80 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all == "b"
# ~~~
class StringReader
	super BytesReader

	autoinit source

	# Source data to read
	var source: String

	# The bytes to read are those of `source`
	init
	do
		bytes = source.to_bytes
	end

	# Forget `source`, then close as a `BytesReader`
	redef fun close
	do
		source = ""
		super
	end
end