lib/core: remove ropes intrude import in stream
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 import error
15 intrude import bytes
16 import codecs
17
18 in "C" `{
19 #include <unistd.h>
20 #include <string.h>
21 #include <signal.h>
22 `}
23
# Error reported by an operation on a `Stream`
#
# Streams store the last such error in `Stream::last_error`.
class IOError
	super Error
end
28
# Any kind of stream to read/write/both to or from a source
abstract class Stream
	# Codec used to transform raw data to text
	#
	# Note: defaults to UTF-8
	var codec: Codec = utf8_codec is protected writable(set_codec)

	# Lookahead buffer for codecs
	#
	# Since some codecs are multibyte, a lookahead may be required
	# to store the next bytes and consume them only if a valid character
	# is read.
	protected var lookahead: CString is noinit

	# Capacity of the lookahead
	protected var lookahead_capacity = 0

	# Current occupation of the lookahead
	protected var lookahead_length = 0

	# Buffer for writing data to a stream
	protected var write_buffer: CString is noinit

	# Allocate `lookahead` and `write_buffer` according to the initial `codec`.
	init do
		var lcap = codec.max_lookahead
		lookahead = new CString(lcap)
		write_buffer = new CString(lcap)
		lookahead_length = 0
		lookahead_capacity = lcap
	end

	# Change the codec for this stream.
	#
	# If `c` needs a larger lookahead than the current capacity,
	# `lookahead` is reallocated (its pending bytes are preserved)
	# and `write_buffer` is replaced by a fresh buffer of the same size.
	fun codec=(c: Codec) do
		# The size must come from the *new* codec `c`:
		# `codec` is still the previous one until `set_codec` is called.
		var lcap = c.max_lookahead
		if lcap > lookahead_capacity then
			var lk = new CString(lcap)
			var llen = lookahead_length
			if llen > 0 then
				# Keep the bytes that are waiting in the old lookahead
				lookahead.copy_to(lk, llen, 0, 0)
			end
			lookahead = lk
			lookahead_capacity = lcap
			write_buffer = new CString(lcap)
		end
		set_codec(c)
	end

	# Error produced by the file stream
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# close the stream
	fun close is abstract

	# Pre-work hook.
	#
	# Used to inform `self` that operations will start.
	# Specific streams can use this to prepare some resources.
	#
	# Is automatically invoked at the beginning of `with` structures.
	#
	# Do nothing by default.
	fun start do end

	# Post-work hook.
	#
	# Used to inform `self` that the operations are over.
	# Specific streams can use this to free some resources.
	#
	# Is automatically invoked at the end of `with` structures.
	#
	# call `close` by default.
	fun finish do close
end
107
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Reads a character. Returns `null` on EOF or timeout
	fun read_char: nullable Char is abstract

	# Reads a byte. Returns `null` on EOF or timeout
	fun read_byte: nullable Byte is abstract

	# Reads a String of at most `i` length
	fun read(i: Int): String do return read_bytes(i).to_s

	# Read at most i bytes
	#
	# Returns an empty `Bytes` if `last_error` is already set.
	fun read_bytes(i: Int): Bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new CString(i)
		var buf = new Bytes(s, 0, 0)
		while i > 0 and not eof do
			var c = read_byte
			# A `null` byte is skipped and does not count toward `i`
			if c != null then
				buf.add c
				i -= 1
			end
		end
		return buf
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) is considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that read each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The line are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that read all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the stream.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned as a String.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String do
		var s = read_all_bytes
		var slen = s.length
		if slen == 0 then return ""
		return codec.decode_string(s.items, slen)
	end

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	fun read_all_bytes: Bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.empty
		while not eof do
			var c = read_byte
			if c != null then s.add(c)
		end
		return s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == null then
				# No character: stop at EOF, otherwise try again
				if eof then return
			else
				s.chars.push(x)
				if x == '\n' then return
			end
		end
	end

	# Has the end of the stream been reached?
	#
	# Returns `false` if there is something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encounter.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		if c != null then
			buf.add(c)
			while not eof do
				c = read_char
				if c == null then break
				if c.is_whitespace then break
				buf.add(c)
			end
		end
		var res = buf.to_s
		return res
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the code point of the character.
	# Returns `null` on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'
	# assert w.read_nonwhitespace == 'b'
	# assert w.read_nonwhitespace == 'c'
	# assert w.read_nonwhitespace == null
	# ~~~
	#
	# Trailing whitespace is skipped too: when only whitespace remains,
	# `null` is returned (never a whitespace character).
	#
	# ~~~
	# var w2 = new StringReader("a \n")
	# assert w2.read_nonwhitespace == 'a'
	# assert w2.read_nonwhitespace == null
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: nullable Char
	do
		while not eof do
			var c = read_char
			# `null` (error) or a real character: hand it back as is
			if c == null or not c.is_whitespace then return c
		end
		# EOF reached while skipping: nothing but whitespace was left
		return null
	end
end
366
# Lazy iterator over the lines of a `Reader`.
#
# Instances are returned by `Reader::each_line`;
# see that method for details and examples.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# The last line read (cache)
	private var line: nullable String = null

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Is there still something to read in `stream`?
	#
	# When the end is reached and `close_on_finish` is set, `stream` is closed.
	redef fun is_ok
	do
		if not stream.eof then return true
		if close_on_finish then stream.close
		return false
	end

	# The current line, read from `stream` on first access then cached.
	redef fun item
	do
		var cached = line
		if cached != null then return cached
		var fresh = stream.read_line
		line = fresh
		return fresh
	end

	# Drop the current line so that the next `item` reads a new one.
	redef fun next
	do
		# The current line must be consumed from the stream before being dropped
		if line == null then item
		line = null
	end

	# Close `stream` if `close_on_finish` is set.
	redef fun finish
	do
		if not close_on_finish then return
		stream.close
	end
end
413
# A `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
422
# A `Stream` that accepts data to write
abstract class Writer
	super Stream

	# Write the content of the byte sequence `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the text `s`
	fun write(s: Text) is abstract

	# Write the single byte `value`
	fun write_byte(value: Byte) is abstract

	# Write the single character `c`
	#
	# By default, `c` is converted to a string then given to `write`.
	fun write_char(c: Char)
	do
		var text = c.to_s
		write(text)
	end

	# Is the stream currently usable for writing?
	fun is_writable: Bool is abstract
end
442
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	#
	# The specific logic is left to the concrete subclasses.
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point of a writable
	# object is to be efficiently written to a stream without having
	# to allocate and concatenate strings.
	fun write_to_string: String
	do
		# Collect the output in memory, then give it back as a string
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
466
redef class Bytes
	super Writable

	# Write the bytes of `self` to `s` with `Writer::write_bytes`
	redef fun write_to(s)
	do
		s.write_bytes(self)
	end

	# Directly use `to_s`, no intermediate stream is needed
	redef fun write_to_string
	do
		return to_s
	end
end
473
redef class Text
	super Writable

	# Write `self` to `stream` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
478
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`, and allocate the
# buffer with `prepare_buffer`.
abstract class BufferedReader
	super Reader

	# Read the next byte of the buffer as a character.
	#
	# Returns `null` if an error is already set. Reading at the end of the
	# stream sets `last_error` and returns `null`.
	redef fun read_char
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		# TODO: Fix when supporting UTF-8
		var c = _buffer[_buffer_pos].to_i.code_point
		_buffer_pos += 1
		return c
	end

	# Read the next byte of the buffer.
	#
	# Returns `null` if an error is already set. Reading at the end of the
	# stream sets `last_error` and returns `null`.
	redef fun read_byte
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		var c = _buffer[_buffer_pos]
		_buffer_pos += 1
		return c
	end

	# Resets the internal buffer
	fun buffer_reset do
		_buffer_length = 0
		_buffer_pos = 0
	end

	# Peeks up to `n` bytes in the buffer
	#
	# The operation does not consume the buffer
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read(5)
	# ~~~
	fun peek(i: Int): Bytes do
		if eof then return new Bytes.empty
		var remsp = _buffer_length - _buffer_pos
		if i <= remsp then
			# Everything asked is already buffered: just copy it
			var bf = new Bytes.with_capacity(i)
			bf.append_ns_from(_buffer, i, _buffer_pos)
			return bf
		end
		# Take what is buffered, then read the missing bytes from the stream
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, remsp, _buffer_pos)
		_buffer_pos = _buffer_length
		read_intern(i - bf.length, bf)
		# Rebuild a buffer made of the peeked bytes followed by the bytes
		# still unread, so that the peeked bytes can be read again.
		remsp = _buffer_length - _buffer_pos
		var full_len = bf.length + remsp
		if full_len > _buffer_capacity then
			var c = _buffer_capacity
			while c < full_len do c = c * 2 + 2
			_buffer_capacity = c
		end
		var nns = new CString(_buffer_capacity)
		bf.items.copy_to(nns, bf.length, 0, 0)
		_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
		_buffer = nns
		_buffer_pos = 0
		_buffer_length = full_len
		return bf
	end

	# Read at most `i` bytes, using the buffer.
	#
	# Returns an empty `Bytes` if `last_error` is already set.
	redef fun read_bytes(i)
	do
		if last_error != null then return new Bytes.empty
		var buf = new Bytes.with_capacity(i)
		read_intern(i, buf)
		return buf
	end

	# Fills `buf` with at most `i` bytes read from `self`
	#
	# Returns the number of bytes appended to `buf`.
	private fun read_intern(i: Int, buf: Bytes): Int do
		if eof then return 0
		var p = _buffer_pos
		var bufsp = _buffer_length - p
		if bufsp >= i then
			# The buffer holds enough bytes
			_buffer_pos += i
			buf.append_ns_from(_buffer, i, p)
			return i
		end
		# Drain the buffer, then continue with the rest of the stream
		# (the `eof` test of the recursive call refills the buffer)
		_buffer_pos = _buffer_length
		var readln = _buffer_length - p
		buf.append_ns_from(_buffer, readln, p)
		var rd = read_intern(i - readln, buf)
		return rd + readln
	end

	# Read all the remaining bytes of the stream.
	#
	# Returns an empty `Bytes` if `last_error` is already set.
	redef fun read_all_bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.with_capacity(10)
		while not eof do
			var j = _buffer_pos
			var k = _buffer_length
			var rd_sz = k - j
			# Always read the current `_buffer`: a cached reference would be
			# stale if a refill replaced the underlying array.
			s.append_ns_from(_buffer, rd_sz, j)
			_buffer_pos = k
			fill_buffer
		end
		return s
	end

	# Append the next line, line terminator '\n' included, to `s`.
	#
	# The bytes are searched directly in the buffer, which is refilled
	# as needed until a '\n' is found or the end is reached.
	redef fun append_line_to(s)
	do
		var lb = new Bytes.with_capacity(10)
		loop
			# Refill a drained buffer first (same condition as in `eof`),
			# so that calling this method without a prior `eof` test
			# does not trip the `end_reached` assertion below.
			if _buffer_pos >= _buffer_length and not end_reached then fill_buffer

			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer_length and _buffer[i] != 0xAu8 do
				i += 1
			end

			var eol
			if i < _buffer_length then
				assert _buffer[i] == 0xAu8
				# Include the '\n' in the copied range
				i += 1
				eol = true
			else
				eol = false
			end

			# if there is something to append
			if i > _buffer_pos then
				# Copy from the buffer to the line
				lb.append_ns_from(_buffer, i - _buffer_pos, _buffer_pos)
				_buffer_pos = i
			else
				assert end_reached
				s.append lb.to_s
				return
			end

			if eol then
				# so \n is found
				s.append lb.to_s
				return
			else
				# so \n is not found
				if end_reached then
					s.append lb.to_s
					return
				end
				fill_buffer
			end
		end
	end

	# Is the buffer drained and the end of the source reached?
	#
	# May call `fill_buffer` when the buffer is drained.
	redef fun eof
	do
		if _buffer_pos < _buffer_length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end

	# The buffer
	private var buffer: CString = new CString(0)

	# The current position in the buffer
	private var buffer_pos = 0

	# Length of the current buffer (i.e. number of bytes in the buffer)
	private var buffer_length = 0

	# Capacity of the buffer
	private var buffer_capacity = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Has the last fill_buffer reached the end
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new CString(capacity)
		_buffer_pos = 0 # need to read
		_buffer_length = 0
		_buffer_capacity = capacity
	end
end
674
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
680
# Write to `bytes` in memory
#
# ~~~
# var writer = new BytesWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "\\x53\\x74\\x72\\x69\\x6E\\x67\\x73\\x20\\x26\\x20\\x62\\x79\\x74\\x65\\x73"
# assert writer.bytes.to_s == "Strings & bytes"
# ~~~
#
# As with any binary data, UTF-8 code points encoded on two bytes or more
# can be constructed byte by byte.
#
# ~~~
# writer = new BytesWriter
#
# # Write just the character first half
# writer.write_byte 0xC2u8
# assert writer.to_s == "\\xC2"
# assert writer.bytes.to_s == "�"
#
# # Complete the character
# writer.write_byte 0xA2u8
# assert writer.to_s == "\\xC2\\xA2"
# assert writer.bytes.to_s == "¢"
# ~~~
#
# Once closed, the writer silently ignores further writes.
class BytesWriter
	super Writer

	# Written memory
	var bytes = new Bytes.empty

	# Is the stream closed?
	protected var closed = false

	# The written bytes, as given by `Bytes::chexdigest`
	redef fun to_s
	do
		return bytes.chexdigest
	end

	# Append the bytes of `str` to `bytes`, unless closed
	redef fun write(str)
	do
		if not closed then str.append_to_bytes bytes
	end

	# Append the character `c` to `bytes`, unless closed
	redef fun write_char(c)
	do
		if not closed then bytes.add_char c
	end

	# Append the byte `value` to `bytes`, unless closed
	redef fun write_byte(value)
	do
		if not closed then bytes.add value
	end

	# Append the content of `b` to `bytes`, unless closed
	redef fun write_bytes(b)
	do
		if not closed then bytes.append b
	end

	# Mark the writer as closed; `bytes` is kept
	redef fun close
	do
		closed = true
	end

	# A writer is writable as long as it is not closed
	redef fun is_writable
	do
		return not closed
	end
end
749
# `Stream` writing to a `String`
#
# This class has the same behavior as `BytesWriter`
# except for `to_s` which decodes `bytes` to a string.
#
# ~~~
# var writer = new StringWriter
#
# writer.write "Strings "
# writer.write_char '&'
# writer.write_byte 0x20u8
# writer.write_bytes "bytes".to_bytes
#
# assert writer.to_s == "Strings & bytes"
# ~~~
class StringWriter
	super BytesWriter

	# The written content, obtained with `Bytes::to_s`
	redef fun to_s
	do
		return bytes.to_s
	end
end
770
# Read from `bytes` in memory
#
# ~~~
# var reader = new BytesReader(b"a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2u8 # 1st byte of '…'
# assert reader.read_byte == 0x80u8 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all_bytes == b"b"
# ~~~
class BytesReader
	super Reader

	# Source data to read
	var bytes: Bytes

	# The current position in `bytes`
	private var cursor = 0

	# Decode the character found at `cursor` and move past its bytes.
	#
	# Returns `null` when `cursor` is at or after the end of `bytes`.
	redef fun read_char
	do
		var pos = cursor
		if pos >= bytes.length then return null

		var raw = bytes.items
		var width = raw.length_of_char_at(pos)
		var res = raw.char_at(pos)
		cursor = pos + width
		return res
	end

	# Return the byte found at `cursor` and move to the next one.
	#
	# Returns `null` when `cursor` is at or after the end of `bytes`.
	redef fun read_byte
	do
		var pos = cursor
		if pos >= bytes.length then return null

		var res = bytes[pos]
		cursor = pos + 1
		return res
	end

	# Replace `bytes` by an empty sequence
	redef fun close
	do
		bytes = new Bytes.empty
	end

	# Return what remains after `cursor`, and move `cursor` to the end.
	redef fun read_all_bytes
	do
		var rest = bytes.slice_from(cursor)
		cursor = bytes.length
		return rest
	end

	# Is `cursor` at or after the end of `bytes`?
	redef fun eof
	do
		return cursor >= bytes.length
	end
end
820
# `Stream` reading from a `String` source
#
# This class has the same behavior as `BytesReader`
# except for its constructor accepting a `String`.
#
# ~~~
# var reader = new StringReader("a…b")
# assert reader.read_char == 'a'
# assert reader.read_byte == 0xE2u8 # 1st byte of '…'
# assert reader.read_byte == 0x80u8 # 2nd byte of '…'
# assert reader.read_char == '�' # Reads the last byte as an invalid char
# assert reader.read_all == "b"
# ~~~
class StringReader
	super BytesReader

	autoinit source

	# Source data to read
	var source: String

	# The bytes to read are those of `source`
	init
	do
		bytes = source.to_bytes
	end

	# Forget `source`, then apply the inherited `close`
	redef fun close
	do
		source = ""
		super
	end
end