Merge: Make stream and iterators withable
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Any kind of error that could be produced by an operation on Streams
#
# Adds no member of its own: it only specializes `Error`.
class IOError
	super Error
end
29
# Any kind of stream to read/write/both to or from a source
abstract class Stream
	# Error produced by the file stream
	#
	# var ifs = new FileReader.open("donotmakethisfile.binx")
	# ifs.read_all
	# ifs.close
	# assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract

	# Pre-work hook.
	#
	# Used to inform `self` that operations will start.
	# Specific streams can use this to prepare some resources.
	#
	# Is automatically invoked at the beginning of `with` structures.
	#
	# Does nothing by default.
	fun start do end

	# Post-work hook.
	#
	# Used to inform `self` that the operations are over.
	# Specific streams can use this to free some resources.
	#
	# Is automatically invoked at the end of `with` structures.
	#
	# Calls `close` by default.
	fun finish do close
end
63
64 # A `Stream` that can be read from
65 abstract class Reader
66 super Stream
67
68 # Decoder used to transform input bytes to UTF-8
69 var decoder: Decoder = utf8_decoder is writable
70
71 # Reads a character. Returns `null` on EOF or timeout
72 fun read_char: nullable Char is abstract
73
74 # Reads a byte. Returns `null` on EOF or timeout
75 fun read_byte: nullable Byte is abstract
76
# Reads a String of at most `i` length
#
# The bytes are obtained with `read_bytes` then converted with `to_s`.
fun read(i: Int): String
do
	var raw = read_bytes(i)
	return raw.to_s
end
79
# Read at most `i` bytes
#
# An empty `Bytes` is returned when `last_error` is already set or when
# `i` is not positive.
#
# Reading stops when `i` bytes are collected, when `eof` is reached,
# or when a read fails and sets `last_error`.
fun read_bytes(i: Int): Bytes
do
	if last_error != null then return new Bytes.empty
	# Nothing is requested: do not allocate a native buffer with a
	# zero or negative size.
	if i <= 0 then return new Bytes.empty
	var s = new NativeString(i)
	var buf = new Bytes(s, 0, 0)
	while i > 0 and not eof do
		var c = read_byte
		if c != null then
			buf.add c
			i -= 1
		else if last_error != null then
			# The read failed; trying again could loop forever
			# since `eof` is not guaranteed to become true.
			break
		end
	end
	return buf
end
95
# Read a string until the end of the line.
#
# The line terminator '\n' and '\r\n', if any, is removed in each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_line == "Hello"
# assert i.read_line == ""
# assert i.read_line == "World"
# assert i.eof
# ~~~
#
# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
# the end of file (EOF) are considered to delimit the end of lines.
# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
#
# ~~~
# var txt2 = "Hello\r\n\n\rWorld"
# var i2 = new StringReader(txt2)
# assert i2.read_line == "Hello"
# assert i2.read_line == ""
# assert i2.read_line == "\rWorld"
# assert i2.eof
# ~~~
#
# An empty string is returned when `last_error` is set or when the
# stream is already at `eof`.
#
# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
fun read_line: String
do
	# `or` is lazy: `eof` is not queried on a stream in error.
	if last_error != null or eof then return ""
	var line = new FlatBuffer
	append_line_to(line)
	return line.to_s.chomp
end
131
# Read all the lines until the eof.
#
# The line terminator '\n' and `\r\n` is removed in each line,
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_lines == ["Hello", "", "World"]
# ~~~
#
# This method is more efficient than splitting
# the result of `read_all`.
#
# NOTE: SEE `read_line` for details.
fun read_lines: Array[String]
do
	var lines = new Array[String]
	loop
		if eof then break
		lines.add read_line
	end
	return lines
end
154
# Return an iterator that reads each line.
#
# The line terminator '\n' and `\r\n` is removed in each line,
# The lines are read with `read_line`. See this method for details.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.each_line.to_a == ["Hello", "", "World"]
# ~~~
#
# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the stream.
#
# ~~~
# i = new StringReader(txt)
# var el = i.each_line
#
# assert el.item == "Hello"
# el.next
# assert el.item == ""
# el.next
#
# i.close
#
# assert not el.is_ok
# # closed before "world" is read
# ~~~
fun each_line: LineIterator
do
	var lines = new LineIterator(self)
	return lines
end
184
# Read all the stream until the eof.
#
# The content of the file is returned as a String.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
#
# The bytes from `read_all_bytes` are passed through `clean_utf8`, then
# the result is assembled by concatenating chunks of bounded size.
fun read_all: String do
	var raw = read_all_bytes
	var total = raw.length
	if total == 0 then return ""
	var cleaned = raw.items.clean_utf8(total)
	total = cleaned.bytelen
	var ns = cleaned.items
	# The 129 size was decided more or less arbitrarily
	# It will require some more benchmarking to compute
	# if this is the best size or not
	var chunk_size = 129
	var res = ""
	var offset = 0
	var remaining = total
	while offset < total do
		if remaining < chunk_size then
			# Last chunk: take everything that is left
			res += new FlatString.with_infos(ns, remaining, offset, offset + remaining - 1)
			break
		end
		# NOTE(review): `find_beginning_of_char_at` presumably gives the
		# index of the first byte of the character at that position, so
		# that a chunk never ends inside a character; confirm.
		var cut = ns.find_beginning_of_char_at(offset + chunk_size - 1)
		var chunk_len = cut - offset
		res += new FlatString.with_infos(ns, chunk_len, offset, cut - 1)
		offset = cut
		remaining -= chunk_len
	end
	if res isa Concat then return res.balance
	return res
end
222
# Read all the stream until the eof.
#
# The content of the file is returned verbatim.
#
# An empty `Bytes` is returned when `last_error` is set.
fun read_all_bytes: Bytes
do
	var acc = new Bytes.empty
	if last_error != null then return acc
	loop
		if eof then break
		var b = read_byte
		# `null` means nothing was read this time; just try again
		if b != null then acc.add(b)
	end
	return acc
end
236
# Read a string until the end of the line and append it to `s`.
#
# Unlike `read_line` and other related methods,
# the line terminator '\n', if any, is preserved in each line.
# Use the method `Text::chomp` to safely remove it.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# var b = new FlatBuffer
# i.append_line_to(b)
# assert b == "Hello\n"
# i.append_line_to(b)
# assert b == "Hello\n\n"
# i.append_line_to(b)
# assert b == txt
# assert i.eof
# ~~~
#
# If `\n` is not present at the end of the result, it means that
# a non-eol terminated last line was returned.
#
# ~~~
# var i2 = new StringReader("hello")
# assert not i2.eof
# var b2 = new FlatBuffer
# i2.append_line_to(b2)
# assert b2 == "hello"
# assert i2.eof
# ~~~
#
# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
fun append_line_to(s: Buffer)
do
	if last_error != null then return
	loop
		var c = read_char
		if c != null then
			s.chars.push(c)
			# The terminator is kept in `s`
			if c == '\n' then return
		else if eof then
			# Nothing more to read: the last line had no terminator
			return
		end
	end
end
283
# Is the end of the stream reached?
#
# This function returns `false` if there is something to read.
fun eof: Bool is abstract
287
# Read the next sequence of non whitespace characters.
#
# Leading whitespace characters are skipped.
# The first whitespace character that follows the result is consumed.
#
# An empty string is returned if the end of the file or an error is encountered.
#
# ~~~
# var w = new StringReader(" Hello, \n\t World!")
# assert w.read_word == "Hello,"
# assert w.read_char == '\n'
# assert w.read_word == "World!"
# assert w.read_word == ""
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_word: String
do
	var word = new FlatBuffer
	var first = read_nonwhitespace
	if first == null then return word.to_s
	word.add(first)
	loop
		if eof then break
		var c = read_char
		if c == null then break
		# The whitespace that ends the word is consumed but not stored
		if c.is_whitespace then break
		word.add(c)
	end
	return word.to_s
end
320
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
# Returns the code point of the character.
# Returns `null` on end of file or error.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
# assert w.read_nonwhitespace == 'a'
# assert w.read_nonwhitespace == 'b'
# assert w.read_nonwhitespace == 'c'
# assert w.read_nonwhitespace == null
# ~~~
#
# Trailing whitespace is skipped too, it is never returned:
#
# ~~~
# var w2 = new StringReader("a  ")
# assert w2.read_nonwhitespace == 'a'
# assert w2.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_nonwhitespace: nullable Char
do
	while not eof do
		var c = read_char
		# Either a failed read (`null`) or the character looked for
		if c == null or not c.is_whitespace then return c
	end
	# The end of the stream was reached while skipping whitespace:
	# the last whitespace read must not be returned.
	return null
end
346 end
347
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# The last line read (cache)
	private var line: nullable String = null

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Is there still something to read from `stream`?
	#
	# When the end is reached and `close_on_finish` is set,
	# `stream` is closed.
	redef fun is_ok
	do
		if not stream.eof then return true
		if close_on_finish then stream.close
		return false
	end

	# The current line.
	#
	# The line is read from `stream` on the first access and kept in
	# the cache until `next` is called.
	redef fun item
	do
		var cached = line
		if cached != null then return cached
		var fresh = stream.read_line
		line = fresh
		return fresh
	end

	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	# Close `stream` if `close_on_finish` is set.
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
394
# `Reader` capable of declaring if readable without blocking
abstract class PollableReader
	super Reader

	# Is there something to read? (without blocking)
	#
	# To be implemented by concrete subclasses.
	fun poll_in: Bool is abstract

end
403
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# The coder from a nit UTF-8 String to the output file
	#
	# Defaults to `utf8_coder` and can be replaced since it is writable.
	var coder: Coder = utf8_coder is writable

	# Writes bytes from `s`
	fun write_bytes(s: Bytes) is abstract

	# Write a string
	fun write(s: Text) is abstract

	# Write a single byte
	fun write_byte(value: Byte) is abstract

	# Can the stream be used to write
	fun is_writable: Bool is abstract
end
423
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a streamable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		# Collect what `write_to` produces in an in-memory writer
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
447
redef class Bytes
	super Writable

	# Write `self` to `s` with `Writer::write_bytes`
	redef fun write_to(s)
	do
		s.write_bytes(self)
	end

	# Directly return `to_s`, no `Writer` is involved
	redef fun write_to_string
	do
		return to_s
	end
end
454
redef class Text
	super Writable

	# Write `self` to `stream` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
459
460 # Input streams with a buffered input for efficiency purposes
461 abstract class BufferedReader
462 super Reader
# Read the next byte of the buffer and return it as a character.
#
# Returns `null` if `last_error` is set.
# At `eof`, `last_error` is set and `null` is returned.
redef fun read_char
do
	if last_error != null then return null
	if not eof then
		var idx = _buffer_pos
		_buffer_pos = idx + 1
		# TODO: Fix when supporting UTF-8
		return _buffer[idx].to_i.code_point
	end
	last_error = new IOError("Stream has reached eof")
	return null
end
475
# Read the next byte of the buffer.
#
# Returns `null` if `last_error` is set.
# At `eof`, `last_error` is set and `null` is returned.
redef fun read_byte
do
	if last_error != null then return null
	if not eof then
		var idx = _buffer_pos
		_buffer_pos = idx + 1
		return _buffer[idx]
	end
	last_error = new IOError("Stream has reached eof")
	return null
end
487
# Resets the internal buffer
#
# Both the read position and the length are set back to 0;
# the native buffer itself is kept.
fun buffer_reset
do
	_buffer_pos = 0
	_buffer_length = 0
end
493
# Peeks up to `i` bytes in the buffer
#
# The operation does not consume the buffer
#
# An empty `Bytes` is returned at `eof`.
#
# ~~~nitish
# var x = new FileReader.open("File.txt")
# assert x.peek(5) == x.read(5)
# ~~~
fun peek(i: Int): Bytes do
	if eof then return new Bytes.empty
	var remsp = _buffer_length - _buffer_pos
	# Fast path: the buffer already holds the `i` requested bytes,
	# they are copied without moving `_buffer_pos`
	if i <= remsp then
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, i, _buffer_pos)
		return bf
	end
	# Slow path: take what is left in the buffer, then read the
	# missing bytes with `read_intern`
	var bf = new Bytes.with_capacity(i)
	bf.append_ns_from(_buffer, remsp, _buffer_pos)
	_buffer_pos = _buffer_length
	read_intern(i - bf.length, bf)
	# `read_intern` moved `_buffer_pos`: what remains unread in the
	# buffer must be kept after the peeked bytes
	remsp = _buffer_length - _buffer_pos
	var full_len = bf.length + remsp
	# Grow the capacity until the peeked bytes and the remainder fit
	if full_len > _buffer_capacity then
		var c = _buffer_capacity
		while c < full_len do c = c * 2 + 2
		_buffer_capacity = c
	end
	# Build the new buffer: peeked bytes first, then the unread remainder
	var nns = new NativeString(_buffer_capacity)
	bf.items.copy_to(nns, bf.length, 0, 0)
	_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
	_buffer = nns
	# Restart at 0 so the peeked bytes will be read again
	_buffer_pos = 0
	_buffer_length = full_len
	return bf
end
529
# Read at most `i` bytes through the buffer, see `read_intern`.
#
# An empty `Bytes` is returned when `last_error` is set.
redef fun read_bytes(i)
do
	if last_error == null then
		var res = new Bytes.with_capacity(i)
		read_intern(i, res)
		return res
	end
	return new Bytes.empty
end
537
# Fills `buf` with at most `i` bytes read from `self`
#
# Returns the number of bytes appended to `buf`.
private fun read_intern(i: Int, buf: Bytes): Int
do
	if eof then return 0
	var start = _buffer_pos
	var avail = _buffer_length - start
	if avail < i then
		# Not enough bytes in the buffer: take them all, then recurse
		# for the missing ones (`eof` refills the buffer if needed).
		_buffer_pos = _buffer_length
		buf.append_ns_from(_buffer, avail, start)
		return read_intern(i - avail, buf) + avail
	end
	# The buffer holds enough bytes to serve the whole request
	_buffer_pos = start + i
	buf.append_ns_from(_buffer, i, start)
	return i
end
554
# Read all the remaining bytes, one buffer at a time.
#
# An empty `Bytes` is returned when `last_error` is set.
redef fun read_all_bytes
do
	if last_error != null then return new Bytes.empty
	var s = new Bytes.with_capacity(10)
	while not eof do
		var j = _buffer_pos
		var k = _buffer_length
		# `_buffer` is read afresh on each pass instead of through an
		# alias taken before the loop: nothing here guarantees that
		# `fill_buffer` (also called by `eof`) keeps the same native
		# buffer.
		s.append_ns_from(_buffer, k - j, j)
		# Everything buffered is consumed, ask for more
		_buffer_pos = k
		fill_buffer
	end
	return s
end
570
# Append the next line to `s`, the terminator '\n' (if any) is included.
#
# The bytes of the line are gathered buffer after buffer, then
# appended to `s` at once.
#
# If the buffer is drained but the end is not reached, the buffer is
# refilled and the search goes on.
redef fun append_line_to(s)
do
	var lb = new Bytes.with_capacity(10)
	loop
		# First phase: look for a '\n'
		var i = _buffer_pos
		while i < _buffer_length and _buffer[i] != 0xAu8 do
			i += 1
		end

		# The scan stopped inside the buffer only if a '\n' was found
		var eol = i < _buffer_length
		# Include the '\n' in the copied part
		if eol then i += 1

		# Second phase: copy from the buffer what was scanned (if any)
		var j = _buffer_pos
		while j < i do
			lb.add(_buffer[j])
			j += 1
		end
		_buffer_pos = i

		# The line is complete on '\n' or when nothing more can come
		if eol or end_reached then
			s.append lb.to_s
			return
		end

		# No '\n' yet and the buffer is drained: get more data
		fill_buffer
	end
end
619
# Is the end of the stream reached?
#
# When the buffer is drained and the end is not yet known to be
# reached, the buffer is refilled before answering.
redef fun eof
do
	# Unread bytes are still in the buffer
	if _buffer_pos < _buffer_length then return false
	if not end_reached then
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end
	return true
end
627
628 # The buffer
629 private var buffer: NativeString = new NativeString(0)
630
631 # The current position in the buffer
632 private var buffer_pos = 0
633
634 # Length of the current buffer (i.e. number of bytes in the buffer)
635 private var buffer_length = 0
636
637 # Capacity of the buffer
638 private var buffer_capacity = 0
639
640 # Fill the buffer
641 protected fun fill_buffer is abstract
642
643 # Has the last fill_buffer reached the end
644 protected fun end_reached: Bool is abstract
645
# Allocate a `_buffer` for a given `capacity`.
#
# The new buffer starts empty: position and length are both 0.
protected fun prepare_buffer(capacity: Int)
do
	_buffer_capacity = capacity
	_buffer = new NativeString(capacity)
	_buffer_length = 0
	_buffer_pos = 0 # need to read
end
654 end
655
# A `Stream` that can be written to and read from
#
# Combines `Reader` and `Writer` and adds no member of its own.
abstract class Duplex
	super Reader
	super Writer
end
661
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# Is the stream closed?
	protected var closed = false

	# What was written so far, one element per write operation
	private var content = new Array[String]

	# The concatenation of everything written so far
	redef fun to_s
	do
		return content.plain_to_s
	end

	# The stream is writable until it is closed
	redef fun is_writable
	do
		return not closed
	end

	redef fun write_bytes(b)
	do
		content.add(b.to_s)
	end

	# Store the string form of `str`; the stream must not be closed
	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	# Only mark the stream as closed; the content is kept
	redef fun close
	do
		closed = true
	end
end
687
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# Read the next character, or `null` if there is none.
	#
	# NOTE(review): `cursor` is used here as a character index while
	# `read_byte` and `eof` use it as a byte index. Both are the same only
	# for text made of single-byte characters.
	redef fun read_char do
		if cursor < source.length then
			# Fix when supporting UTF-8
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Read the next byte, or `null` when all the bytes are consumed.
	redef fun read_byte do
		# `cursor` indexes `source.bytes`: the limit is the number of
		# bytes (as in `eof`), not the number of characters.
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Drop the content: `source` becomes empty.
	redef fun close do
		source = ""
	end

	# Read all the bytes that remain after `cursor`.
	#
	# The stream is at `eof` afterwards.
	redef fun read_all_bytes do
		# Sizes are in bytes since `copy_to_native` copies bytes
		var nslen = source.bytelen - cursor
		# Nothing remains (can also happen once `close` emptied `source`)
		if nslen <= 0 then return new Bytes.empty
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		# What was returned is consumed
		cursor += nslen
		return new Bytes(nns, nslen, nslen)
	end

	redef fun eof do return cursor >= source.bytelen
end