lib/core/stream: add start/finish for Stream
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Error raised by an operation performed on a `Stream`
class IOError
	super Error
end
29
# Any kind of stream to read/write/both to or from a source
abstract class Stream
	# Error produced by the file stream
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract

	# Pre-work hook.
	#
	# Tells `self` that operations are about to start, so that
	# specific streams can prepare some resources.
	#
	# Is automatically invoked at the beginning of `with` structures.
	#
	# Does nothing by default.
	fun start
	do
	end

	# Post-work hook.
	#
	# Tells `self` that the operations are over, so that
	# specific streams can free some resources.
	#
	# Is automatically invoked at the end of `with` structures.
	#
	# Calls `close` by default.
	fun finish
	do
		close
	end
end
63
64 # A `Stream` that can be read from
65 abstract class Reader
66 super Stream
67
# Decoder in charge of transforming the input bytes to UTF-8
var decoder: Decoder = utf8_decoder is writable
70
# Reads the next character.
#
# `null` is returned on EOF or on timeout.
fun read_char: nullable Char is abstract
73
# Reads the next byte.
#
# `null` is returned on EOF or on timeout.
fun read_byte: nullable Byte is abstract
76
# Reads a String of at most `i` length
#
# The result is the conversion to `String` of what `read_bytes(i)` returns.
fun read(i: Int): String
do
	var raw = read_bytes(i)
	return raw.to_s
end
79
# Read at most `i` bytes
#
# An empty `Bytes` is returned if `last_error` is already set.
# The reading stops as soon as `i` bytes are collected or `eof` is reached.
fun read_bytes(i: Int): Bytes
do
	if last_error != null then return new Bytes.empty
	var storage = new NativeString(i)
	var res = new Bytes(storage, 0, 0)
	var remaining = i
	loop
		if remaining <= 0 or eof then break
		var b = read_byte
		# A `null` byte is not counted: the loop simply asks again
		if b != null then
			res.add b
			remaining -= 1
		end
	end
	return res
end
95
# Read a string until the end of the line.
#
# The line terminator '\n' and '\r\n', if any, is removed in each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_line == "Hello"
# assert i.read_line == ""
# assert i.read_line == "World"
# assert i.eof
# ~~~
#
# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
# the end of file (EOF) are considered to delimit the end of lines.
# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
#
# ~~~
# var txt2 = "Hello\r\n\n\rWorld"
# var i2 = new StringReader(txt2)
# assert i2.read_line == "Hello"
# assert i2.read_line == ""
# assert i2.read_line == "\rWorld"
# assert i2.eof
# ~~~
#
# An empty string is returned if `last_error` is set or if `eof` is reached.
#
# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
fun read_line: String
do
	# Nothing can be read after an error or at the end of the stream
	if last_error != null or eof then return ""
	var line = new FlatBuffer
	append_line_to(line)
	var text = line.to_s
	return text.chomp
end
131
# Read all the lines until the eof.
#
# The line terminator '\n' and `\r\n` is removed in each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_lines == ["Hello", "", "World"]
# ~~~
#
# This method is more efficient than splitting
# the result of `read_all`.
#
# NOTE: SEE `read_line` for details.
fun read_lines: Array[String]
do
	var lines = new Array[String]
	loop
		if eof then break
		lines.add read_line
	end
	return lines
end
154
# Return an iterator that reads each line.
#
# The line terminator '\n' and `\r\n` is removed in each line.
# The lines are read with `read_line`. See this method for details.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.each_line.to_a == ["Hello", "", "World"]
# ~~~
#
# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the stream.
#
# ~~~
# i = new StringReader(txt)
# var el = i.each_line
#
# assert el.item == "Hello"
# el.next
# assert el.item == ""
# el.next
#
# i.close
#
# assert not el.is_ok
# # closed before "world" is read
# ~~~
fun each_line: LineIterator
do
	var lines = new LineIterator(self)
	return lines
end
184
# Read all the stream until the eof.
#
# The content of the file is returned as a String.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
fun read_all: String do
	var raw = read_all_bytes
	var total = raw.length
	if total == 0 then return ""
	# Work on the result of `clean_utf8`, whose byte length may differ
	var cleaned = raw.items.clean_utf8(total)
	total = cleaned.bytelen
	var native = cleaned.items
	var res = ""
	var offset = 0
	var left = total
	# The 129 size was decided more or less arbitrarily
	# It will require some more benchmarking to compute
	# if this is the best size or not
	var chunk = 129
	while offset < total do
		if chunk > left then
			# Last, shorter, piece: take everything that remains
			res += new FlatString.with_infos(native, left, offset, offset + left - 1)
			break
		end
		# The piece ends where `find_beginning_of_char_at` says a character begins
		var cut = native.find_beginning_of_char_at(offset + chunk - 1)
		var piece_len = cut - offset
		res += new FlatString.with_infos(native, piece_len, offset, cut - 1)
		offset = cut
		left -= piece_len
	end
	if res isa Concat then return res.balance
	return res
end
222
# Read all the stream until the eof.
#
# The content of the file is returned verbatim.
# An empty `Bytes` is returned if `last_error` is already set.
fun read_all_bytes: Bytes
do
	if last_error != null then return new Bytes.empty
	var res = new Bytes.empty
	loop
		if eof then break
		var b = read_byte
		if b != null then res.add(b)
	end
	return res
end
236
# Read a string until the end of the line and append it to `s`.
#
# Unlike `read_line` and other related methods,
# the line terminator '\n', if any, is preserved in each line.
# Use the method `Text::chomp` to safely remove it.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# var b = new FlatBuffer
# i.append_line_to(b)
# assert b == "Hello\n"
# i.append_line_to(b)
# assert b == "Hello\n\n"
# i.append_line_to(b)
# assert b == txt
# assert i.eof
# ~~~
#
# If `\n` is not present at the end of the result, it means that
# a non-eol terminated last line was returned.
#
# ~~~
# var i2 = new StringReader("hello")
# assert not i2.eof
# var b2 = new FlatBuffer
# i2.append_line_to(b2)
# assert b2 == "hello"
# assert i2.eof
# ~~~
#
# Nothing is appended if `last_error` is already set.
#
# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
fun append_line_to(s: Buffer)
do
	if last_error != null then return
	loop
		var ch = read_char
		if ch != null then
			s.chars.push(ch)
			# The terminator is kept, then the line is complete
			if ch == '\n' then return
		else if eof then
			# No character and nothing left to read
			return
		end
	end
end
283
# Is the stream unable to provide anything more to read?
#
# This function returns `false` if there is something to read.
fun eof: Bool is abstract
287
# Read the next sequence of non whitespace characters.
#
# Leading whitespace characters are skipped.
# The first whitespace character that follows the result is consumed.
#
# An empty string is returned if the end of the file or an error is encountered.
#
# ~~~
# var w = new StringReader(" Hello, \n\t World!")
# assert w.read_word == "Hello,"
# assert w.read_char == '\n'
# assert w.read_word == "World!"
# assert w.read_word == ""
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_word: String
do
	var word = new FlatBuffer
	var first = read_nonwhitespace
	# Nothing but whitespace (or nothing at all): the word is empty
	if first == null then return word.to_s
	word.add(first)
	loop
		if eof then break
		var c = read_char
		if c == null then break
		# The whitespace that ends the word is consumed but not kept
		if c.is_whitespace then break
		word.add(c)
	end
	return word.to_s
end
320
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
# Returns the character that was read.
# Returns `null` on end of file or error.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
# assert w.read_nonwhitespace == 'a'
# assert w.read_nonwhitespace == 'b'
# assert w.read_nonwhitespace == 'c'
# assert w.read_nonwhitespace == null
# ~~~
#
# Trailing whitespace is skipped too, and `null` is returned once the
# end of the stream is reached:
#
# ~~~
# var w2 = new StringReader("a \n")
# assert w2.read_nonwhitespace == 'a'
# assert w2.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_nonwhitespace: nullable Char
do
	while not eof do
		var c = read_char
		# Either a read failure (`null`) or the character looked for
		if c == null or not c.is_whitespace then return c
	end
	# The end of the stream was reached while skipping whitespace:
	# the last whitespace read must not be reported as a result.
	return null
end
346 end
347
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there a line to get with `item`?
	#
	# True when a line is already cached by `item`, or when `stream` is
	# not at its end.
	# When the end of `stream` is seen and `close_on_finish` is set,
	# `stream` is closed.
	redef fun is_ok
	do
		# A line read by `item` but not yet dropped by `next` is still
		# available, even if reading it exhausted the stream.
		if line != null then return true
		var res = not stream.eof
		if not res and close_on_finish then stream.close
		return res
	end

	# The current line, read from `stream` on first access and then cached
	redef fun item
	do
		var line = self.line
		if line == null then
			line = stream.read_line
		end
		self.line = line
		return line
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Drop the current line, reading it first if `item` was not called
	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close `stream` if `close_on_finish` is set
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
394
# `Reader` able to tell whether it can be read without blocking
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
403
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# The coder from a nit UTF-8 String to the output file
	var coder: Coder = utf8_coder is writable

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the string `s`
	fun write(s: Text) is abstract

	# Write the single byte `value`
	fun write_byte(value: Byte) is abstract

	# Can the stream be used to write?
	fun is_writable: Bool is abstract
end
423
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a streamable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		var out = new StringWriter
		write_to(out)
		return out.to_s
	end
end
447
redef class Text
	super Writable

	# Write `self` to `stream` with a single `write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
452
453 # Input streams with a buffered input for efficiency purposes
454 abstract class BufferedReader
455 super Reader
# Return the character for the next byte of the buffer
#
# Returns `null` if `last_error` is set.
# At `eof`, `last_error` is set and `null` is returned.
redef fun read_char
do
	if last_error != null then return null
	if not eof then
		# TODO: Fix when supporting UTF-8
		var pos = _buffer_pos
		var c = _buffer[pos].to_i.code_point
		_buffer_pos = pos + 1
		return c
	end
	last_error = new IOError("Stream has reached eof")
	return null
end
468
# Return the next byte of the buffer
#
# Returns `null` if `last_error` is set.
# At `eof`, `last_error` is set and `null` is returned.
redef fun read_byte
do
	if last_error != null then return null
	if not eof then
		var pos = _buffer_pos
		var b = _buffer[pos]
		_buffer_pos = pos + 1
		return b
	end
	last_error = new IOError("Stream has reached eof")
	return null
end
480
# Resets the internal buffer
#
# Both the read position and the length of the buffer are set back to 0.
fun buffer_reset
do
	_buffer_pos = 0
	_buffer_length = 0
end
486
# Peeks up to `n` bytes in the buffer
#
# The operation does not consume the buffer
#
# ~~~nitish
# var x = new FileReader.open("File.txt")
# assert x.peek(5) == x.read(5)
# ~~~
fun peek(i: Int): Bytes do
	if eof then return new Bytes.empty
	var available = _buffer_length - _buffer_pos
	var peeked = new Bytes.with_capacity(i)
	if i <= available then
		# Everything asked is already buffered: just copy it
		peeked.append_ns_from(_buffer, i, _buffer_pos)
		return peeked
	end
	# Take what is buffered, then read the missing bytes
	peeked.append_ns_from(_buffer, available, _buffer_pos)
	_buffer_pos = _buffer_length
	read_intern(i - peeked.length, peeked)
	# Bytes left unread in the buffer after `read_intern`
	available = _buffer_length - _buffer_pos
	var needed = peeked.length + available
	if needed > _buffer_capacity then
		var cap = _buffer_capacity
		while cap < needed do cap = cap * 2 + 2
		_buffer_capacity = cap
	end
	# Rebuild the buffer: the peeked bytes first, then the unread ones,
	# so that the peeked bytes can be read again
	var rebuilt = new NativeString(_buffer_capacity)
	peeked.items.copy_to(rebuilt, peeked.length, 0, 0)
	_buffer.copy_to(rebuilt, available, _buffer_pos, peeked.length)
	_buffer = rebuilt
	_buffer_pos = 0
	_buffer_length = needed
	return peeked
end
522
# Read at most `i` bytes through the buffer
#
# An empty `Bytes` is returned if `last_error` is already set.
redef fun read_bytes(i)
do
	if last_error != null then return new Bytes.empty
	var res = new Bytes.with_capacity(i)
	read_intern(i, res)
	return res
end
530
# Fills `buf` with at most `i` bytes read from `self`
#
# Returns the number of bytes appended to `buf`.
private fun read_intern(i: Int, buf: Bytes): Int do
	if eof then return 0
	var start = _buffer_pos
	var available = _buffer_length - start
	if available >= i then
		# The buffer holds enough bytes to satisfy the request
		_buffer_pos += i
		buf.append_ns_from(_buffer, i, start)
		return i
	end
	# Drain the buffer, then recurse to get the remaining bytes
	_buffer_pos = _buffer_length
	buf.append_ns_from(_buffer, available, start)
	var more = read_intern(i - available, buf)
	return more + available
end
547
# Read all the remaining bytes, buffer after buffer
#
# An empty `Bytes` is returned if `last_error` is already set.
redef fun read_all_bytes
do
	if last_error != null then return new Bytes.empty
	var res = new Bytes.with_capacity(10)
	# NOTE(review): the buffer is captured once; this assumes `fill_buffer`
	# refills the same `NativeString` instead of replacing it - confirm.
	var native = _buffer
	loop
		if eof then break
		var from = _buffer_pos
		var upto = _buffer_length
		# Take everything currently buffered, then ask for more
		res.append_ns_from(native, upto - from, from)
		_buffer_pos = upto
		fill_buffer
	end
	return res
end
563
# Append the next line, terminator included, to `s`
#
# The bytes are collected from the buffer, which is refilled with
# `fill_buffer` until a LINE FEED is found or `end_reached` is true.
redef fun append_line_to(s)
do
	var line_bytes = new Bytes.with_capacity(10)
	loop
		# First phase: look for a '\n'
		var stop = _buffer_pos
		while stop < _buffer_length and _buffer[stop] != 0xAu8 do
			stop += 1
		end

		# When found, the '\n' is part of what is copied
		var found_eol = stop < _buffer_length
		if found_eol then
			assert _buffer[stop] == 0xAu8
			stop += 1
		end

		# Nothing left in the buffer: only legal at the end of the input
		if stop <= _buffer_pos then
			assert end_reached
			s.append line_bytes.to_s
			return
		end

		# Copy from the buffer to the pending line
		var idx = _buffer_pos
		while idx < stop do
			line_bytes.add(_buffer[idx])
			idx += 1
		end
		_buffer_pos = stop

		# The line is complete on '\n' or when no more input can come
		if found_eol or end_reached then
			s.append line_bytes.to_s
			return
		end
		fill_buffer
	end
end
612
# Is the buffer exhausted with nothing more to fill it?
#
# May call `fill_buffer` when the buffer is exhausted and
# `end_reached` is not yet true.
redef fun eof
do
	if _buffer_pos >= _buffer_length then
		if end_reached then return true
		# Try to get more data before answering
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end
	return false
end
620
# The native storage of the buffered bytes
private var buffer: NativeString = new NativeString(0)

# The position in the buffer of the next byte to read
private var buffer_pos = 0

# Length of the current buffer (i.e. number of bytes in the buffer)
private var buffer_length = 0

# Capacity of the buffer
private var buffer_capacity = 0
632
# Fill the buffer with new data to read
protected fun fill_buffer is abstract
635
# Did the last `fill_buffer` reach the end?
protected fun end_reached: Bool is abstract
638
# Allocate a `_buffer` for a given `capacity`.
#
# The new buffer starts empty, with the read position at 0.
protected fun prepare_buffer(capacity: Int)
do
	_buffer_capacity = capacity
	_buffer = new NativeString(capacity)
	_buffer_length = 0
	_buffer_pos = 0 # need to read
end
647 end
648
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
654
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# The pieces written so far, in order
	private var content = new Array[String]

	# The concatenation of everything written so far
	redef fun to_s
	do
		return content.plain_to_s
	end

	# Writable as long as the stream is not closed
	redef fun is_writable
	do
		return not closed
	end

	# Store the string form of `b`
	redef fun write_bytes(b)
	do
		var piece = b.to_s
		content.add(piece)
	end

	# Store the string form of `str`; the stream must not be closed
	redef fun write(str)
	do
		assert not closed
		var piece = str.to_s
		content.add(piece)
	end

	# Is the stream closed?
	protected var closed = false

	# Mark the stream as closed
	redef fun close
	do
		closed = true
	end
end
680
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# Return the character at `cursor`, or `null` when none is left
	#
	# NOTE(review): `cursor` is used here as a character index while
	# `read_byte` and `eof` treat it as a byte offset; both views only
	# agree on single-byte (ASCII) content - to fix with the UTF-8 support.
	redef fun read_char do
		if cursor < source.length then
			# Fix when supporting UTF-8
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Return the byte at `cursor`, or `null` when none is left
	redef fun read_byte do
		# `cursor` is a byte offset: bound it with the byte length, like `eof`,
		# so that every byte of a multi-byte string can be read.
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Forget the content of `source`; `eof` is true afterwards
	redef fun close do
		source = ""
	end

	# Return all the bytes from `cursor` to the end of `source`
	#
	# The bytes returned are consumed: `eof` is true afterwards.
	redef fun read_all_bytes do
		var nslen = source.bytelen - cursor
		# Nothing left to read (this includes a closed stream whose
		# `cursor` is beyond the now empty `source`)
		if nslen <= 0 then return new Bytes.empty
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		cursor += nslen
		return new Bytes(nns, nslen, nslen)
	end

	# Is `cursor` at or beyond the last byte of `source`?
	redef fun eof do return cursor >= source.bytelen
end