lib: rename `standard` as `core`
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Error raised by a failed operation on a `Stream`
#
# The last error met by a stream is kept in `Stream::last_error`.
class IOError
	super Error
end
29
# Common root of everything that reads from, writes to, or does both on a source
abstract class Stream
	# Last error produced by the stream, `null` while none occurred
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract
end
43
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Decoder used to transform input bytes to UTF-8
	var decoder: Decoder = utf8_decoder is writable

	# Reads a character. Returns `null` on EOF or timeout
	fun read_char: nullable Char is abstract

	# Reads a byte. Returns `null` on EOF or timeout
	fun read_byte: nullable Byte is abstract

	# Reads a String of at most `i` length
	fun read(i: Int): String do return read_bytes(i).to_s

	# Read at most `i` bytes
	#
	# An empty `Bytes` is returned when `last_error` is already set.
	# Fewer than `i` bytes are returned when the end of the stream is
	# reached first.
	fun read_bytes(i: Int): Bytes
	do
		if last_error != null then return new Bytes.empty
		# Reserve room for the `i` requested bytes once, so that the
		# storage is actually used instead of being reallocated on the
		# first `add`.
		var buf = new Bytes.with_capacity(i)
		while i > 0 and not eof do
			var c = read_byte
			# `null` without `eof` means nothing was available yet: retry
			if c != null then
				buf.add c
				i -= 1
			end
		end
		return buf
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# An empty string is returned if `last_error` is set or if the end
	# of the stream is already reached.
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the stream.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned as a String.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String do
		var s = read_all_bytes
		if not s.is_utf8 then s = s.clean_utf8
		var slen = s.length
		if slen == 0 then return ""
		var rets = ""
		var pos = 0
		var sits = s.items
		# Number of bytes not yet turned into a string chunk
		var remsp = slen
		while pos < slen do
			# The 129 size was decided more or less arbitrarily
			# It will require some more benchmarking to compute
			# if this is the best size or not
			var chunksz = 129
			if chunksz > remsp then
				# Last chunk: take everything that remains
				rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
				break
			end
			# End the chunk where a character begins, presumably so that
			# no multi-byte character is split between two chunks.
			var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
			var bytelen = st - pos
			rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
			pos = st
			remsp -= bytelen
		end
		if rets isa Concat then return rets.balance
		return rets
	end

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	# An empty `Bytes` is returned when `last_error` is already set.
	fun read_all_bytes: Bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.empty
		while not eof do
			var c = read_byte
			if c != null then s.add(c)
		end
		return s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == null then
				# Nothing read: stop at the end of the stream, retry otherwise
				if eof then return
			else
				s.chars.push(x)
				if x == '\n' then return
			end
		end
	end

	# Is the end of the stream reached?
	#
	# Returns `false` as long as there is still something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'.ascii
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		if c != null then
			buf.add(c)
			while not eof do
				c = read_char
				if c == null then break
				if c.is_whitespace then break
				buf.add(c)
			end
		end
		var res = buf.to_s
		return res
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the character read.
	# Returns `null` on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'
	# assert w.read_nonwhitespace == 'b'
	# assert w.read_nonwhitespace == 'c'
	# assert w.read_nonwhitespace == null
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: nullable Char
	do
		var c: nullable Char = null
		while not eof do
			c = read_char
			if c == null or not c.is_whitespace then break
		end
		return c
	end
end
326
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there a line available through `item`?
	#
	# When the end of `stream` is reached and `close_on_finish` is set,
	# the stream is closed as a side effect.
	redef fun is_ok
	do
		# A line that was read by `item` and not yet dropped by `next`
		# can still be returned, even if reading it exhausted the stream.
		if line != null then return true
		var res = not stream.eof
		if not res and close_on_finish then stream.close
		return res
	end

	# The current line, read lazily from `stream` with `Reader::read_line`
	#
	# The line is cached: calling `item` again before `next` returns
	# the same line without reading the stream.
	redef fun item
	do
		var line = self.line
		if line == null then
			line = stream.read_line
		end
		self.line = line
		return line
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Move to the next line
	#
	# If the current line was never requested, it is read then discarded.
	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close `stream` if `close_on_finish` is set
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
373
# A `Reader` able to tell whether it can be read without blocking
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
382
# A `Stream` that accepts data to write
abstract class Writer
	super Stream

	# Coder used to go from a Nit UTF-8 String to the output
	var coder: Coder = utf8_coder is writable

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the text `s`
	fun write(s: Text) is abstract

	# Write the single byte `value`
	fun write_byte(value: Byte) is abstract

	# Is the stream currently usable for writing?
	fun is_writable: Bool is abstract
end
402
# Things that can be efficiently written to a `Writer`
#
# Implementing this interface lets an instance push its content
# straight into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to `stream`
	#
	# The specific logic is left to the concrete subclasses.
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point of a writable
	# object is to be efficiently written to a stream without having
	# to allocate and concatenate strings.
	fun write_to_string: String
	do
		# Collect the output in an in-memory writer, then dump it
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
426
redef class Text
	super Writable

	# Write the text to `stream` with a single call to `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
431
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`, and allocate the
# buffer with `prepare_buffer`.
abstract class BufferedReader
	super Reader

	# Read the next character from the buffer
	#
	# Returns `null` if `last_error` is set.
	# Reading at the end of the stream sets `last_error` and returns `null`.
	redef fun read_char
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		# TODO: Fix when supporting UTF-8
		var c = _buffer[_buffer_pos].to_i.ascii
		_buffer_pos += 1
		return c
	end

	# Read the next byte from the buffer
	#
	# Returns `null` if `last_error` is set.
	# Reading at the end of the stream sets `last_error` and returns `null`.
	redef fun read_byte
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		var c = _buffer[_buffer_pos]
		_buffer_pos += 1
		return c
	end

	# Resets the internal buffer
	fun buffer_reset do
		_buffer_length = 0
		_buffer_pos = 0
	end

	# Peeks up to `i` bytes in the buffer
	#
	# The operation does not consume the buffer
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read(5)
	# ~~~
	fun peek(i: Int): Bytes do
		if eof then return new Bytes.empty
		var remsp = _buffer_length - _buffer_pos
		if i <= remsp then
			# Everything asked for is already buffered: just copy it
			var bf = new Bytes.with_capacity(i)
			bf.append_ns_from(_buffer, i, _buffer_pos)
			return bf
		end
		# Take what is buffered, then read the missing bytes
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, remsp, _buffer_pos)
		_buffer_pos = _buffer_length
		read_intern(i - bf.length, bf)
		# Rebuild the buffer as the peeked bytes followed by what is
		# still unread, so that nothing appears consumed.
		remsp = _buffer_length - _buffer_pos
		var full_len = bf.length + remsp
		if full_len > _buffer_capacity then
			var c = _buffer_capacity
			while c < full_len do c = c * 2 + 2
			_buffer_capacity = c
		end
		var nns = new NativeString(_buffer_capacity)
		bf.items.copy_to(nns, bf.length, 0, 0)
		_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
		_buffer = nns
		_buffer_pos = 0
		_buffer_length = full_len
		return bf
	end

	# Read at most `i` bytes, going through the buffer
	#
	# An empty `Bytes` is returned when `last_error` is already set.
	redef fun read_bytes(i)
	do
		if last_error != null then return new Bytes.empty
		var buf = new Bytes.with_capacity(i)
		read_intern(i, buf)
		return buf
	end

	# Fills `buf` with at most `i` bytes read from `self`
	#
	# Returns the number of bytes appended to `buf`.
	private fun read_intern(i: Int, buf: Bytes): Int do
		if eof then return 0
		var p = _buffer_pos
		# Number of unread bytes currently in the buffer
		var bufsp = _buffer_length - p
		if bufsp >= i then
			_buffer_pos += i
			buf.append_ns_from(_buffer, i, p)
			return i
		end
		# Not enough buffered: drain the buffer, then recurse.
		# The `eof` test of the recursive call refills the buffer.
		_buffer_pos = _buffer_length
		buf.append_ns_from(_buffer, bufsp, p)
		var rd = read_intern(i - bufsp, buf)
		return rd + bufsp
	end

	# Read all the remaining bytes, one buffer at a time
	#
	# An empty `Bytes` is returned when `last_error` is already set.
	redef fun read_all_bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.with_capacity(10)
		while not eof do
			var j = _buffer_pos
			var k = _buffer_length
			var rd_sz = k - j
			# Always read through `_buffer`: a local alias taken before
			# the loop would be stale if the buffer gets replaced
			# (e.g. by `prepare_buffer`) while filling.
			s.append_ns_from(_buffer, rd_sz, j)
			_buffer_pos = k
			fill_buffer
		end
		return s
	end

	# Append the next line, terminator included, to `s`
	#
	# The buffer is scanned for a LINE FEED and refilled until one is
	# found or the end of the stream is reached.
	redef fun append_line_to(s)
	do
		var lb = new Bytes.with_capacity(10)
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer_length and _buffer[i] != 0xAu8 do
				i += 1
			end

			var eol
			if i < _buffer_length then
				assert _buffer[i] == 0xAu8
				# Include the '\n' in the copied span
				i += 1
				eol = true
			else
				eol = false
			end

			# if there is something to append
			if i > _buffer_pos then
				# Copy from the buffer to the string
				var j = _buffer_pos
				while j < i do
					lb.add(_buffer[j])
					j += 1
				end
				_buffer_pos = i
			else
				# NOTE(review): an empty buffer is only expected here
				# once the end is reached; calling this method before
				# any fill on a stream that still has data trips the
				# assertion.
				assert end_reached
				s.append lb.to_s
				return
			end

			if eol then
				# so \n is found
				s.append lb.to_s
				return
			else
				# so \n is not found
				if end_reached then
					s.append lb.to_s
					return
				end
				fill_buffer
			end
		end
	end

	# Is the end of the stream reached?
	#
	# May call `fill_buffer` when the buffer is exhausted.
	redef fun eof
	do
		if _buffer_pos < _buffer_length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end

	# The buffer
	private var buffer: NativeString = new NativeString(0)

	# The current position in the buffer
	private var buffer_pos = 0

	# Length of the current buffer (i.e. number of bytes in the buffer)
	private var buffer_length = 0

	# Capacity of the buffer
	private var buffer_capacity = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Has the last fill_buffer reached the end
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new NativeString(capacity)
		_buffer_pos = 0 # need to read
		_buffer_length = 0
		_buffer_capacity = capacity
	end
end
627
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
633
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# Pieces written so far, in writing order
	private var content = new Array[String]

	# Is the stream closed?
	protected var closed = false

	# Everything written so far, as rendered by `plain_to_s` on the pieces
	redef fun to_s
	do
		return content.plain_to_s
	end

	# A `StringWriter` is writable until it is closed
	redef fun is_writable
	do
		return not closed
	end

	# Store the string form of `b`
	redef fun write_bytes(b)
	do
		content.add(b.to_s)
	end

	# Store the string form of `str`
	#
	# Writing on a closed stream is an assertion failure.
	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	# Mark the stream as closed
	redef fun close
	do
		closed = true
	end
end
659
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# Read the next character of `source`, or `null` when exhausted
	redef fun read_char do
		# NOTE(review): `cursor` is used here as a character index while
		# `read_byte` and `eof` use it as a byte offset. Both agree on
		# ASCII-only strings; multi-byte content needs a real fix.
		if cursor < source.length then
			# Fix when supporting UTF-8
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Read the next byte of `source`, or `null` when exhausted
	redef fun read_byte do
		# Bound by the byte length, like `eof`, so that the trailing
		# bytes of a string with multi-byte characters stay reachable.
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Drop the content: nothing can be read afterwards
	redef fun close do
		source = ""
	end

	# Return all the bytes from the current position to the end
	#
	# The stream is at `eof` afterwards.
	redef fun read_all_bytes do
		var nslen = source.bytelen - cursor
		# Nothing left; also covers a `close` after some reads, where
		# `cursor` is past the end of the emptied `source`.
		if nslen <= 0 then return new Bytes.empty
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		# Consume what was returned
		cursor += nslen
		return new Bytes(nns, nslen, nslen)
	end

	# Is the cursor at or past the last byte of `source`?
	redef fun eof do return cursor >= source.bytelen
end