b4a0e2b953ff294fcdc2992583a2ef063df7f753
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Any kind of error that could be produced by an operation on Streams
#
# The last such error of a stream is kept in `Stream::last_error`.
class IOError
	super Error
end
29
# Any kind of stream to read/write/both to or from a source
abstract class Stream
	# Error produced by the file stream, or `null` if no error is recorded
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract
end
43
44 # A `Stream` that can be read from
45 abstract class Reader
46 super Stream
47
# Decoder used to transform input bytes to UTF-8
#
# Defaults to `utf8_decoder`; the attribute is writable so it can be replaced.
var decoder: Decoder = utf8_decoder is writable
50
# Reads a character.
#
# Returns `null` on EOF or timeout.
fun read_char: nullable Char is abstract
53
# Reads a byte.
#
# Returns `null` on EOF or timeout.
fun read_byte: nullable Byte is abstract
56
# Reads a String made of at most `i` bytes
#
# The bytes are obtained with `read_bytes` then converted with `to_s`.
fun read(i: Int): String
do
	var raw = read_bytes(i)
	return raw.to_s
end
59
# Read at most `i` bytes
#
# Bytes are fetched one at a time with `read_byte` until `i` bytes have
# been collected or `eof` is reached.
# A `null` byte (e.g. a timeout) is skipped and the read is retried,
# unless an error has been recorded in `last_error`: in that case the
# bytes gathered so far are returned.
#
# An empty `Bytes` is returned if `last_error` is already set on entry.
fun read_bytes(i: Int): Bytes
do
	if last_error != null then return new Bytes.empty
	# Size the result for the requested amount up front
	var buf = new Bytes.with_capacity(i)
	while i > 0 and not eof do
		var c = read_byte
		if c != null then
			buf.add c
			i -= 1
		else if last_error != null then
			# Do not spin forever on a stream that failed without reaching EOF
			break
		end
	end
	return buf
end
75
# Read a string until the end of the line.
#
# The line terminator '\n' and '\r\n', if any, is removed in each line.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_line == "Hello"
# assert i.read_line == ""
# assert i.read_line == "World"
# assert i.eof
# ~~~
#
# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
# the end of file (EOF) are considered to delimit the end of lines.
# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
#
# ~~~
# var txt2 = "Hello\r\n\n\rWorld"
# var i2 = new StringReader(txt2)
# assert i2.read_line == "Hello"
# assert i2.read_line == ""
# assert i2.read_line == "\rWorld"
# assert i2.eof
# ~~~
#
# An empty string is returned if `last_error` is set or if the stream is at EOF.
#
# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
fun read_line: String
do
	# `eof` is only queried when no error is pending
	if last_error != null or eof then return ""
	var line = new FlatBuffer
	append_line_to(line)
	return line.to_s.chomp
end
111
# Read all the lines until the eof.
#
# The line terminator '\n' and `\r\n` is removed in each line,
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_lines == ["Hello", "", "World"]
# ~~~
#
# This method is more efficient than splitting
# the result of `read_all`.
#
# NOTE: SEE `read_line` for details.
fun read_lines: Array[String]
do
	var lines = new Array[String]
	loop
		if eof then break
		lines.add read_line
	end
	return lines
end
134
# Return an iterator that reads each line.
#
# The line terminator '\n' and `\r\n` is removed in each line,
# The lines are read with `read_line`. See this method for details.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.each_line.to_a == ["Hello", "", "World"]
# ~~~
#
# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
# Therefore, the stream should not be closed until the end of the stream.
#
# ~~~
# i = new StringReader(txt)
# var el = i.each_line
#
# assert el.item == "Hello"
# el.next
# assert el.item == ""
# el.next
#
# i.close
#
# assert not el.is_ok
# # closed before "world" is read
# ~~~
fun each_line: LineIterator
do
	var lines = new LineIterator(self)
	return lines
end
164
# Read all the stream until the eof.
#
# The content of the file is returned as a String.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# assert i.read_all == txt
# ~~~
#
# The bytes come from `read_all_bytes`, are passed through `clean_utf8`,
# then the result is assembled by concatenating chunks of at most 129 bytes.
# An empty string is returned when no byte could be read.
fun read_all: String do
	var s = read_all_bytes
	var slen = s.length
	if slen == 0 then return ""
	var rets = ""
	# Byte offset of the next chunk in `sits`
	var pos = 0
	# NOTE(review): `clean_utf8` presumably sanitizes invalid UTF-8 sequences; confirm in `NativeString`
	var str = s.items.clean_utf8(slen)
	# From here on, lengths and offsets refer to the cleaned string
	slen = str.bytelen
	var sits = str.items
	# Number of bytes not yet added to `rets`
	var remsp = slen
	while pos < slen do
		# The 129 size was decided more or less arbitrarily
		# It will require some more benchmarking to compute
		# if this is the best size or not
		var chunksz = 129
		if chunksz > remsp then
			# Less than a full chunk is left: take it all and stop
			rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
			break
		end
		# End the chunk where the character covering its last byte begins,
		# so that no character is split between two chunks
		var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
		var bytelen = st - pos
		rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
		pos = st
		remsp -= bytelen
	end
	# The concatenations may have produced a `Concat`; balance it before returning
	if rets isa Concat then return rets.balance
	return rets
end
202
# Read all the stream until the eof.
#
# The content of the file is returned verbatim.
#
# Bytes are fetched one at a time with `read_byte`.
# A `null` byte (e.g. a timeout) is skipped and the read is retried,
# unless an error has been recorded in `last_error`: in that case the
# bytes gathered so far are returned.
#
# An empty `Bytes` is returned if `last_error` is already set on entry.
fun read_all_bytes: Bytes
do
	if last_error != null then return new Bytes.empty
	var s = new Bytes.empty
	while not eof do
		var c = read_byte
		if c != null then
			s.add(c)
		else if last_error != null then
			# Do not spin forever on a stream that failed without reaching EOF
			break
		end
	end
	return s
end
216
# Read a string until the end of the line and append it to `s`.
#
# Unlike `read_line` and other related methods,
# the line terminator '\n', if any, is preserved in each line.
# Use the method `Text::chomp` to safely remove it.
#
# ~~~
# var txt = "Hello\n\nWorld\n"
# var i = new StringReader(txt)
# var b = new FlatBuffer
# i.append_line_to(b)
# assert b == "Hello\n"
# i.append_line_to(b)
# assert b == "Hello\n\n"
# i.append_line_to(b)
# assert b == txt
# assert i.eof
# ~~~
#
# If `\n` is not present at the end of the result, it means that
# a non-eol terminated last line was returned.
#
# ~~~
# var i2 = new StringReader("hello")
# assert not i2.eof
# var b2 = new FlatBuffer
# i2.append_line_to(b2)
# assert b2 == "hello"
# assert i2.eof
# ~~~
#
# Nothing is appended if `last_error` is already set on entry, and the
# read stops as soon as an error is recorded in `last_error`.
#
# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
fun append_line_to(s: Buffer)
do
	if last_error != null then return
	loop
		var x = read_char
		if x == null then
			# No character: stop at EOF or once an error has been recorded,
			# otherwise (e.g. a timeout) try again
			if eof or last_error != null then return
		else
			s.chars.push(x)
			if x == '\n' then return
		end
	end
end
263
# Has the end of the stream been reached?
#
# This function returns `false` if there is something to read.
fun eof: Bool is abstract
267
# Read the next sequence of non whitespace characters.
#
# Leading whitespace characters are skipped.
# The first whitespace character that follows the result is consumed.
#
# An empty string is returned if the end of the file or an error is encountered.
#
# ~~~
# var w = new StringReader(" Hello, \n\t World!")
# assert w.read_word == "Hello,"
# assert w.read_char == '\n'
# assert w.read_word == "World!"
# assert w.read_word == ""
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_word: String
do
	var word = new FlatBuffer
	var c = read_nonwhitespace
	# Nothing but whitespace (or nothing at all) was left
	if c == null then return word.to_s
	word.add(c)
	loop
		if eof then break
		c = read_char
		# The terminating whitespace is consumed but not kept
		if c == null or c.is_whitespace then break
		word.add(c)
	end
	return word.to_s
end
300
# Skip whitespace characters (if any) then return the following non-whitespace character.
#
# Returns the character itself.
# Returns `null` on end of file or error, including when only whitespace
# characters were left in the stream.
#
# In fact, this method works like `read_char` except it skips whitespace.
#
# ~~~
# var w = new StringReader(" \nab\tc")
# assert w.read_nonwhitespace == 'a'
# assert w.read_nonwhitespace == 'b'
# assert w.read_nonwhitespace == 'c'
# assert w.read_nonwhitespace == null
# ~~~
#
# `Char::is_whitespace` determines what is a whitespace.
fun read_nonwhitespace: nullable Char
do
	while not eof do
		var c = read_char
		# Both `null` and a non-whitespace character end the scan
		if c == null or not c.is_whitespace then return c
	end
	# EOF was reached while skipping whitespace: there is no character to return
	return null
end
326 end
327
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there still something to read in `stream`?
	#
	# When the stream is at EOF and `close_on_finish` is set, the stream is closed.
	redef fun is_ok
	do
		if not stream.eof then return true
		if close_on_finish then stream.close
		return false
	end

	# The current line, read on demand with `Reader::read_line` then cached
	redef fun item
	do
		var cached = line
		if cached == null then
			cached = stream.read_line
			line = cached
		end
		return cached
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Drop the current line so the following call to `item` reads a new one
	redef fun next
	do
		# force the read, so the current line is consumed from the stream
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close the stream if `close_on_finish` is set
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
374
# `Reader` capable of declaring if readable without blocking
abstract class PollableReader
	super Reader

	# Is there something to read? (without blocking)
	fun poll_in: Bool is abstract

end
383
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# The coder from a nit UTF-8 String to the output file
	#
	# Defaults to `utf8_coder`; the attribute is writable so it can be replaced.
	var coder: Coder = utf8_coder is writable

	# Write the bytes from `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the string `s`
	fun write(s: Text) is abstract

	# Write a single byte
	fun write_byte(value: Byte) is abstract

	# Can the stream be used to write
	fun is_writable: Bool is abstract
end
403
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a writable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		# Collect the output in an in-memory writer
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
427
redef class Text
	super Writable

	# Write `self` to `stream` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
432
433 # Input streams with a buffered input for efficiency purposes
434 abstract class BufferedReader
435 super Reader
# Read the next byte of the buffer and return it as a character
#
# Returns `null` if `last_error` is set.
# Reading at EOF records an `IOError` in `last_error` and returns `null`.
redef fun read_char
do
	if last_error != null then return null
	if eof then
		last_error = new IOError("Stream has reached eof")
		return null
	end
	# TODO: Fix when supporting UTF-8
	var at = _buffer_pos
	_buffer_pos = at + 1
	return _buffer[at].to_i.code_point
end
448
# Read the next byte of the buffer
#
# Returns `null` if `last_error` is set.
# Reading at EOF records an `IOError` in `last_error` and returns `null`.
redef fun read_byte
do
	if last_error != null then return null
	if eof then
		last_error = new IOError("Stream has reached eof")
		return null
	end
	var at = _buffer_pos
	_buffer_pos = at + 1
	return _buffer[at]
end
460
# Resets the internal buffer
#
# Both the read position and the length of the buffer are set back to 0.
fun buffer_reset
do
	_buffer_pos = 0
	_buffer_length = 0
end
466
# Peeks up to `i` bytes in the buffer
#
# The operation does not consume the buffer
#
# ~~~nitish
# var x = new FileReader.open("File.txt")
# assert x.peek(5) == x.read(5)
# ~~~
#
# An empty `Bytes` is returned at EOF.
fun peek(i: Int): Bytes do
	if eof then return new Bytes.empty
	# Bytes buffered and not yet consumed
	var remsp = _buffer_length - _buffer_pos
	if i <= remsp then
		# Enough bytes are already buffered: copy them without moving `_buffer_pos`
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, i, _buffer_pos)
		return bf
	end
	# Not enough buffered bytes: take what is there, then read the missing ones
	var bf = new Bytes.with_capacity(i)
	bf.append_ns_from(_buffer, remsp, _buffer_pos)
	_buffer_pos = _buffer_length
	read_intern(i - bf.length, bf)
	# Bytes left in the buffer after the read above
	remsp = _buffer_length - _buffer_pos
	# The rebuilt buffer holds the peeked bytes followed by the leftover
	var full_len = bf.length + remsp
	if full_len > _buffer_capacity then
		# Grow the capacity until everything fits
		var c = _buffer_capacity
		while c < full_len do c = c * 2 + 2
		_buffer_capacity = c
	end
	var nns = new NativeString(_buffer_capacity)
	# Put the peeked bytes back in front, so that they are read again later
	bf.items.copy_to(nns, bf.length, 0, 0)
	_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
	_buffer = nns
	_buffer_pos = 0
	_buffer_length = full_len
	return bf
end
502
# Read at most `i` bytes through the internal buffer
#
# An empty `Bytes` is returned if `last_error` is set.
redef fun read_bytes(i)
do
	if last_error == null then
		var bytes = new Bytes.with_capacity(i)
		read_intern(i, bytes)
		return bytes
	end
	return new Bytes.empty
end
510
# Fills `buf` with at most `i` bytes read from `self`
#
# Returns the number of bytes appended to `buf`, 0 at EOF.
private fun read_intern(i: Int, buf: Bytes): Int
do
	if eof then return 0
	var start = _buffer_pos
	var available = _buffer_length - start
	if i <= available then
		# The buffer holds enough bytes to serve the whole request
		buf.append_ns_from(_buffer, i, start)
		_buffer_pos = start + i
		return i
	end
	# Take everything that is buffered, then recurse for the remainder
	_buffer_pos = _buffer_length
	buf.append_ns_from(_buffer, available, start)
	return read_intern(i - available, buf) + available
end
527
# Read all the remaining bytes of the stream, one buffer at a time
#
# An empty `Bytes` is returned if `last_error` is set.
redef fun read_all_bytes
do
	if last_error != null then return new Bytes.empty
	var s = new Bytes.with_capacity(10)
	while not eof do
		var j = _buffer_pos
		var k = _buffer_length
		# Use the current `_buffer` on each pass: `fill_buffer` is provided
		# by subclasses and `_buffer` can be replaced (see `peek`),
		# so a reference taken before the loop could be stale.
		s.append_ns_from(_buffer, k - j, j)
		_buffer_pos = k
		fill_buffer
	end
	return s
end
543
# Read bytes up to and including the next '\n' (or up to the end of the
# stream) and append them to `s` as a string
#
# The buffer is scanned for the byte `0xA`; when it is exhausted before a
# line terminator is found, `fill_buffer` is called to get more data unless
# `end_reached` is true.
# Calling this method on a drained buffer that is not at the end of the
# stream refills the buffer instead of failing an assertion.
redef fun append_line_to(s)
do
	var lb = new Bytes.with_capacity(10)
	loop
		# Look for a '\n' in the buffered bytes
		var i = _buffer_pos
		while i < _buffer_length and _buffer[i] != 0xAu8 do
			i += 1
		end

		# The scan stopped inside the buffer only if a '\n' was found
		var eol = i < _buffer_length
		# Keep the '\n' in the line
		if eol then i += 1

		# Move the scanned bytes (if any) from the buffer to the line
		if i > _buffer_pos then
			lb.append_ns_from(_buffer, i - _buffer_pos, _buffer_pos)
			_buffer_pos = i
		end

		# The line is complete at a '\n' or at the end of the stream
		if eol or end_reached then
			s.append lb.to_s
			return
		end

		# Buffer exhausted before the end of the line: get more data
		fill_buffer
	end
end
592
# Is the stream exhausted?
#
# Returns `false` if unread bytes remain in the buffer.
# Otherwise, unless `end_reached` is already true, `fill_buffer` is
# called before deciding.
redef fun eof
do
	# Unread bytes are still buffered
	if _buffer_pos < _buffer_length then return false
	if end_reached then return true
	# Try to get more data before answering
	fill_buffer
	if _buffer_pos < _buffer_length then return false
	return end_reached
end
600
# The buffer
private var buffer: NativeString = new NativeString(0)

# The current position in the buffer
private var buffer_pos = 0

# Length of the current buffer (i.e. number of bytes in the buffer)
private var buffer_length = 0

# Capacity of the buffer
private var buffer_capacity = 0
612
# Fill the buffer
#
# Implemented by the concrete subclasses.
protected fun fill_buffer is abstract
615
# Has the last `fill_buffer` reached the end
#
# Implemented by the concrete subclasses.
protected fun end_reached: Bool is abstract
618
# Allocate a `_buffer` for a given `capacity`.
#
# The new buffer starts empty: position and length are both set to 0.
protected fun prepare_buffer(capacity: Int)
do
	_buffer_capacity = capacity
	_buffer = new NativeString(capacity)
	_buffer_length = 0
	_buffer_pos = 0 # need to read
end
627 end
628
# A `Stream` that can be written to and read from
#
# It combines `Reader` and `Writer`.
abstract class Duplex
	super Reader
	super Writer
end
634
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# The pieces written so far, in writing order
	private var content = new Array[String]

	# Everything written so far, as a single string
	redef fun to_s do return content.plain_to_s

	# The stream is writable until it is closed
	redef fun is_writable do return not closed

	# Append the string form of `b`
	#
	# Like `write`, this must not be called once the stream is closed.
	redef fun write_bytes(b)
	do
		assert not closed
		content.add(b.to_s)
	end

	# Append the string form of `str`
	#
	# Must not be called once the stream is closed.
	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	# Is the stream closed?
	protected var closed = false

	# Mark the stream as closed; the written content stays available through `to_s`
	redef fun close do closed = true
end
660
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# Read the next character of `source`, or `null` when none is left
	#
	# NOTE(review): `cursor` is used here as a character index although it is
	# documented, and used by `read_byte` and `eof`, as a byte offset.
	# Both views only agree on ASCII-only sources. Fix when supporting UTF-8.
	redef fun read_char do
		if cursor < source.length then
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Read the next byte of `source`, or `null` when none is left
	#
	# The bound is `bytelen`, the same one used by `eof`, so that reading
	# byte by byte always ends up reaching EOF.
	redef fun read_byte do
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Drop the source string; the stream is at EOF afterwards
	redef fun close do
		source = ""
	end

	# Return all the bytes from `cursor` to the end of `source` and consume them
	#
	# An empty `Bytes` is returned at EOF (including after `close`).
	redef fun read_all_bytes do
		# Also protects against a negative size when `cursor` is past the end
		if eof then return new Bytes.empty
		var nslen = source.bytelen - cursor
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		# Everything has been read: the stream is now at EOF
		cursor = source.bytelen
		return new Bytes(nns, nslen, nslen)
	end

	# Is `cursor` at or past the last byte of `source`?
	redef fun eof do return cursor >= source.bytelen
end