lib/core: Added simple write char method to `Writer`
[nit.git] / lib / core / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17 import codecs
18
19 in "C" `{
20 #include <unistd.h>
21 #include <string.h>
22 #include <signal.h>
23 `}
24
# Error raised by a failed operation on a `Stream`
#
# Streams report such errors through `Stream::last_error`.
class IOError
	super Error
end
29
# Common root of all streams: something that can be read from, written to, or both
abstract class Stream
	# Last error produced by an operation on the stream, `null` when none occurred
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract

	# Pre-work hook.
	#
	# Tells `self` that operations are about to start.
	# Specific streams can use this to prepare some resources.
	#
	# Is automatically invoked at the beginning of `with` structures.
	#
	# Does nothing by default.
	fun start do end

	# Post-work hook.
	#
	# Tells `self` that the operations are over.
	# Specific streams can use this to free some resources.
	#
	# Is automatically invoked at the end of `with` structures.
	#
	# Calls `close` by default.
	fun finish
	do
		close
	end
end
63
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Decoder used to transform input bytes to UTF-8
	var decoder: Codec = utf8_codec is writable

	# Reads a character. Returns `null` on EOF or timeout
	fun read_char: nullable Char is abstract

	# Reads a byte. Returns `null` on EOF or timeout
	fun read_byte: nullable Byte is abstract

	# Reads at most `i` bytes and returns them as a String
	fun read(i: Int): String do return read_bytes(i).to_s

	# Read at most `i` bytes
	#
	# An empty `Bytes` is returned if an error was previously recorded
	# in `last_error`, or if `i` is not strictly positive.
	fun read_bytes(i: Int): Bytes
	do
		if last_error != null then return new Bytes.empty
		if i <= 0 then return new Bytes.empty
		# Reserve the whole capacity up front so that `add` does not reallocate
		var buf = new Bytes.with_capacity(i)
		while i > 0 and not eof do
			var c = read_byte
			if c != null then
				buf.add c
				i -= 1
			end
		end
		return buf
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var line = new FlatBuffer
		append_line_to(line)
		return line.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var lines = new Array[String]
		while not eof do
			lines.add read_line
		end
		return lines
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the iteration.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned as a String.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String do
		var raw = read_all_bytes
		var total = raw.length
		if total == 0 then return ""
		var res = ""
		var pos = 0
		# Work on a cleaned copy; its byte length may differ from the raw one
		var cleaned = raw.items.clean_utf8(total)
		total = cleaned.bytelen
		var native = cleaned.items
		var remaining = total
		# The 129 size was decided more or less arbitrarily
		# It will require some more benchmarking to compute
		# if this is the best size or not
		var chunksz = 129
		while pos < total do
			if chunksz > remaining then
				# Last, shorter, chunk
				res += new FlatString.with_infos(native, remaining, pos)
				break
			end
			# Cut the chunk on a character boundary
			var cut = native.find_beginning_of_char_at(pos + chunksz - 1)
			var chunk_len = cut - pos
			res += new FlatString.with_infos(native, chunk_len, pos)
			pos = cut
			remaining -= chunk_len
		end
		if res isa Concat then return res.balance
		return res
	end

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	fun read_all_bytes: Bytes
	do
		if last_error != null then return new Bytes.empty
		var res = new Bytes.empty
		while not eof do
			var b = read_byte
			if b != null then res.add(b)
		end
		return res
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == null then
				# A `null` that is not the end of the stream: try again
				if eof then return
			else
				s.chars.push(x)
				if x == '\n' then return
			end
		end
	end

	# Has the end of the stream been reached?
	#
	# Returns `false` if there is still something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		if c != null then
			buf.add(c)
			while not eof do
				c = read_char
				if c == null then break
				if c.is_whitespace then break
				buf.add(c)
			end
		end
		return buf.to_s
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the character that was read.
	# Returns `null` on end of file or error, including when only
	# whitespace remained in the stream.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'
	# assert w.read_nonwhitespace == 'b'
	# assert w.read_nonwhitespace == 'c'
	# assert w.read_nonwhitespace == null
	#
	# var w2 = new StringReader("a \n")
	# assert w2.read_nonwhitespace == 'a'
	# assert w2.read_nonwhitespace == null
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: nullable Char
	do
		while not eof do
			var c = read_char
			if c == null then return null
			if not c.is_whitespace then return c
		end
		# The stream ended without any non-whitespace character:
		# do not leak the last whitespace that was skipped
		return null
	end
end
347
# Lazy iterator over the lines of a `Reader`.
#
# Instances are returned by `Reader::each_line`; see that method for details.
class LineIterator
	super Iterator[String]

	# The stream the lines are read from
	var stream: Reader

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# The last line read (cache)
	private var line: nullable String = null

	redef fun is_ok
	do
		if not stream.eof then return true
		if close_on_finish then stream.close
		return false
	end

	redef fun item
	do
		var cached = self.line
		if cached != null then return cached
		# Nothing cached yet: read the line once and remember it
		cached = stream.read_line
		self.line = cached
		return cached
	end

	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
394
# A `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
403
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# The coder from a nit UTF-8 String to the output file
	var coder: Codec = utf8_codec is writable

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write a string
	fun write(s: Text) is abstract

	# Write a single byte
	fun write_byte(value: Byte) is abstract

	# Write a single char
	#
	# By default, `c` is converted to a string and sent to `write`.
	fun write_char(c: Char)
	do
		write(c.to_s)
	end

	# Can the stream be used to write
	fun is_writable: Bool is abstract
end
426
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a streamable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		# Collect the output in an in-memory writer
		var collector = new StringWriter
		write_to(collector)
		return collector.to_s
	end
end
450
redef class Bytes
	super Writable

	# Send the bytes of `self` to `s` through `Writer::write_bytes`
	redef fun write_to(s)
	do
		s.write_bytes(self)
	end

	# Directly `to_s`, no intermediate `Writer` is needed
	redef fun write_to_string do return to_s
end
457
redef class Text
	super Writable

	# Send `self` to `stream` through `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
462
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`;
# reads are then served from the internal buffer.
abstract class BufferedReader
	super Reader

	redef fun read_char
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		# TODO: Fix when supporting UTF-8
		var c = _buffer[_buffer_pos].to_i.code_point
		_buffer_pos += 1
		return c
	end

	redef fun read_byte
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		var c = _buffer[_buffer_pos]
		_buffer_pos += 1
		return c
	end

	# Resets the internal buffer
	fun buffer_reset do
		_buffer_length = 0
		_buffer_pos = 0
	end

	# Peeks up to `n` bytes in the buffer
	#
	# The operation does not consume the buffer
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read(5)
	# ~~~
	fun peek(i: Int): Bytes do
		if eof then return new Bytes.empty
		var remsp = _buffer_length - _buffer_pos
		var bf = new Bytes.with_capacity(i)
		if i <= remsp then
			# Everything requested is already buffered
			bf.append_ns_from(_buffer, i, _buffer_pos)
			return bf
		end
		# Take what is buffered, then read the rest from the source
		bf.append_ns_from(_buffer, remsp, _buffer_pos)
		_buffer_pos = _buffer_length
		read_intern(i - bf.length, bf)
		# Rebuild a buffer made of the peeked bytes followed by
		# whatever is still unread, so that nothing is consumed
		remsp = _buffer_length - _buffer_pos
		var full_len = bf.length + remsp
		if full_len > _buffer_capacity then
			var c = _buffer_capacity
			while c < full_len do c = c * 2 + 2
			_buffer_capacity = c
		end
		var nns = new NativeString(_buffer_capacity)
		bf.items.copy_to(nns, bf.length, 0, 0)
		_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
		_buffer = nns
		_buffer_pos = 0
		_buffer_length = full_len
		return bf
	end

	redef fun read_bytes(i)
	do
		if last_error != null then return new Bytes.empty
		var buf = new Bytes.with_capacity(i)
		read_intern(i, buf)
		return buf
	end

	# Fills `buf` with at most `i` bytes read from `self`
	#
	# Returns the number of bytes appended to `buf`.
	private fun read_intern(i: Int, buf: Bytes): Int do
		if eof then return 0
		var p = _buffer_pos
		var bufsp = _buffer_length - p
		if bufsp >= i then
			_buffer_pos += i
			buf.append_ns_from(_buffer, i, p)
			return i
		end
		# Drain the buffer, then recurse: `eof` refills it when possible
		_buffer_pos = _buffer_length
		buf.append_ns_from(_buffer, bufsp, p)
		var rd = read_intern(i - bufsp, buf)
		return rd + bufsp
	end

	redef fun read_all_bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.with_capacity(10)
		while not eof do
			var j = _buffer_pos
			var k = _buffer_length
			# Read `_buffer` at each iteration: it is reassigned by
			# `prepare_buffer` and `peek`, so an alias taken before the loop
			# could point to a stale buffer.
			s.append_ns_from(_buffer, k - j, j)
			_buffer_pos = k
			fill_buffer
		end
		return s
	end

	redef fun append_line_to(s)
	do
		var lb = new Bytes.with_capacity(10)
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer_length and _buffer[i] != 0xAu8 do
				i += 1
			end

			# The scan stopped before the end of the buffer only on a '\n'.
			# Step over it so that it is part of the copied line.
			var eol = i < _buffer_length
			if eol then i += 1

			# if there is something to append
			if i > _buffer_pos then
				# Copy from the buffer to the line, in one go
				lb.append_ns_from(_buffer, i - _buffer_pos, _buffer_pos)
				_buffer_pos = i
			else
				assert end_reached
				s.append lb.to_s
				return
			end

			# Done when '\n' is found, or when there is nothing more to read
			if eol or end_reached then
				s.append lb.to_s
				return
			end
			fill_buffer
		end
	end

	redef fun eof
	do
		if _buffer_pos < _buffer_length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end

	# The buffer
	private var buffer: NativeString = new NativeString(0)

	# The current position in the buffer
	private var buffer_pos = 0

	# Length of the current buffer (i.e. number of bytes in the buffer)
	private var buffer_length = 0

	# Capacity of the buffer
	private var buffer_capacity = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Has the last fill_buffer reached the end
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new NativeString(capacity)
		_buffer_pos = 0 # need to read
		_buffer_length = 0
		_buffer_capacity = capacity
	end
end
658
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
664
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
# What is written is accumulated and can be retrieved with `to_s`.
class StringWriter
	super Writer

	# The pieces written so far, in order
	private var content = new Array[String]

	# Is the stream closed?
	protected var closed = false

	redef fun to_s do return content.plain_to_s

	redef fun is_writable do return not closed

	redef fun write_bytes(b)
	do
		content.add(b.to_s)
	end

	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	redef fun close
	do
		closed = true
	end
end
690
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
#
# ~~~
# var r = new StringReader("abc")
# assert r.read_all == "abc"
# assert r.eof
# ~~~
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# NOTE(review): `cursor` is used here as a character index whereas the
	# other methods use it as a byte index; both only agree on ASCII sources.
	redef fun read_char do
		if cursor < source.length then
			# Fix when supporting UTF-8
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	redef fun read_byte do
		# `cursor` indexes `source.bytes`, so it is bounded by the byte length
		# (as in `eof`), not by the number of characters
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	redef fun close do
		source = ""
	end

	redef fun read_all_bytes do
		var nslen = source.bytelen - cursor
		# Nothing left; also covers a `cursor` beyond the emptied source after `close`
		if nslen <= 0 then return new Bytes.empty
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		# The bytes are consumed: `eof` is true afterwards
		cursor += nslen
		return new Bytes(nns, nslen, nslen)
	end

	redef fun eof do return cursor >= source.bytelen
end