misc/vim: highlight the new Int literals
[nit.git] / lib / standard / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import text::ropes
15 import error
16 intrude import bytes
17
18 in "C" `{
19 #include <unistd.h>
20 #include <string.h>
21 #include <signal.h>
22 `}
23
# Error reported by a failed operation on a `Stream`
#
# Instances are recorded in `Stream::last_error`.
class IOError
	super Error
end
28
# Common root of all streams: something that can be read from, written to, or both
abstract class Stream
	# Last error recorded by an operation on this stream, `null` if none occurred
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract
end
42
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Reads a character. Returns `null` on EOF or timeout
	fun read_char: nullable Char is abstract

	# Reads a byte. Returns `null` on EOF or timeout
	fun read_byte: nullable Byte is abstract

	# Reads a String of at most `i` length
	fun read(i: Int): String do return read_bytes(i).to_s

	# Read at most `i` bytes
	#
	# An empty `Bytes` is returned if `last_error` is set or if `i` is not positive.
	# Fewer than `i` bytes are returned when `eof` is reached first.
	fun read_bytes(i: Int): Bytes
	do
		if last_error != null then return new Bytes.empty
		# Nothing to read for a non-positive request; also keeps a negative
		# size away from the allocator.
		if i <= 0 then return new Bytes.empty
		# Reserve the room for the `i` bytes once, so that `add` does not
		# have to regrow the storage from a zero capacity.
		var buf = new Bytes.with_capacity(i)
		while i > 0 and not eof do
			var c = read_byte
			# A `null` byte while not at `eof` is simply retried
			if c != null then
				buf.add c
				i -= 1
			end
		end
		return buf
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# An empty string is returned if `last_error` is set or if the stream is at `eof`.
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the iteration.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned as a String.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String do
		var s = read_all_bytes
		var slen = s.length
		if slen == 0 then return ""
		var rets = ""
		var pos = 0
		var sits = s.items
		var remsp = slen
		# The 129 size was decided more or less arbitrarily
		# It will require some more benchmarking to compute
		# if this is the best size or not
		var chunksz = 129
		while pos < slen do
			if chunksz > remsp then
				# Less than a full chunk remains: take everything that is left
				rets += new FlatString.with_infos(sits, remsp, pos, pos + remsp - 1)
				break
			end
			# Cut the chunk at the beginning of the character that holds its
			# last byte, so that no character is split between two chunks
			var st = sits.find_beginning_of_char_at(pos + chunksz - 1)
			var bytelen = st - pos
			rets += new FlatString.with_infos(sits, bytelen, pos, st - 1)
			pos = st
			remsp -= bytelen
		end
		if rets isa Concat then return rets.balance
		return rets
	end

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	# An empty `Bytes` is returned if `last_error` is set.
	fun read_all_bytes: Bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.empty
		while not eof do
			var c = read_byte
			if c != null then s.add(c)
		end
		return s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == null then
				# No character: stop only at the end of the stream, retry otherwise
				if eof then return
			else
				s.chars.push(x)
				if x == '\n' then return
			end
		end
	end

	# Is the end of the stream reached?
	#
	# Returns `false` if there is still something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		if c != null then
			buf.add(c)
			while not eof do
				c = read_char
				if c == null then break
				if c.is_whitespace then break
				buf.add(c)
			end
		end
		return buf.to_s
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the character that was read.
	# Returns `null` on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'
	# assert w.read_nonwhitespace == 'b'
	# assert w.read_nonwhitespace == 'c'
	# assert w.read_nonwhitespace == null
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: nullable Char
	do
		var c: nullable Char = null
		while not eof do
			c = read_char
			if c == null or not c.is_whitespace then break
		end
		return c
	end
end
320
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there a line to get with `item`?
	#
	# True while a line read by `item` is still cached, or while the stream
	# is not at `eof`. When the end is reached and `close_on_finish` is set,
	# the stream is closed.
	redef fun is_ok
	do
		# A line already read by `item` stays available until `next` drops it,
		# even if reading it exhausted the stream.
		if line != null then return true
		var res = not stream.eof
		if not res and close_on_finish then stream.close
		return res
	end

	# The current line, read from the stream on the first call and cached until `next`
	redef fun item
	do
		var line = self.line
		if line == null then
			line = stream.read_line
		end
		self.line = line
		return line
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Move to the following line; the current one is read first if `item` was not called
	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close the stream if `close_on_finish` is set
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
367
# A `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Is there something to read? (without blocking)
	fun poll_in: Bool is abstract
end
376
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# Write the bytes of `s`
	fun write_bytes(s: Bytes) is abstract

	# Write the text `s`
	fun write(s: Text) is abstract

	# Write a single byte
	fun write_byte(value: Byte) is abstract

	# Can the stream be used to write?
	fun is_writable: Bool is abstract
end
393
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	#
	# The specific logic is left to the concrete subclasses.
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point of a writable
	# object is to be efficiently written to a stream without having
	# to allocate and concatenate strings.
	fun write_to_string: String
	do
		# Collect the output in an in-memory writer, then get it back as a String
		var out = new StringWriter
		write_to(out)
		return out.to_s
	end
end
417
redef class Text
	super Writable

	# Write the text itself with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
422
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`; the reading methods
# consume `_buffer` between `_buffer_pos` and `_buffer_length`.
abstract class BufferedReader
	super Reader

	# Read the next character from the buffer
	#
	# Returns `null` if `last_error` is set. Reading at `eof` records an
	# `IOError` in `last_error` and returns `null`.
	redef fun read_char
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		# TODO: Fix when supporting UTF-8
		var c = _buffer[_buffer_pos].to_i.ascii
		_buffer_pos += 1
		return c
	end

	# Read the next byte from the buffer
	#
	# Returns `null` if `last_error` is set. Reading at `eof` records an
	# `IOError` in `last_error` and returns `null`.
	redef fun read_byte
	do
		if last_error != null then return null
		if eof then
			last_error = new IOError("Stream has reached eof")
			return null
		end
		var c = _buffer[_buffer_pos]
		_buffer_pos += 1
		return c
	end

	# Forget the buffered content: the read position and the length are set back to 0
	fun buffer_reset do
		_buffer_length = 0
		_buffer_pos = 0
	end

	# Peeks up to `n` bytes in the buffer
	#
	# The operation does not consume the buffer
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read_bytes(5)
	# ~~~
	fun peek(i: Int): Bytes do
		if eof then return new Bytes.empty
		var remsp = _buffer_length - _buffer_pos
		# Easy case: the buffer already holds the `i` requested bytes
		if i <= remsp then
			var bf = new Bytes.with_capacity(i)
			bf.append_ns_from(_buffer, i, _buffer_pos)
			return bf
		end
		# Take what is buffered, then read the missing bytes from the stream
		var bf = new Bytes.with_capacity(i)
		bf.append_ns_from(_buffer, remsp, _buffer_pos)
		_buffer_pos = _buffer_length
		read_intern(i - bf.length, bf)
		# Rebuild a buffer made of the peeked bytes followed by what is
		# still unread, so that the peeked bytes can be read again
		remsp = _buffer_length - _buffer_pos
		var full_len = bf.length + remsp
		if full_len > _buffer_capacity then
			var c = _buffer_capacity
			while c < full_len do c = c * 2 + 2
			_buffer_capacity = c
		end
		var nns = new NativeString(_buffer_capacity)
		bf.items.copy_to(nns, bf.length, 0, 0)
		_buffer.copy_to(nns, remsp, _buffer_pos, bf.length)
		_buffer = nns
		_buffer_pos = 0
		_buffer_length = full_len
		return bf
	end

	# Read at most `i` bytes; an empty `Bytes` is returned if `last_error` is set
	redef fun read_bytes(i)
	do
		if last_error != null then return new Bytes.empty
		var buf = new Bytes.with_capacity(i)
		read_intern(i, buf)
		return buf
	end

	# Fills `buf` with at most `i` bytes read from `self`
	#
	# Returns the number of bytes appended to `buf`.
	private fun read_intern(i: Int, buf: Bytes): Int do
		if eof then return 0
		var p = _buffer_pos
		var bufsp = _buffer_length - p
		# The buffer holds enough bytes: serve the whole request from it
		if bufsp >= i then
			_buffer_pos += i
			buf.append_ns_from(_buffer, i, p)
			return i
		end
		# Otherwise drain the buffer and recurse for the rest; the `eof`
		# test of the recursive call is what may refill the buffer
		_buffer_pos = _buffer_length
		var readln = _buffer_length - p
		buf.append_ns_from(_buffer, readln, p)
		var rd = read_intern(i - readln, buf)
		return rd + readln
	end

	# Read all the remaining bytes; an empty `Bytes` is returned if `last_error` is set
	redef fun read_all_bytes
	do
		if last_error != null then return new Bytes.empty
		var s = new Bytes.with_capacity(10)
		while not eof do
			# Drain the current buffer, then ask for more
			var j = _buffer_pos
			var k = _buffer_length
			while j < k do
				s.add(_buffer[j])
				j += 1
			end
			_buffer_pos = j
			fill_buffer
		end
		return s
	end

	# Append the bytes of the next line to `s`, line terminator '\n' included
	#
	# Nothing is appended if `last_error` is set, as in `Reader::append_line_to`.
	redef fun append_line_to(s)
	do
		if last_error != null then return
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer_length and _buffer[i] != 0xAu8 do
				i += 1
			end

			var eol
			if i < _buffer_length then
				assert _buffer[i] == 0xAu8
				i += 1
				eol = true
			else
				eol = false
			end

			# if there is something to append
			if i > _buffer_pos then
				# Enlarge the string (if needed)
				s.enlarge(s.bytelen + i - _buffer_pos)

				# Copy from the buffer to the string
				var j = _buffer_pos
				while j < i do
					s.bytes.add(_buffer[j])
					j += 1
				end
				_buffer_pos = i
			else
				assert end_reached
				return
			end

			if eol then
				# so \n is found
				return
			else
				# so \n is not found
				if end_reached then return
				fill_buffer
			end
		end
	end

	# Is the end of the stream reached?
	#
	# May call `fill_buffer` when the buffer is exhausted and the end is not reached yet.
	redef fun eof
	do
		if _buffer_pos < _buffer_length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer_length and end_reached
	end

	# The buffer
	private var buffer: NativeString = new NativeString(0)

	# The current position in the buffer
	private var buffer_pos = 0

	# Length of the current buffer (i.e. number of bytes in the buffer)
	private var buffer_length = 0

	# Capacity of the buffer
	private var buffer_capacity = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Has the last fill_buffer reached the end
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new NativeString(capacity)
		_buffer_pos = 0 # need to read
		_buffer_length = 0
		_buffer_capacity = capacity
	end
end
615
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
621
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# The chunks written so far, in writing order
	private var content = new Array[String]

	# The concatenation of everything written so far
	redef fun to_s
	do
		return content.plain_to_s
	end

	# Writable as long as the stream is not closed
	redef fun is_writable
	do
		return not closed
	end

	# Store the string form of `b` as a new chunk
	redef fun write_bytes(b)
	do
		content.add(b.to_s)
	end

	# Store `str` as a new chunk; the stream must not be closed
	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	# Is the stream closed?
	protected var closed = false

	# Mark the stream as closed
	redef fun close
	do
		closed = true
	end
end
647
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string (bytewise).
	private var cursor: Int = 0

	# Read the next character, `null` when there is none left
	#
	# NOTE(review): `cursor` is used here as a character index although it is
	# a byte position elsewhere; both agree only for single-byte characters.
	redef fun read_char do
		if cursor < source.length then
			# Fix when supporting UTF-8
			var c = source[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Read the next byte, `null` when there is none left
	redef fun read_byte do
		# `cursor` is a byte position: bound it with the byte length, as `eof`
		# does, so that a byte is always available while `eof` is false.
		if cursor < source.bytelen then
			var c = source.bytes[cursor]
			cursor += 1
			return c
		else
			return null
		end
	end

	# Drop the source string, so there is nothing left to read
	redef fun close do
		source = ""
	end

	# Read all the bytes from the current position to the end of the string
	#
	# The read bytes are consumed: the reader is at `eof` afterwards.
	redef fun read_all_bytes do
		# Count in bytes, since `copy_to_native` and `cursor` are bytewise
		var nslen = source.bytelen - cursor
		# Nothing left; the count can even be negative once `close` has
		# emptied `source` while `cursor` kept its value.
		if nslen <= 0 then return new Bytes.empty
		var nns = new NativeString(nslen)
		source.copy_to_native(nns, nslen, cursor, 0)
		cursor += nslen
		return new Bytes(nns, nslen, nslen)
	end

	# Is the whole string consumed?
	redef fun eof do return cursor >= source.bytelen
end