e0d3a676d2c110ea7405b05b232d700744dfc238
[nit.git] / lib / standard / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import ropes
15 import error
16
17 in "C" `{
18 #include <unistd.h>
19 #include <string.h>
20 #include <signal.h>
21 `}
22
# Error that an operation on a `Stream` may produce
class IOError
	super Error
end
27
# Common ancestor of the streams: something to read from, to write to, or both
abstract class Stream
	# Last error recorded on the stream, `null` as long as none was recorded
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract
end
41
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Read a character. Return its ASCII value, -1 on EOF or timeout
	fun read_char: Int is abstract

	# Read at most `i` bytes
	#
	# Negative values returned by `read_char` carry no data: they are
	# skipped and are not counted.
	# An empty string is returned if `last_error` is already set.
	fun read(i: Int): String
	do
		if last_error != null then return ""
		var s = new FlatBuffer.with_capacity(i)
		while i > 0 and not eof do
			var c = read_char
			if c >= 0 then
				s.add(c.ascii)
				i -= 1
			end
		end
		return s.to_s
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the stream.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String
	do
		if last_error != null then return ""
		var s = new FlatBuffer
		while not eof do
			var c = read_char
			if c >= 0 then s.add(c.ascii)
		end
		return s.to_s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == -1 then
				# -1 is also returned on timeout: only stop at the real end
				if eof then return
			else
				var c = x.ascii
				s.chars.push(c)
				if c == '\n' then return
			end
		end
	end

	# Is the end of the stream reached?
	# This function returns `false` if there is something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'.ascii
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# Trailing whitespace does not produce an additional word.
	#
	# ~~~
	# var w2 = new StringReader("end  ")
	# assert w2.read_word == "end"
	# assert w2.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		# a negative value means that no character could be read
		if c >= 0 then
			buf.add(c.ascii)
			while not eof do
				c = read_char
				if c < 0 then break
				var a = c.ascii
				if a.is_whitespace then break
				buf.add(a)
			end
		end
		var res = buf.to_s
		return res
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the code point of the character.
	# Return -1 on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'.ascii
	# assert w.read_nonwhitespace == 'b'.ascii
	# assert w.read_nonwhitespace == 'c'.ascii
	# assert w.read_nonwhitespace == -1
	# ~~~
	#
	# When only whitespace remains, it is consumed and -1 is returned.
	#
	# ~~~
	# var w3 = new StringReader("a \n ")
	# assert w3.read_nonwhitespace == 'a'.ascii
	# assert w3.read_nonwhitespace == -1
	# assert w3.eof
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: Int
	do
		while not eof do
			var c = read_char
			# a negative value (EOF, timeout or error) is reported as is
			if c < 0 then return c
			if not c.ascii.is_whitespace then return c
		end
		# The end was reached while skipping: a skipped whitespace
		# must not be reported as the result.
		return -1
	end
end
284
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there a current line?
	#
	# A line already fetched by `item` remains the current item until `next`
	# is called, even if reading it consumed the end of the stream.
	# Otherwise the answer depends on `stream.eof`; when the end is reached
	# and `close_on_finish` is set, the stream is closed.
	redef fun is_ok
	do
		if line != null then return true
		var res = not stream.eof
		if not res and close_on_finish then stream.close
		return res
	end

	# The current line, read lazily with `Reader::read_line` and cached
	redef fun item
	do
		var line = self.line
		if line == null then
			line = stream.read_line
		end
		self.line = line
		return line
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Move to the following line
	redef fun next
	do
		# force the read, so that the skipped line is consumed from the stream
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close the stream if `close_on_finish` is set
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
331
# A `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Can something be read right now, without blocking?
	fun poll_in: Bool is abstract
end
340
# A `Stream` that accepts written data
abstract class Writer
	super Stream

	# Write the text `s` to the stream
	fun write(s: Text) is abstract

	# Is the stream currently usable for writing?
	fun is_writable: Bool is abstract
end
350
# Things that can be efficiently written to a `Writer`
#
# Implementing this interface lets an instance send itself
# directly into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	# The specific logic is left to the concrete subclasses
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point
	# of a streamable object is to be efficiently written to a
	# stream without having to allocate and concatenate strings
	fun write_to_string: String
	do
		var sink = new StringWriter
		write_to(sink)
		return sink.to_s
	end
end
374
redef class Text
	super Writable

	# A text is written as is, with a single call to `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
379
# Input streams with a buffered input for efficiency purposes
#
# Subclasses provide `fill_buffer` and `end_reached`, and are expected
# to call `prepare_buffer` before the first read.
abstract class BufferedReader
	super Reader

	# Read the next buffered character.
	#
	# Return -1 if `last_error` is set.
	# Reading at the end of the stream records an `IOError` in `last_error`
	# and returns -1.
	redef fun read_char
	do
		if last_error != null then return -1
		if eof then
			last_error = new IOError("Stream has reached eof")
			return -1
		end
		var c = _buffer.chars[_buffer_pos]
		_buffer_pos += 1
		return c.ascii
	end

	# Read at most `i` characters, refilling the buffer as needed
	redef fun read(i)
	do
		if last_error != null then return ""
		if eof then return ""
		var p = _buffer_pos
		var bufsp = _buffer.length - p
		if bufsp >= i then
			# The buffer holds enough data to serve the whole request
			_buffer_pos += i
			return _buffer.substring(p, i).to_s
		end
		# Take what is left, refill, then read the remaining part
		_buffer_pos = _buffer.length
		var s = _buffer.substring(p, bufsp).to_s
		fill_buffer
		return s + read(i - bufsp)
	end

	# Read everything until the end of the stream, one buffer at a time
	redef fun read_all
	do
		if last_error != null then return ""
		var s = new FlatBuffer
		while not eof do
			var j = _buffer_pos
			var k = _buffer.length
			while j < k do
				s.add(_buffer[j])
				j += 1
			end
			_buffer_pos = j
			fill_buffer
		end
		return s.to_s
	end

	# Append the next line, terminator included, to `s`.
	#
	# Nothing is appended if `last_error` is set.
	# When the buffer is exhausted before a '\n' is found, it is refilled
	# with `fill_buffer` unless `end_reached`.
	redef fun append_line_to(s)
	do
		if last_error != null then return
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer.length and _buffer.chars[i] != '\n' do i += 1

			# `i` is moved past the '\n' so that it is copied with the line
			var eol = i < _buffer.length
			if eol then i += 1

			# if there is something to append
			if i > _buffer_pos then
				# Enlarge the string (if needed)
				s.enlarge(s.length + i - _buffer_pos)

				# Copy from the buffer to the string
				var j = _buffer_pos
				while j < i do
					s.add(_buffer.chars[j])
					j += 1
				end
				_buffer_pos = i

				# so \n is found
				if eol then return
			end

			# so \n is not found: more data is needed
			if end_reached then return
			fill_buffer
			# A refill that brings nothing leaves nothing to append;
			# stop here instead of failing or looping on an empty buffer.
			if _buffer_pos >= _buffer.length then return
		end
	end

	# Is the end of the stream reached?
	#
	# The buffer is refilled with `fill_buffer` when it is exhausted and
	# the end is not known to be reached yet.
	redef fun eof
	do
		if _buffer_pos < _buffer.length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer.length and end_reached
	end

	# The buffer
	private var buffer: nullable FlatBuffer = null

	# The current position in the buffer
	private var buffer_pos: Int = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Did the last `fill_buffer` reach the end of the stream?
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new FlatBuffer.with_capacity(capacity)
		_buffer_pos = 0 # need to read
	end
end
500
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
506
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# The strings written so far, in writing order
	private var content = new Array[String]

	# Is the stream closed?
	protected var closed = false

	# The result is the `to_s` of the accumulated `content`
	redef fun to_s
	do
		return content.to_s
	end

	# The stream is writable as long as it is not closed
	redef fun is_writable
	do
		return not closed
	end

	# Store `str`; writing to a closed stream is an assertion failure
	redef fun write(str)
	do
		assert not closed
		content.add(str.to_s)
	end

	# Mark the stream as closed
	redef fun close
	do
		closed = true
	end
end
527
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string.
	private var cursor: Int = 0

	# Return the code of the character under the cursor and advance,
	# or -1 when the cursor is past the end of `source`
	redef fun read_char
	do
		if cursor >= source.length then return -1
		var code = source[cursor].ascii
		cursor += 1
		return code
	end

	# Closing replaces `source` with the empty string
	redef fun close
	do
		source = ""
	end

	# Return what remains after the cursor and move the cursor to the end
	redef fun read_all
	do
		var start = cursor
		cursor = source.length
		# Nothing was consumed yet: the source itself is the answer
		if start == 0 then return source
		return source.substring_from(start)
	end

	# The end is reached once the cursor is past the last character
	redef fun eof
	do
		return cursor >= source.length
	end
end