parser: ComputeProdLocationVisitor also compute *_looses tokens
[nit.git] / lib / standard / stream.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # This file is free software, which comes along with NIT. This software is
4 # distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
5 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
6 # PARTICULAR PURPOSE. You can modify it is you want, provided this header
7 # is kept unaltered, and a notification of the changes is added.
8 # You are allowed to redistribute it and sell it, alone or is a part of
9 # another product.
10
11 # Input and output streams of characters
12 module stream
13
14 intrude import ropes
15 import error
16
17 in "C" `{
18 #include <unistd.h>
19 #include <string.h>
20 #include <signal.h>
21 `}
22
# Error raised or recorded by an operation on a `Stream`
#
# Instances are stored in `Stream::last_error`.
class IOError
	super Error
end
27
# Root of all streams: something that can be read from, written to, or both
abstract class Stream
	# Last error recorded on this stream, `null` if none occurred
	#
	#     var ifs = new FileReader.open("donotmakethisfile.binx")
	#     ifs.read_all
	#     ifs.close
	#     assert ifs.last_error != null
	var last_error: nullable IOError = null

	# Close the stream
	fun close is abstract
end
41
# A `Stream` that can be read from
abstract class Reader
	super Stream

	# Read a character. Return its ASCII value, -1 on EOF or timeout
	fun read_char: Int is abstract

	# Read at most `i` bytes
	#
	# An empty string is returned if an error is already recorded in `last_error`.
	# Negative results of `read_char` are skipped and do not count as read bytes.
	fun read(i: Int): String
	do
		if last_error != null then return ""
		var s = new FlatBuffer.with_capacity(i)
		while i > 0 and not eof do
			var c = read_char
			if c >= 0 then
				s.add(c.ascii)
				i -= 1
			end
		end
		return s.to_s
	end

	# Read a string until the end of the line.
	#
	# The line terminator '\n' and '\r\n', if any, is removed in each line.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_line == "Hello"
	# assert i.read_line == ""
	# assert i.read_line == "World"
	# assert i.eof
	# ~~~
	#
	# Only LINE FEED (`\n`), CARRIAGE RETURN & LINE FEED (`\r\n`), and
	# the end of file (EOF) are considered to delimit the end of lines.
	# CARRIAGE RETURN (`\r`) alone is not used for the end of line.
	#
	# ~~~
	# var txt2 = "Hello\r\n\n\rWorld"
	# var i2 = new StringReader(txt2)
	# assert i2.read_line == "Hello"
	# assert i2.read_line == ""
	# assert i2.read_line == "\rWorld"
	# assert i2.eof
	# ~~~
	#
	# NOTE: Use `append_line_to` if the line terminator needs to be preserved.
	fun read_line: String
	do
		if last_error != null then return ""
		if eof then return ""
		var s = new FlatBuffer
		append_line_to(s)
		return s.to_s.chomp
	end

	# Read all the lines until the eof.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_lines == ["Hello", "", "World"]
	# ~~~
	#
	# This method is more efficient than splitting
	# the result of `read_all`.
	#
	# NOTE: SEE `read_line` for details.
	fun read_lines: Array[String]
	do
		var res = new Array[String]
		while not eof do
			res.add read_line
		end
		return res
	end

	# Return an iterator that reads each line.
	#
	# The line terminator '\n' and `\r\n` is removed in each line,
	# The lines are read with `read_line`. See this method for details.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.each_line.to_a == ["Hello", "", "World"]
	# ~~~
	#
	# Unlike `read_lines` that reads all lines at the call, `each_line` is lazy.
	# Therefore, the stream should not be closed until the end of the stream.
	#
	# ~~~
	# i = new StringReader(txt)
	# var el = i.each_line
	#
	# assert el.item == "Hello"
	# el.next
	# assert el.item == ""
	# el.next
	#
	# i.close
	#
	# assert not el.is_ok
	# # closed before "world" is read
	# ~~~
	fun each_line: LineIterator do return new LineIterator(self)

	# Read all the stream until the eof.
	#
	# The content of the file is returned verbatim.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# assert i.read_all == txt
	# ~~~
	fun read_all: String
	do
		if last_error != null then return ""
		var s = new FlatBuffer
		while not eof do
			var c = read_char
			if c >= 0 then s.add(c.ascii)
		end
		return s.to_s
	end

	# Read a string until the end of the line and append it to `s`.
	#
	# Unlike `read_line` and other related methods,
	# the line terminator '\n', if any, is preserved in each line.
	# Use the method `Text::chomp` to safely remove it.
	#
	# ~~~
	# var txt = "Hello\n\nWorld\n"
	# var i = new StringReader(txt)
	# var b = new FlatBuffer
	# i.append_line_to(b)
	# assert b == "Hello\n"
	# i.append_line_to(b)
	# assert b == "Hello\n\n"
	# i.append_line_to(b)
	# assert b == txt
	# assert i.eof
	# ~~~
	#
	# If `\n` is not present at the end of the result, it means that
	# a non-eol terminated last line was returned.
	#
	# ~~~
	# var i2 = new StringReader("hello")
	# assert not i2.eof
	# var b2 = new FlatBuffer
	# i2.append_line_to(b2)
	# assert b2 == "hello"
	# assert i2.eof
	# ~~~
	#
	# NOTE: The single character LINE FEED (`\n`) delimits the end of lines.
	# Therefore CARRIAGE RETURN & LINE FEED (`\r\n`) is also recognized.
	fun append_line_to(s: Buffer)
	do
		if last_error != null then return
		loop
			var x = read_char
			if x == -1 then
				# -1 is either EOF or a timeout: only EOF ends the line
				if eof then return
			else
				var c = x.ascii
				s.chars.push(c)
				if c == '\n' then return
			end
		end
	end

	# Is the end of the stream reached?
	#
	# Returns `false` if there is still something to read.
	fun eof: Bool is abstract

	# Read the next sequence of non whitespace characters.
	#
	# Leading whitespace characters are skipped.
	# The first whitespace character that follows the result is consumed.
	#
	# An empty string is returned if the end of the file or an error is encountered.
	#
	# ~~~
	# var w = new StringReader(" Hello, \n\t World!")
	# assert w.read_word == "Hello,"
	# assert w.read_char == '\n'.ascii
	# assert w.read_word == "World!"
	# assert w.read_word == ""
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_word: String
	do
		var buf = new FlatBuffer
		var c = read_nonwhitespace
		# Any non-negative code is a real character; only negative values
		# signal EOF or an error.
		if c >= 0 then
			buf.add(c.ascii)
			while not eof do
				c = read_char
				if c < 0 then break
				var a = c.ascii
				if a.is_whitespace then break
				buf.add(a)
			end
		end
		return buf.to_s
	end

	# Skip whitespace characters (if any) then return the following non-whitespace character.
	#
	# Returns the code point of the character.
	# Return -1 on end of file or error.
	#
	# In fact, this method works like `read_char` except it skips whitespace.
	#
	# ~~~
	# var w = new StringReader(" \nab\tc")
	# assert w.read_nonwhitespace == 'a'.ascii
	# assert w.read_nonwhitespace == 'b'.ascii
	# assert w.read_nonwhitespace == 'c'.ascii
	# assert w.read_nonwhitespace == -1
	# ~~~
	#
	# Trailing whitespace at the end of the stream is skipped too,
	# and is never returned as a result.
	#
	# ~~~
	# var w2 = new StringReader("a  ")
	# assert w2.read_nonwhitespace == 'a'.ascii
	# assert w2.read_nonwhitespace == -1
	# ~~~
	#
	# `Char::is_whitespace` determines what is a whitespace.
	fun read_nonwhitespace: Int
	do
		while not eof do
			var c = read_char
			# Negative value (error or timeout): report it as is
			if c < 0 then return c
			if not c.ascii.is_whitespace then return c
		end
		# Only whitespace (or nothing) was left before the end of the stream
		return -1
	end
end
284
# Iterator returned by `Reader::each_line`.
# See the aforementioned method for details.
class LineIterator
	super Iterator[String]

	# The original stream
	var stream: Reader

	# Is there a line to get with `item`?
	#
	# When the stream is exhausted and `close_on_finish` is set,
	# the stream is closed.
	redef fun is_ok
	do
		# A line already read by `item` but not yet dropped by `next` remains
		# available, even if reading it exhausted the stream.
		if line != null then return true
		var res = not stream.eof
		if not res and close_on_finish then stream.close
		return res
	end

	# The current line.
	#
	# The line is read from `stream` with `read_line` on the first access,
	# then cached until `next` is called.
	redef fun item
	do
		var cached = self.line
		if cached != null then return cached
		cached = stream.read_line
		self.line = cached
		return cached
	end

	# The last line read (cache)
	private var line: nullable String = null

	# Move to the next line.
	#
	# If the current line was never requested, it is read then discarded.
	redef fun next
	do
		# force the read
		if line == null then item
		# drop the line
		line = null
	end

	# Close the stream when the stream is at the EOF.
	#
	# Default is false.
	var close_on_finish = false is writable

	# Close the stream if `close_on_finish` is set.
	redef fun finish
	do
		if close_on_finish then stream.close
	end
end
331
# A `Reader` that can tell whether a read would block
abstract class PollableReader
	super Reader

	# Is there something to read? (without blocking)
	fun poll_in: Bool is abstract
end
340
# A `Stream` that can be written to
abstract class Writer
	super Stream

	# Write the text `s` on the stream
	fun write(s: Text) is abstract

	# Can the stream be used to write?
	fun is_writable: Bool is abstract
end
350
# Things that can be efficiently written to a `Writer`
#
# The point of this interface is to allow the instance to be efficiently
# written into a `Writer`.
#
# Ready-to-save documents usually provide this interface.
interface Writable
	# Write itself to a `stream`
	#
	# The specific logic is left to the concrete subclasses.
	fun write_to(stream: Writer) is abstract

	# Like `write_to` but return a new String (may be quite large)
	#
	# This functionality is anecdotal, since the point of a writable
	# object is to be efficiently written to a stream without having
	# to allocate and concatenate strings.
	fun write_to_string: String
	do
		# Collect everything `write_to` emits in an in-memory writer
		var collector = new StringWriter
		write_to(collector)
		return collector.to_s
	end
end
374
redef class Text
	super Writable

	# Write the text itself on `stream` with `Writer::write`
	redef fun write_to(stream)
	do
		stream.write(self)
	end
end
379
# Input streams with a buffered input for efficiency purposes
#
# Concrete subclasses provide `fill_buffer` and `end_reached`.
abstract class BufferedReader
	super Reader

	# Read the next character of the buffer.
	#
	# Returns -1 if an error is already recorded or if the end of the stream
	# is reached. In the latter case, an `IOError` is recorded in `last_error`.
	redef fun read_char
	do
		if last_error != null then return -1
		if eof then
			last_error = new IOError("Stream has reached eof")
			return -1
		end
		var c = _buffer.chars[_buffer_pos]
		_buffer_pos += 1
		return c.ascii
	end

	# Peeks up to `i` bytes in the buffer, returns an empty string on EOF
	#
	# The operation does not consume the buffer
	#
	# ~~~nitish
	# var x = new FileReader.open("File.txt")
	# assert x.peek(5) == x.read(5)
	# ~~~
	fun peek(i: Int): String do
		if eof then return ""
		# Collect the peeked characters (`eof` may refill the buffer on the way)
		var b = new FlatBuffer.with_capacity(i)
		while i > 0 and not eof do
			b.add _buffer[_buffer_pos]
			_buffer_pos += 1
			i -= 1
		end
		# Rebuild a buffer made of the peeked characters followed by what
		# remains unread, so that the next reads see the peeked data again.
		var nbuflen = b.length + (_buffer.length - _buffer_pos)
		var nbuf = new FlatBuffer.with_capacity(nbuflen)
		nbuf.append(b)
		while _buffer_pos < _buffer.length do
			nbuf.add(_buffer[_buffer_pos])
			_buffer_pos += 1
		end
		_buffer_pos = 0
		_buffer = nbuf
		return b.to_s
	end

	# Read at most `i` bytes, refilling the buffer as needed.
	#
	# Returns an empty string on error, on EOF, or if `i` is not positive.
	redef fun read(i)
	do
		if last_error != null then return ""
		if eof then return ""
		# A non-positive count reads nothing (and must not move the position back)
		if i <= 0 then return ""
		var p = _buffer_pos
		var bufsp = _buffer.length - p
		if bufsp >= i then
			# Enough data is already buffered
			_buffer_pos += i
			return _buffer.substring(p, i).to_s
		end
		# Take what is left in the buffer, then refill and read the remainder
		_buffer_pos = _buffer.length
		var readln = _buffer.length - p
		var s = _buffer.substring(p, readln).to_s
		fill_buffer
		return s + read(i - readln)
	end

	# Read everything until the end of the stream.
	#
	# Returns an empty string if an error is already recorded.
	redef fun read_all
	do
		if last_error != null then return ""
		var s = new FlatBuffer
		while not eof do
			# Drain the current buffer, then ask for more
			var j = _buffer_pos
			var k = _buffer.length
			while j < k do
				s.add(_buffer[j])
				j += 1
			end
			_buffer_pos = j
			fill_buffer
		end
		return s.to_s
	end

	# Append the next line, line terminator `\n` included, to `s`.
	#
	# The buffer is refilled with `fill_buffer` until a `\n` is found
	# or `end_reached` is true.
	redef fun append_line_to(s)
	do
		loop
			# First phase: look for a '\n'
			var i = _buffer_pos
			while i < _buffer.length and _buffer.chars[i] != '\n' do i += 1

			var eol
			if i < _buffer.length then
				assert _buffer.chars[i] == '\n'
				i += 1
				eol = true
			else
				eol = false
			end

			# if there is something to append
			if i > _buffer_pos then
				# Enlarge the string (if needed)
				s.enlarge(s.length + i - _buffer_pos)

				# Copy from the buffer to the string
				var j = _buffer_pos
				while j < i do
					s.add(_buffer.chars[j])
					j += 1
				end
				_buffer_pos = i
			else
				# The buffer is exhausted: stop at the end of the stream,
				# otherwise refill it and retry instead of failing.
				# NOTE(review): assumes `fill_buffer` eventually provides data
				# or makes `end_reached` true - confirm for each subclass.
				if end_reached then return
				fill_buffer
				continue
			end

			if eol then
				# so \n is found
				return
			else
				# so \n is not found
				if end_reached then return
				fill_buffer
			end
		end
	end

	# Is the end of the stream reached?
	#
	# May call `fill_buffer` once when the buffer is exhausted.
	redef fun eof
	do
		if _buffer_pos < _buffer.length then return false
		if end_reached then return true
		fill_buffer
		return _buffer_pos >= _buffer.length and end_reached
	end

	# The buffer
	private var buffer: nullable FlatBuffer = null

	# The current position in the buffer
	private var buffer_pos: Int = 0

	# Fill the buffer
	protected fun fill_buffer is abstract

	# Did the last `fill_buffer` reach the end?
	protected fun end_reached: Bool is abstract

	# Allocate a `_buffer` for a given `capacity`.
	protected fun prepare_buffer(capacity: Int)
	do
		_buffer = new FlatBuffer.with_capacity(capacity)
		_buffer_pos = 0 # need to read
	end
end
528
# A `Stream` that is both a `Reader` and a `Writer`
abstract class Duplex
	super Reader
	super Writer
end
534
# `Stream` that can be used to write to a `String`
#
# Mainly used for compatibility with Writer type and tests.
class StringWriter
	super Writer

	# The pieces written so far, in order
	private var content = new Array[String]

	# The result of `content.to_s`, where `content` holds the written pieces
	redef fun to_s
	do
		return content.to_s
	end

	# Writable as long as the stream is not closed
	redef fun is_writable
	do
		return not closed
	end

	# Store `str` as a new piece; the stream must not be closed
	redef fun write(str)
	do
		assert not closed
		var piece = str.to_s
		content.add(piece)
	end

	# Is the stream closed?
	protected var closed = false

	# Mark the stream as closed
	redef fun close
	do
		closed = true
	end
end
555
# `Stream` used to read from a `String`
#
# Mainly used for compatibility with Reader type and tests.
class StringReader
	super Reader

	# The string to read from.
	var source: String

	# The current position in the string.
	private var cursor: Int = 0

	# Return the code of the character under the cursor and advance,
	# or -1 when the cursor is past the end of `source`.
	redef fun read_char
	do
		if cursor >= source.length then return -1
		var code = source[cursor].ascii
		cursor += 1
		return code
	end

	# Drop the content of `source`
	redef fun close
	do
		source = ""
	end

	# Return what remains after the cursor and move the cursor to the end.
	redef fun read_all
	do
		var start = cursor
		cursor = source.length
		# Nothing consumed yet: the whole source can be returned as is
		if start == 0 then return source
		return source.substring_from(start)
	end

	# True once the cursor is at or past the end of `source`
	redef fun eof
	do
		return cursor >= source.length
	end
end