examples: annotate examples
[nit.git] / lib / nitcc_runtime.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Runtime library required by parsers and lexers generated by nitcc
16 module nitcc_runtime
17
18 import serialization
19
20 # A abstract parser engine generated by nitcc
21 abstract class Parser
22 # The list of tokens
23 # FIXME: provide something better, like a lexer?
24 var tokens = new CircularArray[NToken]
25
26 # Look at the next token
27 # Used by generated parsers
28 fun peek_token: NToken do return tokens.first
29
30 # Consume the next token
31 # Used by generated parsers
32 fun get_token: NToken do return tokens.shift
33
34 # Consume the next token and shift to the state `dest`.
35 # Used by generated parsers
36 fun shift(dest: LRState)
37 do
38 var t = get_token
39 #print "shift {t} -> {dest}"
40 node_stack.push t
41 state_stack.push state
42 state = dest
43 end
44
45 # After a reduction on `goto` go to the next state
46 # Used by generated parsers
47 fun goto(goto: LRGoto)
48 do
49 #print "reduce from {state} -> {prod}"
50 state.goto(self, goto)
51 end
52
53 # push a new state on the stack of states (
54 # Used by generated parsers
55 fun push(dest: LRState)
56 do
57 #print "push prod {prod} -> {dest}"
58 state_stack.push state
59 state = dest
60 end
61
62 # Pop and return the last node
63 # Also pop (and discard) the associated state
64 # Used by generated parsers
65 fun pop: Node
66 do
67 var res = node_stack.pop
68 state = state_stack.pop
69 return res
70 end
71
72 # Produce a parse error and stop the parsing
73 # Used by generated parsers
74 fun parse_error
75 do
76 var token = peek_token
77 #print "* parse error in state {state} on token {token}"
78 #print " expected: {state.error_msg}"
79 #print " node_stack={node_stack.join(", ")}"
80 #print " state_stack={state_stack.join(", ")}"
81 node_stack.push(token)
82 var error: NError
83 if token isa NLexerError then
84 error = token
85 else
86 error = new NParserError
87 error.position = token.position
88 error.text = token.text
89 error.token = token
90 end
91 error.error_tree.children.add_all(node_stack)
92 error.expected = state.error_msg
93 node_stack.clear
94 node_stack.add error
95 stop = true
96 end
97
98 # The stating state for parsing
99 # Used by generated parsers
100 protected fun start_state: LRState is abstract
101
102 # The current state
103 # Used by generated parsers
104 var state: LRState is noinit
105
106 init
107 do
108 state = start_state
109 end
110
111 # The stack of nodes
112 # Used by generated parsers
113 var node_stack = new Array[Node]
114
115 # The stack of states
116 # Used by generated parsers
117 var state_stack = new Array[LRState]
118
119 # Should the parser stop
120 # Used by generated parsers
121 var stop = true is writable
122
123 # Parse a full sequence of tokens and return a complete syntactic tree
124 fun parse: Node
125 do
126 state = start_state
127 state_stack.clear
128 node_stack.clear
129 stop = false
130 while not stop do
131 #print "* current state {state}"
132 #print " tokens={tokens.join(" ")}"
133 #print " node_stack={node_stack.join(" ")}"
134 #print " state_stack={state_stack.join(" ")}"
135 state.action(self)
136 end
137 #print "* over"
138 return node_stack.first
139 end
140 end
141
142 # A state in a parser LR automaton generated by nitcc
143 # Used by generated parsers
144 abstract class LRState
145 fun action(parser: Parser) is abstract
146 fun goto(parser: Parser, goto: LRGoto) is abstract
147 fun error_msg: String do return "FIXME"
148 end
149
150 # A concrete production in a parser LR automation generated by nitcc
151 # Used by generated parsers
152 abstract class LRGoto
153 end
154
155 ###
156
157 # A abstract lexer engine generated by nitcc
158 abstract class Lexer
159 # The input stream of characters
160 var stream: String
161
162 # The stating state for lexing
163 # Used by generated parsers
164 protected fun start_state: DFAState is abstract
165
166 # Lexize a stream of characters and return a sequence of tokens
167 fun lex: CircularArray[NToken]
168 do
169 var res = new CircularArray[NToken]
170 loop
171 var t = next_token
172 if t != null then res.add t
173 if t isa NEof or t isa NError then break
174 end
175 return res
176 end
177
178 # Cursor current position (in chars, starting from 0)
179 var pos_start = 0
180
181 # Cursor current line (starting from 1)
182 var line_start = 1
183
184 # Cursor current column (in chars, starting from 1)
185 var col_start = 1
186
187 # Move the cursor and return the next token.
188 #
189 # Returns a `NEof` and the end.
190 # Returns `null` if the token is ignored.
191 fun next_token: nullable NToken
192 do
193 var state = start_state
194 var pos = pos_start
195 var pos_end = pos_start - 1
196 var line = line_start
197 var line_end = line_start - 1
198 var col = col_start
199 var col_end = col_start - 1
200 var last_state: nullable DFAState = null
201 var text = stream
202 var length = text.length
203 loop
204 if state.is_accept then
205 pos_end = pos - 1
206 line_end = line
207 col_end = col
208 last_state = state
209 end
210 var c
211 var next
212 if pos >= length then
213 c = '\0'
214 next = null
215 else
216 c = text.chars[pos]
217 next = state.trans(c)
218 end
219 if next == null then
220 var token
221 if pos_start < length then
222 if last_state == null then
223 token = new NLexerError
224 var position = new Position(pos_start, pos, line_start, line, col_start, col)
225 token.position = position
226 token.text = text.substring(pos_start, pos-pos_start+1)
227 else if not last_state.is_ignored then
228 var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
229 token = last_state.make_token(position, text)
230 else
231 token = null
232 end
233 else
234 token = new NEof
235 var position = new Position(pos, pos, line, line, col, col)
236 token.position = position
237 token.text = ""
238 end
239 pos_start = pos_end + 1
240 line_start = line_end
241 col_start = col_end
242
243 return token
244 end
245 state = next
246 pos += 1
247 col += 1
248 if c == '\n' then
249 line += 1
250 col = 1
251 end
252 end
253 end
254 end
255
256 # A state in a lexer automaton generated by nitcc
257 # Used by generated lexers
258 interface DFAState
259 fun is_accept: Bool do return false
260 fun trans(c: Char): nullable DFAState do return null
261 fun make_token(position: Position, source: String): nullable NToken is abstract
262 fun is_ignored: Bool do return false
263 end
264
265 ###
266
267 # A abstract visitor on syntactic trees generated by nitcc
268 abstract class Visitor
269 # The main entry point to visit a node `n`
270 # Should not be redefined
271 fun enter_visit(n: Node)
272 do
273 visit(n)
274 end
275
276 # The specific implementation of a visit
277 #
278 # Should be redefined by concrete visitors
279 #
280 # Should not be called directly (use `enter_visit` instead)
281 #
282 # By default, the visitor just rescursively visit the children of `n`
283 protected fun visit(n: Node)
284 do
285 n.visit_children(self)
286 end
287 end
288
289 # Print a node (using to_s) on a line and recustively each children indented (with two spaces)
290 class TreePrinterVisitor
291 super Visitor
292 var writer: Writer
293 private var indent = 0
294 redef fun visit(n)
295 do
296 for i in [0..indent[ do writer.write(" ")
297 writer.write(n.to_s)
298 writer.write("\n")
299 indent += 1
300 super
301 indent -= 1
302 end
303 end
304
305 # A position into a input stream
306 # Used to give position to tokens
307 class Position
308 serialize
309
310 var pos_start: Int
311 var pos_end: Int
312 var line_start: Int
313 var line_end: Int
314 var col_start: Int
315 var col_end: Int
316
317 redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"
318
319 # Extract the content from the given source
320 fun extract(source: String): String
321 do
322 return source.substring(pos_start, pos_end-pos_start+1)
323 end
324
325 # Get the lines covered by `self` and underline the target columns.
326 #
327 # This is useful for pretty printing errors or debug the output
328 #
329 # ~~~
330 # var src = "var Foo = new Array[Int]"
331 # var pos = new Position(0,0, 1, 1, 5, 8)
332 #
333 # assert pos.underline(src) == """
334 # var Foo = new Array[Int]
335 # ^^^"""
336 # ~~~
337 fun underline(source: Text): String
338 do
339 var res = new FlatBuffer
340
341 # All the concerned lines
342 var lines = source.split("\n")
343 for line in [line_start..line_end] do
344 res.append lines[line-1]
345 res.append "\n"
346 end
347
348 # Cover all columns, no matter their lines
349 var col_start = col_start.min(col_end)
350 var col_end = self.col_start.max(col_end)
351
352 # " ^^^^"
353 var ptr = " "*(col_start-1).max(0) + "^"*(col_end-col_start)
354 res.append ptr
355
356 return res.to_s
357 end
358 end
359
360 # A node of a syntactic tree
361 abstract class Node
362 # The name of the node (as used in the grammar file)
363 fun node_name: String do return class_name
364
365 # A point of view on the direct children of the node
366 fun children: SequenceRead[nullable Node] is abstract
367
368 # A point of view of a depth-first visit of all non-null children
369 var depth: Collection[Node] = new DephCollection(self) is lazy
370
371 # Visit all the children of the node with the visitor `v`
372 protected fun visit_children(v: Visitor)
373 do
374 for c in children do if c != null then v.enter_visit(c)
375 end
376
377 # The position of the node in the input stream
378 var position: nullable Position = null is writable
379
380 # Produce a graphiz file for the syntaxtic tree rooted at `self`.
381 fun to_dot(filepath: String)
382 do
383 var f = new FileWriter.open(filepath)
384 f.write("digraph g \{\n")
385 f.write("rankdir=BT;\n")
386
387 var a = new Array[NToken]
388 to_dot_visitor(f, a)
389
390 f.write("\{ rank=same\n")
391 var first = true
392 for n in a do
393 if first then
394 first = false
395 else
396 f.write("->")
397 end
398 f.write("n{n.object_id}")
399 end
400 f.write("[style=invis];\n")
401 f.write("\}\n")
402
403 f.write("\}\n")
404 f.close
405 end
406
407 private fun to_dot_visitor(f: Writer, a: Array[NToken])
408 do
409 f.write("n{object_id} [label=\"{node_name}\"];\n")
410 for x in children do
411 if x == null then continue
412 f.write("n{x.object_id} -> n{object_id};\n")
413 x.to_dot_visitor(f,a )
414 end
415 end
416
417 redef fun to_s do
418 var pos = position
419 if pos == null then
420 return "{node_name}"
421 else
422 return "{node_name}@({pos})"
423 end
424 end
425 end
426
427 private class DephCollection
428 super Collection[Node]
429 var node: Node
430 redef fun iterator do return new DephIterator([node].iterator)
431 end
432
433 private class DephIterator
434 super Iterator[Node]
435
436 var stack = new Array[Iterator[nullable Node]]
437
438 init(i: Iterator[nullable Node]) is old_style_init do
439 stack.push i
440 end
441
442 redef fun is_ok do return not stack.is_empty
443 redef fun item do return stack.last.item.as(not null)
444 redef fun next
445 do
446 var i = stack.last
447 stack.push i.item.children.iterator
448 i.next
449 while is_ok do
450 if not stack.last.is_ok then
451 stack.pop
452 continue
453 end
454 if stack.last.item == null then
455 stack.last.next
456 continue
457 end
458 return
459 end
460 end
461 end
462
463 # A token produced by the lexer and used in a syntactic tree
464 abstract class NToken
465 super Node
466
467 redef fun children do return once new Array[Node]
468
469 redef fun to_dot_visitor(f, a)
470 do
471 var labe = "{node_name}"
472 var pos = position
473 if pos != null then labe += "\\n{pos}"
474 var text = self.text
475 if node_name != "'{text}'" then
476 labe += "\\n'{text.escape_to_c}'"
477 end
478 f.write("n{object_id} [label=\"{labe}\",shape=box];\n")
479 a.add(self)
480 end
481
482 # The text associated with the token
483 var text: String = "" is writable
484
485 redef fun to_s do
486 var res = super
487 var text = self.text
488 if node_name != "'{text}'" then
489 res += "='{text.escape_to_c}'"
490 end
491 return res
492 end
493 end
494
495 # The special token for the end of stream
496 class NEof
497 super NToken
498 end
499
500 # A special token used to represent a parser or lexer error
501 abstract class NError
502 super NToken
503
504 # All the partially built tree during parsing (aka the node_stack)
505 var error_tree = new Nodes[Node]
506
507 # The things unexpected
508 fun unexpected: String is abstract
509
510 # The things expected (if any)
511 var expected: nullable String = null
512
513 # The error message,using `expected` and `unexpected`
514 fun message: String
515 do
516 var exp = expected
517 var res = "Unexpected {unexpected}"
518 if exp != null then res += "; is acceptable instead: {exp}"
519 return res
520 end
521 end
522
523 # A lexer error as a token for the unexpected characted
524 class NLexerError
525 super NError
526
527 redef fun unexpected do return "character '{text.chars.first}'"
528 end
529
530 # A parser error linked to a unexpected token
531 class NParserError
532 super NError
533
534 # The unexpected token
535 var token: nullable NToken = null
536
537 redef fun unexpected
538 do
539 var res = token.node_name
540 var text = token.text
541 if not text.is_empty and res != "'{text}'" then
542 res += " '{text.escape_to_c}'"
543 end
544 return res
545 end
546 end
547
548 # A hogeneous sequence of node, used to represent unbounded lists (and + modifier)
549 class Nodes[T: Node]
550 super Node
551 redef var children: Array[T] = new Array[T]
552 end
553
554 # A production with a specific, named and statically typed children
555 abstract class NProd
556 super Node
557 redef var children: SequenceRead[nullable Node] = new NProdChildren(self)
558
559 # The exact number of direct children
560 # Used to implement `children` by generated parsers
561 fun number_of_children: Int is abstract
562
563 # The specific children got by its index
564 # Used to implement `children` by generated parsers
565 fun child(x: Int): nullable Node is abstract
566 end
567
568
569 private class NProdChildren
570 super SequenceRead[nullable Node]
571 var prod: NProd
572 redef fun iterator do return new NProdIterator(prod)
573 redef fun length do return prod.number_of_children
574 redef fun is_empty do return prod.number_of_children == 0
575 redef fun [](i) do return prod.child(i)
576 end
577
578 private class NProdIterator
579 super IndexedIterator[nullable Node]
580 var prod: NProd
581 redef var index = 0
582 redef fun is_ok do return index < prod.number_of_children
583 redef fun next do index += 1
584 redef fun item do return prod.child(index)
585 end
586
587 # All-in-one abstract class to test generated parsers on a given
588 abstract class TestParser
589 # How to get a new lexer on a given stream of character
590 fun new_lexer(text: String): Lexer is abstract
591
592 # How to get a new parser
593 fun new_parser: Parser is abstract
594
595 # The name of the language (used for generated files)
596 fun name: String is abstract
597
598 # Use the class as the main enrty point of the program
599 # - parse arguments and options of the command
600 # - test the parser (see `work`)
601 fun main: Node
602 do
603 if args.is_empty then
604 print "usage {name}_test <filepath> | - | -e <text>"
605 exit 0
606 end
607
608 var filepath = args.shift
609 var text
610 if filepath == "-" then
611 text = sys.stdin.read_all
612 else if filepath == "-e" then
613 if args.is_empty then
614 print "Error: -e need a text"
615 exit 1
616 end
617 text = args.shift
618 else
619 var f = new FileReader.open(filepath)
620 text = f.read_all
621 f.close
622 end
623
624 if not args.is_empty then
625 print "Error: superfluous arguments."
626 exit 1
627 end
628
629 return work(text)
630 end
631
632 # Produce a full syntactic tree for a given stream of character
633 # Produce also statistics and output files
634 fun work(text: String): Node
635 do
636 print "INPUT: {text.length} chars"
637 var l = new_lexer(text)
638 var tokens = l.lex
639
640 var tokout = "{name}.tokens.out"
641 print "TOKEN: {tokens.length} tokens (see {tokout})"
642
643 var f = new FileWriter.open(tokout)
644 for t in tokens do
645 f.write "{t.to_s}\n"
646 end
647 f.close
648
649 var p = new_parser
650 p.tokens.add_all(tokens)
651
652 var n = p.parse
653
654 var astout = "{name}.ast.out"
655 f = new FileWriter.open(astout)
656 var tpv = new TreePrinterVisitor(f)
657 var astdotout = "{name}.ast.dot"
658 if n isa NError then
659 print "Syntax error: {n.message}"
660 print "ERROR: {n} (see {astout} and {astdotout})"
661 tpv.enter_visit(n)
662 n = n.error_tree
663 else
664 print "ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"
665 end
666 tpv.enter_visit(n)
667 n.to_dot(astdotout)
668 f.close
669
670 return n
671 end
672 end