c71ea6b50b53b13d49dc0369190de02282678941
[nit.git] / lib / nitcc_runtime.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Runtime library required by parsers and lexers generated by nitcc
16 module nitcc_runtime
17
# An abstract parser engine generated by nitcc
abstract class Parser
	# The list of tokens to consume, in order
	# FIXME: provide something better, like a lexer?
	var tokens = new List[NToken]

	# Look at the next token without consuming it
	# Used by generated parsers
	fun peek_token: NToken do return tokens.first

	# Consume and return the next token
	# Used by generated parsers
	fun get_token: NToken do return tokens.shift

	# Consume the next token and shift to the state `dest`.
	# The consumed token is pushed on `node_stack` and the current
	# state is saved on `state_stack` (popped back on reduction).
	# Used by generated parsers
	fun shift(dest: LRState)
	do
		var t = get_token
		#print "shift {t} -> {dest}"
		node_stack.push t
		state_stack.push state
		state = dest
	end

	# After a reduction on `goto` go to the next state
	# The concrete destination is resolved by the current state.
	# Used by generated parsers
	fun goto(goto: LRGoto)
	do
		#print "reduce from {state} -> {prod}"
		state.goto(self, goto)
	end

	# Push a new state on the stack of states
	# Used by generated parsers
	fun push(dest: LRState)
	do
		#print "push prod {prod} -> {dest}"
		state_stack.push state
		state = dest
	end

	# Pop and return the last node
	# Also pop (and discard) the associated state
	# Used by generated parsers
	fun pop: Node
	do
		var res = node_stack.pop
		state = state_stack.pop
		return res
	end

	# Produce a parse error and stop the parsing
	# The error node (with the partial trees attached) becomes the
	# sole content of `node_stack`, thus the result of `parse`.
	# Used by generated parsers
	fun parse_error
	do
		var token = peek_token
		#print "* parse error in state {state} on token {token}"
		#print "  expected: {state.error_msg}"
		#print "  node_stack={node_stack.join(", ")}"
		#print "  state_stack={state_stack.join(", ")}"
		node_stack.add(token)
		var error: NError
		if token isa NLexerError then
			# A lexer error is already an error node: reuse it as-is
			error = token
		else
			error = new NParserError
			error.position = token.position
			error.text = token.text
			error.token = token
		end
		# Keep the partially built trees so clients can inspect them
		error.error_tree.children.add_all(node_stack)
		error.expected = state.error_msg
		node_stack.clear
		node_stack.add error
		stop = true
	end

	# The starting state for parsing
	# Used by generated parsers
	protected fun start_state: LRState is abstract

	# The current state
	# Used by generated parsers
	var state: LRState is noinit

	init
	do
		state = start_state
	end

	# The stack of nodes
	# Used by generated parsers
	var node_stack = new Array[Node]

	# The stack of states
	# Used by generated parsers
	var state_stack = new Array[LRState]

	# Should the parser stop?
	# Used by generated parsers
	var stop writable = true

	# Parse a full sequence of tokens and return a complete syntactic tree
	fun parse: Node
	do
		# Reset everything so `parse` can be invoked more than once
		state = start_state
		state_stack.clear
		node_stack.clear
		stop = false
		while not stop do
			#print "* current state {state}"
			#print "  tokens={tokens.join(" ")}"
			#print "  node_stack={node_stack.join(" ")}"
			#print "  state_stack={state_stack.join(" ")}"
			state.action(self)
		end
		#print "* over"
		return node_stack.first
	end
end
139
# A state in a parser LR automaton generated by nitcc
# Used by generated parsers
abstract class LRState
	# Perform the next action (shift, reduce, accept or error) on `parser`
	fun action(parser: Parser) is abstract
	# Perform the state transition associated with the production `goto`
	fun goto(parser: Parser, goto: LRGoto) is abstract
	# The things expected in this state (used in error messages)
	fun error_msg: String do return "FIXME"
end
147
# A concrete production in a parser LR automaton generated by nitcc
# Used by generated parsers
abstract class LRGoto
end
152
153 ###
154
# An abstract lexer engine generated by nitcc
abstract class Lexer
	# The input stream of characters
	var stream: String

	# The starting state for lexing
	# Used by generated parsers
	protected fun start_state: DFAState is abstract

	# Lexize a stream of characters and return a sequence of tokens
	# Implements the classic longest-match rule: the DFA is run as far
	# as possible and the last accepting state seen wins.
	# A `NEof` token is always appended; on an unexpected character a
	# `NLexerError` is produced and the lexing stops.
	fun lex: List[NToken]
	do
		var res = new List[NToken]
		var state = start_state
		# Current position in the stream (in characters, 0-based)
		var pos = 0
		# Start and end positions of the token being recognized
		var pos_start = 0
		var pos_end = 0
		# Line/column bookkeeping (1-based)
		var line = 1
		var line_start = 1
		var line_end = 0
		var col = 1
		var col_start = 1
		var col_end = 0
		# Last accepting state reached (longest match so far), if any
		var last_state: nullable DFAState = null
		var text = stream
		var length = text.length
		loop
			# Remember the longest match reached so far
			if state.is_accept then
				pos_end = pos - 1
				line_end = line
				col_end = col
				last_state = state
			end
			var c
			var next
			if pos >= length then
				# Simulated end-of-stream: no transition possible
				c = '\0'
				next = null
			else
				c = text.chars[pos]
				next = state.trans(c)
			end
			if next == null then
				# The DFA is stuck: emit the last accepted token (or fail)
				if pos_start < length then
					if last_state == null then
						# No accepting state was ever reached: lexer error
						var token = new NLexerError
						var position = new Position(pos_start, pos, line_start, line, col_start, col)
						token.position = position
						token.text = text.substring(pos_start, pos-pos_start+1)
						res.add token
						break
					end
					var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
					var token = last_state.make_token(position, text.substring(pos_start, pos_end-pos_start+1))
					# A null token is ignored (see `DFAState::make_token`)
					if token != null then res.add(token)
				end
				if pos >= length then
					# End of the stream: emit the EOF token and stop
					var token = new NEof
					var position = new Position(pos, pos, line, line, col, col)
					token.position = position
					token.text = ""
					res.add token
					break
				end
				# Restart the automaton just after the last accepted token
				state = start_state
				pos_start = pos_end + 1
				pos = pos_start
				line_start = line_end
				line = line_start
				col_start = col_end
				col = col_start

				last_state = null
				continue
			end
			# Advance the automaton and the position counters
			state = next
			pos += 1
			col += 1
			if c == '\n' then
				line += 1
				col = 1
			end
		end
		return res
	end
end
241
# A state in a lexer automaton generated by nitcc
# Used by generated lexers
interface DFAState
	# Is this state an accepting state (i.e. may it terminate a token)?
	fun is_accept: Bool do return false
	# The transition on the character `c`, or null if there is none
	fun trans(c: Char): nullable DFAState do return null
	# Build the token for the matched `text` at `position`.
	# A null result means the match is skipped by the lexer
	# (e.g. blanks and comments).
	fun make_token(position: Position, text: String): nullable NToken is abstract
end
249
250 ###
251
# An abstract visitor on syntactic trees generated by nitcc
abstract class Visitor
	# The main entry point to visit a node `n`
	# Should not be redefined
	fun enter_visit(n: Node)
	do
		visit(n)
	end

	# The specific implementation of a visit
	#
	# Should be redefined by concrete visitors
	#
	# Should not be called directly (use `enter_visit` instead)
	#
	# By default, the visitor just recursively visits the children of `n`
	protected fun visit(n: Node)
	do
		n.visit_children(self)
	end
end
273
# Print a node (using `to_s`) on a line and recursively each child indented (with two spaces)
class TreePrinterVisitor
	super Visitor
	# Where the tree is written
	var writer: OStream
	# Current depth in the tree, used as the indentation level
	private var indent = 0
	init(writer: OStream) do self.writer = writer
	redef fun visit(n)
	do
		# Two spaces per level, as documented on the class
		# (the previous code wrote a single space per level)
		for i in [0..indent[ do writer.write("  ")
		writer.write(n.to_s)
		writer.write("\n")
		# Children are printed one level deeper
		indent += 1
		super
		indent -= 1
	end
end
290
# A position into an input stream
# Used to give positions to tokens
class Position
	# Offset of the first character (0-based, see `Lexer::lex`)
	var pos_start: Int
	# Offset of the last character
	var pos_end: Int
	# Line of the first character (1-based)
	var line_start: Int
	# Line of the last character
	var line_end: Int
	# Column of the first character (1-based)
	var col_start: Int
	# Column of the last character
	var col_end: Int
	redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"
end
302
# A node of a syntactic tree
abstract class Node
	# The name of the node (as used in the grammar file)
	fun node_name: String do return class_name

	# A point of view on the direct children of the node
	fun children: SequenceRead[nullable Node] is abstract

	# A point of view of a depth-first visit of all non-null children
	var depth: Collection[Node] = new DephCollection(self)

	# Visit all the children of the node with the visitor `v`
	protected fun visit_children(v: Visitor)
	do
		for c in children do if c != null then v.enter_visit(c)
	end

	# The position of the node in the input stream
	var position: nullable Position writable = null

	# Produce a graphviz (dot) file for the syntactic tree rooted at `self`.
	fun to_dot(filepath: String)
	do
		var f = new OFStream.open(filepath)
		f.write("digraph g \{\n")
		f.write("rankdir=BT;\n")

		# The tokens (leaves) are collected during the visit
		var a = new Array[NToken]
		to_dot_visitor(f, a)

		# Put all tokens on the same rank, chained with invisible edges,
		# so the leaves are laid out in input order
		f.write("\{ rank=same\n")
		var first = true
		for n in a do
			if first then
				first = false
			else
				f.write("->")
			end
			f.write("n{n.object_id}")
		end
		f.write("[style=invis];\n")
		f.write("\}\n")

		f.write("\}\n")
		f.close
	end

	# Recursively write the dot node and edges for `self` and its children.
	# Encountered tokens are appended to `a` (see `to_dot`).
	private fun to_dot_visitor(f: OStream, a: Array[NToken])
	do
		f.write("n{object_id} [label=\"{node_name}\"];\n")
		for x in children do
			if x == null then continue
			f.write("n{x.object_id} -> n{object_id};\n")
			x.to_dot_visitor(f,a )
		end
	end

	redef fun to_s do
		var pos = position
		if pos == null then
			return "{node_name}"
		else
			return "{node_name}@({pos})"
		end
	end
end
369
# Lazy collection that traverses the tree rooted at `node` in
# depth-first order (see `Node::depth`).
# NOTE(review): "Deph" looks like a typo for "Depth", but renaming
# the class would be an API change.
private class DephCollection
	super Collection[Node]
	var node: Node
	redef fun iterator do return new DephIterator([node].iterator)
end
375
# Depth-first iterator on the non-null nodes of a tree.
# `stack` holds one children-iterator per level currently being visited;
# the current item is the one of the top-most iterator.
private class DephIterator
	super Iterator[Node]
	var stack = new List[Iterator[nullable Node]]

	init(i: Iterator[nullable Node])
	do
		stack.add i
	end

	redef fun is_ok do return not stack.is_empty
	redef fun item do return stack.last.item.as(not null)
	redef fun next
	do
		# Descend first: push the children of the current item,
		# then advance past the item itself
		var i = stack.last
		stack.push i.item.children.iterator
		i.next
		# Advance to the next non-null item, popping exhausted levels
		while is_ok do
			if not stack.last.is_ok then
				stack.pop
				continue
			end
			if stack.last.item == null then
				stack.last.next
				continue
			end
			return
		end
	end
end
405
# A token produced by the lexer and used in a syntactic tree
abstract class NToken
	super Node

	# A token is a leaf: it has no children
	redef fun children do return once new Array[Node]

	redef fun to_dot_visitor(f, a)
	do
		var labe = "{node_name}"
		var pos = position
		if pos != null then labe += "\\n{pos}"
		# Only display the text when it is not already the node name
		# (i.e. skip it for keywords and operators)
		var text = self.text
		if node_name != "'{text}'" then
			labe += "\\n'{text.escape_to_c}'"
		end
		f.write("n{object_id} [label=\"{labe}\",shape=box];\n")
		a.add(self)
	end

	# The text associated with the token
	var text: String writable = ""

	redef fun to_s do
		var res = super
		# Only display the text when it is not already the node name
		var text = self.text
		if node_name != "'{text}'" then
			res += "='{text.escape_to_c}'"
		end
		return res
	end
end
437
# The special token for the end of stream
# (always appended to the result of `Lexer::lex`)
class NEof
	super NToken
end
442
# A special token used to represent a parser or lexer error
abstract class NError
	super NToken

	# All the partially built trees during parsing (aka the node_stack)
	var error_tree = new Nodes[Node]

	# The things unexpected
	fun unexpected: String is abstract

	# The things expected (if any)
	var expected: nullable String = null

	# The error message, using `expected` and `unexpected`
	fun message: String
	do
		var exp = expected
		var res = "Unexpected {unexpected}"
		if exp != null then res += "; is acceptable instead: {exp}"
		return res
	end
end
465
# A lexer error as a token for the unexpected character
class NLexerError
	super NError

	# The unexpected thing is the first (offending) character of the text
	redef fun unexpected do return "character '{text.chars.first}'"
end
472
# A parser error linked to an unexpected token
class NParserError
	super NError

	# The unexpected token (set by `Parser::parse_error`)
	var token: nullable NToken = null

	redef fun unexpected
	do
		# Copy the nullable attribute to a local so it can be
		# null-checked (the previous code dereferenced it directly,
		# crashing on a manually built error with no token).
		var token = self.token
		if token == null then return "token"
		var res = token.node_name
		var text = token.text
		# Only add the text when it is informative (non-empty and not
		# already the node name, as for keywords and operators)
		if not text.is_empty and res != "'{text}'" then
			res += " '{text.escape_to_c}'"
		end
		return res
	end
end
490
# A homogeneous sequence of nodes, used to represent unbounded lists (and the + modifier)
class Nodes[T: Node]
	super Node
	redef var children: Array[T] = new Array[T]
end
496
# A production with specific, named and statically typed children
class NProd
	super Node
	# The children are exposed through an adapter over `child`/`number_of_children`
	redef var children: SequenceRead[nullable Node] = new NProdChildren(self)

	# The exact number of direct children
	# Used to implement `children` by generated parsers
	fun number_of_children: Int is abstract

	# The specific child got by its index
	# Used to implement `children` by generated parsers
	fun child(x: Int): nullable Node is abstract
end
510
511
# Adapter that presents the indexed children of a `NProd` as a read-only sequence
private class NProdChildren
	super SequenceRead[nullable Node]
	var prod: NProd
	redef fun iterator do return new NProdIterator(prod)
	redef fun length do return prod.number_of_children
	redef fun is_empty do return prod.number_of_children == 0
	redef fun [](i) do return prod.child(i)
end
520
# Iterator over the children of a `NProd` (see `NProdChildren`)
private class NProdIterator
	super IndexedIterator[nullable Node]
	var prod: NProd
	redef var index = 0
	redef fun is_ok do return index < prod.number_of_children
	redef fun next do index += 1
	redef fun item do return prod.child(index)
end
529
# All-in-one abstract class to test generated parsers on a given input
abstract class TestParser
	# How to get a new lexer on a given stream of characters
	fun new_lexer(text: String): Lexer is abstract

	# How to get a new parser
	fun new_parser: Parser is abstract

	# The name of the language (used for generated files)
	fun name: String is abstract

	# Use the class as the main entry point of the program
	# - parse arguments and options of the command
	# - test the parser (see `work`)
	fun main: Node
	do
		if args.is_empty then
			print "usage {name}_test <filepath> | - | -e <text>"
			exit 0
		end

		var filepath = args.shift
		var text
		if filepath == "-" then
			# Read the input from the standard input
			text = sys.stdin.read_all
		else if filepath == "-e" then
			# The input text is given as the next argument
			if args.is_empty then
				print "Error: -e need a text"
				exit 1
			end
			text = args.shift
		else
			# Read the input from the named file
			var f = new IFStream.open(filepath)
			text = f.read_all
			f.close
		end

		if not args.is_empty then
			print "Error: superfluous arguments."
			exit 1
		end

		return work(text)
	end

	# Produce a full syntactic tree for a given stream of characters
	# Produce also statistics and output files
	fun work(text: String): Node
	do
		print "INPUT: {text.length} chars"
		var l = new_lexer(text)
		var tokens = l.lex

		# Dump the tokens in a `{name}.tokens.out` file
		var tokout = "{name}.tokens.out"
		print "TOKEN: {tokens.length} tokens (see {tokout})"

		var f = new OFStream.open(tokout)
		for t in tokens do
			f.write "{t.to_s}\n"
		end
		f.close

		var p = new_parser
		p.tokens.add_all(tokens)

		var n = p.parse

		# Dump the tree in `{name}.ast.out` and `{name}.ast.dot` files
		var astout = "{name}.ast.out"
		f = new OFStream.open(astout)
		var tpv = new TreePrinterVisitor(f)
		var astdotout = "{name}.ast.dot"
		if n isa NError then
			# On a syntax error, report it then dump the partial trees instead
			print "Syntax error: {n.message}"
			print "ERROR: {n} (see {astout} and {astdotout})"
			tpv.enter_visit(n)
			n = n.error_tree
		else
			print "ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"
		end
		tpv.enter_visit(n)
		n.to_dot(astdotout)
		f.close

		return n
	end
end