1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Runtime library required by parsers and lexers generated by nitcc
18 # An abstract parser engine generated by nitcc
21 # FIXME: provide something better, like a lexer?
22 var tokens
= new List[NToken]
# Return the next token of the stream without consuming it.
# Used by generated parsers
fun peek_token: NToken
do
	return tokens.first
end
# Remove the next token from the stream and return it.
# Used by generated parsers
fun get_token: NToken
do
	return tokens.shift
end
32 # Consume the next token and shift to the state `dest`.
33 # Used by generated parsers
34 fun shift
(dest
: LRState)
37 #print "shift {t} -> {dest}"
39 state_stack
.push state
43 # After a reduction on `goto` go to the next state
44 # Used by generated parsers
45 fun goto
(goto
: LRGoto)
47 #print "reduce from {state} -> {prod}"
48 state
.goto
(self, goto
)
51 # Push a new state on the stack of states
52 # Used by generated parsers
53 fun push
(dest
: LRState)
55 #print "push prod {prod} -> {dest}"
56 state_stack
.push state
60 # Pop and return the last node
61 # Also pop (and discard) the associated state
62 # Used by generated parsers
65 var res
= node_stack
.pop
66 state
= state_stack
.pop
70 # Produce a parse error and stop the parsing
71 # Used by generated parsers
74 var token
= peek_token
75 #print "* parse error in state {state} on token {token}"
76 #print " expected: {state.error_msg}"
77 #print " node_stack={node_stack.join(", ")}"
78 #print " state_stack={state_stack.join(", ")}"
81 if token
isa NLexerError then
84 error
= new NParserError
85 error
.position
= token
.position
86 error
.text
= token
.text
89 error
.error_tree
.children
.add_all
(node_stack
)
90 error
.expected
= state
.error_msg
96 # The starting state for parsing
97 # Used by generated parsers
98 protected fun start_state
: LRState is abstract
101 # Used by generated parsers
102 var state
: LRState is noinit
110 # Used by generated parsers
111 var node_stack
= new Array[Node]
113 # The stack of states
114 # Used by generated parsers
115 var state_stack
= new Array[LRState]
117 # Should the parser stop
118 # Used by generated parsers
119 var stop
writable = true
121 # Parse a full sequence of tokens and return a complete syntactic tree
129 #print "* current state {state}"
130 #print " tokens={tokens.join(" ")}"
131 #print " node_stack={node_stack.join(" ")}"
132 #print " state_stack={state_stack.join(" ")}"
136 return node_stack
.first
140 # A state in a parser LR automaton generated by nitcc
141 # Used by generated parsers
142 abstract class LRState
143 fun action
(parser
: Parser) is abstract
144 fun goto
(parser
: Parser, goto
: LRGoto) is abstract
# A message describing what was expected in this state (placeholder).
fun error_msg: String
do
	return "FIXME"
end
148 # A concrete production in a parser LR automaton generated by nitcc
149 # Used by generated parsers
150 abstract class LRGoto
155 # An abstract lexer engine generated by nitcc
157 # The input stream of characters
160 # The starting state for lexing
161 # Used by generated parsers
162 protected fun start_state
: DFAState is abstract
164 # Lexize a stream of characters and return a sequence of tokens
165 fun lex
: List[NToken]
167 var res
= new List[NToken]
168 var state
= start_state
178 var last_state
: nullable DFAState = null
180 var length
= text
.length
182 if state
.is_accept
then
190 if pos
>= length
then
195 next
= state
.trans
(c
)
198 if pos_start
< length
then
199 if last_state
== null then
200 var token
= new NLexerError
201 var position
= new Position(pos_start
, pos
, line_start
, line
, col_start
, col
)
202 token
.position
= position
203 token
.text
= text
.substring
(pos_start
, pos-pos_start
+1)
207 var position
= new Position(pos_start
, pos_end
, line_start
, line_end
, col_start
, col_end
)
208 var token
= last_state
.make_token
(position
, text
.substring
(pos_start
, pos_end-pos_start
+1))
209 if token
!= null then res
.add
(token
)
211 if pos
>= length
then
213 var position
= new Position(pos
, pos
, line
, line
, col
, col
)
214 token
.position
= position
220 pos_start
= pos_end
+ 1
222 line_start
= line_end
242 # A state in a lexer automaton generated by nitcc
243 # Used by generated lexers
# Is the state an accepting (final) state? Default is no.
fun is_accept: Bool
do
	return false
end
# The destination state when reading `c`; null if there is no transition.
fun trans(c: Char): nullable DFAState
do
	return null
end
247 fun make_token
(position
: Position, text
: String): nullable NToken is abstract
252 # An abstract visitor on syntactic trees generated by nitcc
253 abstract class Visitor
254 # The main entry point to visit a node `n`
255 # Should not be redefined
256 fun enter_visit
(n
: Node)
261 # The specific implementation of a visit
263 # Should be redefined by concrete visitors
265 # Should not be called directly (use `enter_visit` instead)
267 # By default, the visitor just recursively visits the children of `n`
268 protected fun visit
(n
: Node)
270 n
.visit_children
(self)
274 # Print a node (using to_s) on a line and recursively each child indented (with two spaces)
275 class TreePrinterVisitor
278 private var indent
= 0
279 init(writer
: OStream) do self.writer
= writer
282 for i
in [0..indent
[ do writer
.write
(" ")
291 # A position into an input stream
292 # Used to give position to tokens
# Render as "line_start:col_start-line_end:col_end".
redef fun to_s
do
	return "{line_start}:{col_start}-{line_end}:{col_end}"
end
303 # A node of a syntactic tree
305 # The name of the node (as used in the grammar file)
# The node name; by default it is the name of the class.
fun node_name: String
do
	return class_name
end
308 # A point of view on the direct children of the node
309 fun children
: SequenceRead[nullable Node] is abstract
311 # A point of view of a depth-first visit of all non-null children
312 var depth
: Collection[Node] = new DephCollection(self)
314 # Visit all the children of the node with the visitor `v`
315 protected fun visit_children
(v
: Visitor)
317 for c
in children
do if c
!= null then v
.enter_visit
(c
)
320 # The position of the node in the input stream
321 var position
: nullable Position writable = null
323 # Produce a Graphviz file for the syntactic tree rooted at `self`.
324 fun to_dot
(filepath
: String)
326 var f
= new OFStream.open
(filepath
)
327 f
.write
("digraph g \{\n")
328 f
.write
("rankdir=BT;\n")
330 var a
= new Array[NToken]
333 f
.write
("\{ rank=same\n")
341 f
.write
("n{n.object_id}")
343 f
.write
("[style=invis];\n")
350 private fun to_dot_visitor
(f
: OStream, a
: Array[NToken])
352 f
.write
("n{object_id} [label=\"{node_name}\
"];\n")
354 if x
== null then continue
355 f
.write
("n{x.object_id} -> n{object_id};\n")
356 x
.to_dot_visitor
(f
,a
)
365 return "{node_name}@({pos})"
370 private class DephCollection
371 super Collection[Node]
# Depth-first iteration, seeded with the root node.
redef fun iterator
do
	return new DephIterator([node].iterator)
end
376 private class DephIterator
378 var stack
= new List[Iterator[nullable Node]]
380 init(i
: Iterator[nullable Node])
# The traversal is finished once the stack of iterators is exhausted.
redef fun is_ok
do
	return not stack.is_empty
end
# The current node; the topmost iterator is known to point to a non-null item.
redef fun item
do
	return stack.last.item.as(not null)
end
390 stack
.push i
.item
.children
.iterator
393 if not stack
.last
.is_ok
then
397 if stack
.last
.item
== null then
406 # A token produced by the lexer and used in a syntactic tree
407 abstract class NToken
# Tokens are leaves: expose a single shared empty children list.
redef fun children
do
	return once new Array[Node]
end
412 redef fun to_dot_visitor
(f
, a
)
414 var labe
= "{node_name}"
416 if pos
!= null then labe
+= "\\n{pos}"
418 if node_name
!= "'{text}'" then
419 labe
+= "\\n'{text.escape_to_c}'"
421 f
.write
("n{object_id} [label=\"{labe}\
",shape=box];\n")
425 # The text associated with the token
426 var text
: String writable = ""
431 if node_name
!= "'{text}'" then
432 res
+= "='{text.escape_to_c}'"
438 # The special token for the end of stream
443 # A special token used to represent a parser or lexer error
444 abstract class NError
447 # All the partially built trees during parsing (aka the node_stack)
448 var error_tree
= new Nodes[Node]
450 # The things unexpected
451 fun unexpected
: String is abstract
453 # The things expected (if any)
454 var expected
: nullable String = null
456 # The error message, using `expected` and `unexpected`
460 var res
= "Unexpected {unexpected}"
461 if exp
!= null then res
+= "; is acceptable instead: {exp}"
466 # A lexer error as a token for the unexpected character
# The offending character that the lexer could not handle.
redef fun unexpected
do
	return "character '{text.chars.first}'"
end
473 # A parser error linked to an unexpected token
477 # The unexpected token
478 var token
: nullable NToken = null
482 var res
= token
.node_name
483 var text
= token
.text
484 if not text
.is_empty
and res
!= "'{text}'" then
485 res
+= " '{text.escape_to_c}'"
491 # A homogeneous sequence of nodes, used to represent unbounded lists (and + modifier)
494 redef var children
: Array[T
] = new Array[T
]
497 # A production with a specific, named and statically typed children
500 redef var children
: SequenceRead[nullable Node] = new NProdChildren(self)
502 # The exact number of direct children
503 # Used to implement `children` by generated parsers
504 fun number_of_children
: Int is abstract
506 # The specific children got by its index
507 # Used to implement `children` by generated parsers
508 fun child
(x
: Int): nullable Node is abstract
512 private class NProdChildren
513 super SequenceRead[nullable Node]
redef fun iterator
do
	return new NProdIterator(prod)
end
redef fun length
do
	return prod.number_of_children
end
redef fun is_empty
do
	return prod.number_of_children == 0
end
redef fun [](i)
do
	return prod.child(i)
end
521 private class NProdIterator
522 super IndexedIterator[nullable Node]
redef fun is_ok
do
	return index < prod.number_of_children
end
redef fun next
do
	index += 1
end
redef fun item
do
	return prod.child(index)
end
530 # All-in-one abstract class to test generated parsers on a given input
531 abstract class TestParser
532 # How to get a new lexer on a given stream of character
533 fun new_lexer
(text
: String): Lexer is abstract
535 # How to get a new parser
536 fun new_parser
: Parser is abstract
538 # The name of the language (used for generated files)
539 fun name
: String is abstract
541 # Use the class as the main entry point of the program
542 # - parse arguments and options of the command
543 # - test the parser (see `work`)
546 if args
.is_empty
then
547 print
"usage {name}_test <filepath> | - | -e <text>"
551 var filepath
= args
.shift
553 if filepath
== "-" then
554 text
= sys
.stdin
.read_all
555 else if filepath
== "-e" then
556 if args
.is_empty
then
557 print
"Error: -e need a text"
562 var f
= new IFStream.open
(filepath
)
567 if not args
.is_empty
then
568 print
"Error: superfluous arguments."
575 # Produce a full syntactic tree for a given stream of character
576 # Produce also statistics and output files
577 fun work
(text
: String): Node
579 print
"INPUT: {text.length} chars"
580 var l
= new_lexer
(text
)
583 var tokout
= "{name}.tokens.out"
584 print
"TOKEN: {tokens.length} tokens (see {tokout})"
586 var f
= new OFStream.open
(tokout
)
593 p
.tokens
.add_all
(tokens
)
597 var astout
= "{name}.ast.out"
598 f
= new OFStream.open
(astout
)
599 var tpv
= new TreePrinterVisitor(f
)
600 var astdotout
= "{name}.ast.dot"
602 print
"Syntax error: {n.message}"
603 print
"ERROR: {n} (see {astout} and {astdotout})"
607 print
"ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"