1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Runtime library required by parsers and lexers generated by nitcc
# An abstract parser engine generated by nitcc
23 # FIXME: provide something better, like a lexer?
var tokens = new CircularArray[NToken]

# Look at the next token without consuming it
# Used by generated parsers
fun peek_token: NToken do return tokens.first

# Consume the next token
# Used by generated parsers
fun get_token: NToken do return tokens.shift
34 # Consume the next token and shift to the state `dest`.
35 # Used by generated parsers
36 fun shift
(dest
: LRState)
39 #print "shift {t} -> {dest}"
41 state_stack
.push state
45 # After a reduction on `goto` go to the next state
46 # Used by generated parsers
47 fun goto
(goto
: LRGoto)
49 #print "reduce from {state} -> {prod}"
50 state
.goto
(self, goto
)
# Push a new state on the stack of states
54 # Used by generated parsers
55 fun push
(dest
: LRState)
57 #print "push prod {prod} -> {dest}"
58 state_stack
.push state
62 # Pop and return the last node
63 # Also pop (and discard) the associated state
64 # Used by generated parsers
67 var res
= node_stack
.pop
68 state
= state_stack
.pop
72 # Produce a parse error and stop the parsing
73 # Used by generated parsers
76 var token
= peek_token
77 #print "* parse error in state {state} on token {token}"
78 #print " expected: {state.error_msg}"
79 #print " node_stack={node_stack.join(", ")}"
80 #print " state_stack={state_stack.join(", ")}"
81 node_stack
.push
(token
)
83 if token
isa NLexerError then
86 error
= new NParserError
87 error
.position
= token
.position
88 error
.text
= token
.text
91 error
.error_tree
.children
.add_all
(node_stack
)
92 error
.expected
= state
.error_msg
# The starting state for parsing
99 # Used by generated parsers
# The starting state for parsing
# Used by generated parsers
protected fun start_state: LRState is abstract

# The current LR state of the automaton
# Used by generated parsers
var state: LRState is noinit

# The stack of nodes under construction
# Used by generated parsers
var node_stack = new Array[Node]

# The stack of states
# Used by generated parsers
var state_stack = new Array[LRState]

# Should the parser stop
# Used by generated parsers
var stop = true is writable
123 # Parse a full sequence of tokens and return a complete syntactic tree
131 #print "* current state {state}"
132 #print " tokens={tokens.join(" ")}"
133 #print " node_stack={node_stack.join(" ")}"
134 #print " state_stack={state_stack.join(" ")}"
138 return node_stack
.first
142 # A state in a parser LR automaton generated by nitcc
143 # Used by generated parsers
144 abstract class LRState
# Apply the action of this state on `parser`
fun action(parser: Parser) is abstract

# After a reduction on `goto`, move `parser` to the next state
fun goto(parser: Parser, goto: LRGoto) is abstract

# A default error message; redefined by generated states
fun error_msg: String do return "FIXME"
150 # A concrete production in a parser LR automation generated by nitcc
151 # Used by generated parsers
152 abstract class LRGoto
# An abstract lexer engine generated by nitcc
159 # The input stream of characters
# The starting state for lexing
163 # Used by generated parsers
protected fun start_state: DFAState is abstract
# Lex a stream of characters and return a sequence of tokens
167 fun lex
: CircularArray[NToken]
169 var res
= new CircularArray[NToken]
172 if t
!= null then res
.add t
173 if t
isa NEof or t
isa NError then break
178 # Cursor current position (in chars, starting from 0)
181 # Cursor current line (starting from 1)
184 # Cursor current column (in chars, starting from 1)
187 # Move the cursor and return the next token.
# Returns a `NEof` at the end.
190 # Returns `null` if the token is ignored.
191 fun next_token
: nullable NToken
193 var state
= start_state
195 var pos_end
= pos_start
- 1
196 var line
= line_start
197 var line_end
= line_start
- 1
199 var col_end
= col_start
- 1
200 var last_state
: nullable DFAState = null
202 var length
= text
.length
204 if state
.is_accept
then
212 if pos
>= length
then
217 next
= state
.trans
(c
)
221 if pos_start
< length
then
222 if last_state
== null then
223 token
= new NLexerError
224 var position
= new Position(pos_start
, pos
, line_start
, line
, col_start
, col
)
225 token
.position
= position
226 token
.text
= text
.substring
(pos_start
, pos-pos_start
+1)
227 else if not last_state
.is_ignored
then
228 var position
= new Position(pos_start
, pos_end
, line_start
, line_end
, col_start
, col_end
)
229 token
= last_state
.make_token
(position
, text
)
235 var position
= new Position(pos
, pos
, line
, line
, col
, col
)
236 token
.position
= position
239 pos_start
= pos_end
+ 1
240 line_start
= line_end
256 # A state in a lexer automaton generated by nitcc
257 # Used by generated lexers
# Is this state an accepting state? Default is false.
fun is_accept: Bool do return false

# The transition on character `c`, or null if there is none
fun trans(c: Char): nullable DFAState do return null

# Build the token recognized at this accepting state
fun make_token(position: Position, source: String): nullable NToken is abstract

# Should tokens recognized here be discarded (e.g. whitespace)?
fun is_ignored: Bool do return false
# An abstract visitor on syntactic trees generated by nitcc
268 abstract class Visitor
269 # The main entry point to visit a node `n`
270 # Should not be redefined
271 fun enter_visit
(n
: Node)
276 # The specific implementation of a visit
278 # Should be redefined by concrete visitors
280 # Should not be called directly (use `enter_visit` instead)
# By default, the visitor just recursively visits the children of `n`
283 protected fun visit
(n
: Node)
285 n
.visit_children
(self)
# Print a node (using to_s) on a line and recursively each child indented (with two spaces)
290 class TreePrinterVisitor
293 private var indent
= 0
296 for i
in [0..indent
[ do writer
.write
(" ")
# A position into an input stream
306 # Used to give position to tokens
redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"
319 # Extract the content from the given source
fun extract(source: String): String
do
	return source.substring(pos_start, pos_end-pos_start+1)
end
325 # Get the lines covered by `self` and underline the target columns.
327 # This is useful for pretty printing errors or debug the output
330 # var src = "var Foo = new Array[Int]"
331 # var pos = new Position(0,0, 1, 1, 5, 8)
333 # assert pos.underline(src) == """
334 # var Foo = new Array[Int]
337 fun underline
(source
: Text): String
339 var res
= new FlatBuffer
341 # All the concerned lines
342 var lines
= source
.split
("\n")
343 for line
in [line_start
..line_end
] do
344 res
.append lines
[line-1
]
348 # Cover all columns, no matter their lines
349 var col_start
= col_start
.min
(col_end
)
350 var col_end
= self.col_start
.max
(col_end
)
353 var ptr
= " "*(col_start-1
).max
(0) + "^"*(col_end-col_start
)
360 # A node of a syntactic tree
362 # The name of the node (as used in the grammar file)
fun node_name: String do return class_name

# A point of view on the direct children of the node
fun children: SequenceRead[nullable Node] is abstract

# A point of view of a depth-first visit of all non-null children
var depth: Collection[Node] = new DephCollection(self) is lazy
371 # Visit all the children of the node with the visitor `v`
protected fun visit_children(v: Visitor)
do
	for c in children do if c != null then v.enter_visit(c)
end
377 # The position of the node in the input stream
var position: nullable Position = null is writable
# Produce a graphviz file for the syntactic tree rooted at `self`.
381 fun to_dot
(filepath
: String)
383 var f
= new FileWriter.open
(filepath
)
384 f
.write
("digraph g \{\n")
385 f
.write
("rankdir=BT;\n")
387 var a
= new Array[NToken]
390 f
.write
("\{ rank=same\n")
398 f
.write
("n{n.object_id}")
400 f
.write
("[style=invis];\n")
407 private fun to_dot_visitor
(f
: Writer, a
: Array[NToken])
409 f
.write
("n{object_id} [label=\"{node_name}\
"];\n")
411 if x
== null then continue
412 f
.write
("n{x.object_id} -> n{object_id};\n")
413 x
.to_dot_visitor
(f
,a
)
422 return "{node_name}@({pos})"
427 private class DephCollection
428 super Collection[Node]
redef fun iterator do return new DephIterator([node].iterator)
433 private class DephIterator
# The stack of pending iterators for the depth-first traversal
var stack = new Array[Iterator[nullable Node]]
438 init(i
: Iterator[nullable Node]) is old_style_init
do
redef fun is_ok do return not stack.is_empty
redef fun item do return stack.last.item.as(not null)
447 stack
.push i
.item
.children
.iterator
450 if not stack
.last
.is_ok
then
454 if stack
.last
.item
== null then
463 # A token produced by the lexer and used in a syntactic tree
464 abstract class NToken
# Tokens have no children: a shared empty array
redef fun children do return once new Array[Node]
469 redef fun to_dot_visitor
(f
, a
)
471 var labe
= "{node_name}"
473 if pos
!= null then labe
+= "\\n{pos}"
475 if node_name
!= "'{text}'" then
476 labe
+= "\\n'{text.escape_to_c}'"
478 f
.write
("n{object_id} [label=\"{labe}\
",shape=box];\n")
482 # The text associated with the token
var text: String = "" is writable
488 if node_name
!= "'{text}'" then
489 res
+= "='{text.escape_to_c}'"
495 # The special token for the end of stream
500 # A special token used to represent a parser or lexer error
501 abstract class NError
504 # All the partially built tree during parsing (aka the node_stack)
var error_tree = new Nodes[Node]

# The things unexpected
fun unexpected: String is abstract

# The things expected (if any)
var expected: nullable String = null
513 # The error message,using `expected` and `unexpected`
517 var res
= "Unexpected {unexpected}"
518 if exp
!= null then res
+= "; is acceptable instead: {exp}"
# A lexer error as a token for the unexpected character
redef fun unexpected do return "character '{text.chars.first}'"
# A parser error linked to an unexpected token
534 # The unexpected token
var token: nullable NToken = null
539 var res
= token
.node_name
540 var text
= token
.text
541 if not text
.is_empty
and res
!= "'{text}'" then
542 res
+= " '{text.escape_to_c}'"
# A homogeneous sequence of nodes, used to represent unbounded lists (and the + modifier)
redef var children: Array[T] = new Array[T]
554 # A production with a specific, named and statically typed children
redef var children: SequenceRead[nullable Node] = new NProdChildren(self)
559 # The exact number of direct children
560 # Used to implement `children` by generated parsers
fun number_of_children: Int is abstract

# The specific child accessed by its index
# Used to implement `children` by generated parsers
fun child(x: Int): nullable Node is abstract
569 private class NProdChildren
570 super SequenceRead[nullable Node]
redef fun iterator do return new NProdIterator(prod)
redef fun length do return prod.number_of_children
redef fun is_empty do return prod.number_of_children == 0
redef fun [](i) do return prod.child(i)
578 private class NProdIterator
579 super IndexedIterator[nullable Node]
redef fun is_ok do return index < prod.number_of_children
redef fun next do index += 1
redef fun item do return prod.child(index)
587 # All-in-one abstract class to test generated parsers on a given
588 abstract class TestParser
589 # How to get a new lexer on a given stream of character
fun new_lexer(text: String): Lexer is abstract

# How to get a new parser
fun new_parser: Parser is abstract

# The name of the language (used for generated files)
fun name: String is abstract
# Use the class as the main entry point of the program
599 # - parse arguments and options of the command
600 # - test the parser (see `work`)
603 if args
.is_empty
then
604 print
"usage {name}_test <filepath> | - | -e <text>"
608 var filepath
= args
.shift
610 if filepath
== "-" then
611 text
= sys
.stdin
.read_all
612 else if filepath
== "-e" then
613 if args
.is_empty
then
614 print
"Error: -e need a text"
619 var f
= new FileReader.open
(filepath
)
624 if not args
.is_empty
then
625 print
"Error: superfluous arguments."
632 # Produce a full syntactic tree for a given stream of character
633 # Produce also statistics and output files
634 fun work
(text
: String): Node
636 print
"INPUT: {text.length} chars"
637 var l
= new_lexer
(text
)
640 var tokout
= "{name}.tokens.out"
641 print
"TOKEN: {tokens.length} tokens (see {tokout})"
643 var f
= new FileWriter.open
(tokout
)
650 p
.tokens
.add_all
(tokens
)
654 var astout
= "{name}.ast.out"
655 f
= new FileWriter.open
(astout
)
656 var tpv
= new TreePrinterVisitor(f
)
657 var astdotout
= "{name}.ast.dot"
659 print
"Syntax error: {n.message}"
660 print
"ERROR: {n} (see {astout} and {astdotout})"
664 print
"ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"