abstract class Parser
# The list of tokens
# FIXME: provide something better, like a lexer?
- var tokens = new List[NToken]
+ var tokens = new CircularArray[NToken]
# Look at the next token
# Used by generated parsers
fun parse_error
do
var token = peek_token
- print "* parse error in state {state} on token {token}"
- print " expected: {state.error_msg}"
- print " node_stack={node_stack.join(", ")}"
- print " state_stack={state_stack.join(", ")}"
- var error: Node
+ #print "* parse error in state {state} on token {token}"
+ #print " expected: {state.error_msg}"
+ #print " node_stack={node_stack.join(", ")}"
+ #print " state_stack={state_stack.join(", ")}"
+ node_stack.push(token)
+ var error: NError
if token isa NLexerError then
error = token
- token.error_tree.items.add_all(node_stack)
else
error = new NParserError
error.position = token.position
error.text = token.text
error.token = token
- error.error_tree.items.add_all(node_stack)
end
+ error.error_tree.children.add_all(node_stack)
+ error.expected = state.error_msg
node_stack.clear
node_stack.add error
stop = true
# The current state
# Used by generated parsers
- var state: LRState
+ var state: LRState is noinit
init
do
# Should the parser stop
# Used by generated parsers
- var stop writable = true
+ var stop = true is writable
# Parse a full sequence of tokens and return a complete syntactic tree
fun parse: Node
protected fun start_state: DFAState is abstract
# Lexize a stream of characters and return a sequence of tokens
- fun lex: List[NToken]
+ fun lex: CircularArray[NToken]
do
- var res = new List[NToken]
+ var res = new CircularArray[NToken]
var state = start_state
var pos = 0
var pos_start = 0
last_state = state
end
var c
+ var next
if pos >= length then
c = '\0'
+ next = null
else
- c = text[pos]
+ c = text.chars[pos]
+ next = state.trans(c)
end
- var next = state.trans(c)
if next == null then
- if last_state == null then
- var token = new NLexerError
- var position = new Position(pos_start, pos, line_start, line, col_start, col)
- token.position = position
- token.text = text.substring(pos_start, pos-pos_start+1)
- res.add token
- break
+ if pos_start < length then
+ if last_state == null then
+ var token = new NLexerError
+ var position = new Position(pos_start, pos, line_start, line, col_start, col)
+ token.position = position
+ token.text = text.substring(pos_start, pos-pos_start+1)
+ res.push token
+ break
+ end
+ if not last_state.is_ignored then
+ var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
+ var token = last_state.make_token(position, text)
+ if token != null then res.push(token)
+ end
end
- var position = new Position(pos_start, pos_end, line_start, line_end, col_start, col_end)
- var token = last_state.make_token(position, text.substring(pos_start, pos_end-pos_start+1))
- if token != null then res.add(token)
if pos >= length then
- token = new NEof
- position = new Position(pos, pos, line, line, col, col)
+ var token = new NEof
+ var position = new Position(pos, pos, line, line, col, col)
token.position = position
token.text = ""
- res.add token
+ res.push token
break
end
state = start_state
interface DFAState
fun is_accept: Bool do return false
fun trans(c: Char): nullable DFAState do return null
- fun make_token(position: Position, text: String): nullable NToken is abstract
+ fun make_token(position: Position, source: String): nullable NToken is abstract
+ fun is_ignored: Bool do return false
end
###
# Print a node (using to_s) on a line and recursively each child indented (with two spaces)
class TreePrinterVisitor
super Visitor
- var writer: OStream
+ var writer: Writer
private var indent = 0
- init(writer: OStream) do self.writer = writer
redef fun visit(n)
do
for i in [0..indent[ do writer.write(" ")
var line_end: Int
var col_start: Int
var col_end: Int
+
redef fun to_s do return "{line_start}:{col_start}-{line_end}:{col_end}"
+
+ # Extract the content from the given source
+ fun extract(source: String): String
+ do
+ return source.substring(pos_start, pos_end-pos_start+1)
+ end
+
+ # Get the lines covered by `self` and underline the target columns.
+ #
+ # This is useful for pretty printing errors or debugging the output
+ #
+ # ~~~
+ # var src = "var Foo = new Array[Int]"
+ # var pos = new Position(0,0, 1, 1, 5, 8)
+ #
+ # assert pos.underline(src) == """
+ # var Foo = new Array[Int]
+ # ^^^"""
+ # ~~~
+ fun underline(source: Text): String
+ do
+ var res = new FlatBuffer
+
+ # All the concerned lines
+ var lines = source.split("\n")
+ for line in [line_start..line_end] do
+ # Positions count lines from 1, arrays from 0
+ res.append lines[line-1]
+ res.append "\n"
+ end
+
+ # Cover all columns, no matter their lines
+ # NOTE(review): the locals shadow the attributes; this normalizes start <= end
+ var col_start = col_start.min(col_end)
+ var col_end = self.col_start.max(col_end)
+
+ # " ^^^^"
+ var ptr = " "*(col_start-1).max(0) + "^"*(col_end-col_start)
+ res.append ptr
+
+ return res.to_s
+ end
end
# A node of a syntactic tree
# The name of the node (as used in the grammar file)
fun node_name: String do return class_name
- # A point of view on the direct childrens of the node
+ # A point of view on the direct children of the node
fun children: SequenceRead[nullable Node] is abstract
+ # A point of view of a depth-first visit of all non-null children
+ var depth: Collection[Node] = new DephCollection(self) is lazy
+
# Visit all the children of the node with the visitor `v`
protected fun visit_children(v: Visitor)
do
end
# The position of the node in the input stream
- var position: nullable Position writable = null
+ var position: nullable Position = null is writable
# Produce a graphviz file for the syntactic tree rooted at `self`.
fun to_dot(filepath: String)
do
- var f = new OFStream.open(filepath)
+ var f = new FileWriter.open(filepath)
f.write("digraph g \{\n")
f.write("rankdir=BT;\n")
f.close
end
- private fun to_dot_visitor(f: OStream, a: Array[NToken])
+ private fun to_dot_visitor(f: Writer, a: Array[NToken])
do
f.write("n{object_id} [label=\"{node_name}\"];\n")
for x in children do
end
end
+# Lazy depth-first view over the non-null nodes reachable from `node`.
+# Backs the `depth` attribute of `Node`.
+private class DephCollection
+ super Collection[Node]
+ # Root of the traversal (included in the iteration)
+ var node: Node
+ redef fun iterator do return new DephIterator([node].iterator)
+end
+
+# Depth-first iterator over non-null nodes; the engine behind `DephCollection`.
+private class DephIterator
+ super Iterator[Node]
+
+ # Stack of pending children iterators, one per tree level being visited
+ var stack = new Array[Iterator[nullable Node]]
+
+ # Start the traversal on the roots yielded by `i`.
+ init(i: Iterator[nullable Node]) is old_style_init do
+ stack.push i
+ end
+
+ redef fun is_ok do return not stack.is_empty
+ # `next` guarantees the current item is non-null, hence the cast
+ redef fun item do return stack.last.item.as(not null)
+ redef fun next
+ do
+ # Descend: queue the children of the current node, then advance past it
+ var i = stack.last
+ stack.push i.item.children.iterator
+ i.next
+ # Unwind exhausted levels and skip null children until a node is found
+ # (or the stack empties, which ends the iteration)
+ while is_ok do
+ if not stack.last.is_ok then
+ stack.pop
+ continue
+ end
+ if stack.last.item == null then
+ stack.last.next
+ continue
+ end
+ return
+ end
+ end
+end
+
# A token produced by the lexer and used in a syntactic tree
abstract class NToken
super Node
end
# The text associated with the token
- var text: String writable = ""
+ var text: String = "" is writable
redef fun to_s do
var res = super
# All the partially built tree during parsing (aka the node_stack)
var error_tree = new Nodes[Node]
+
+ # The things unexpected
+ fun unexpected: String is abstract
+
+ # The things expected (if any)
+ var expected: nullable String = null
+
+ # The error message, using `expected` and `unexpected`
+ fun message: String
+ do
+ var exp = expected
+ var res = "Unexpected {unexpected}"
+ if exp != null then res += "; is acceptable instead: {exp}"
+ return res
+ end
end
# A lexer error as a token for the unexpected character
class NLexerError
super NError
+
+ redef fun unexpected do return "character '{text.chars.first}'"
end
# A parser error linked to an unexpected token
class NParserError
super NError
+
# The unexpected token
- var token: nullable NToken
+ var token: nullable NToken = null
+
+ redef fun unexpected
+ do
+ var res = token.node_name
+ var text = token.text
+ if not text.is_empty and res != "'{text}'" then
+ res += " '{text.escape_to_c}'"
+ end
+ return res
+ end
end
# A homogeneous sequence of nodes, used to represent unbounded lists (and the + modifier)
class Nodes[T: Node]
super Node
- redef fun children do return items
- var items = new Array[T]
+ redef var children: Array[T] = new Array[T]
end
-# A production with a specific, named and statically typed childrens
-class NProd
+# A production with a specific, named and statically typed children
+abstract class NProd
super Node
redef var children: SequenceRead[nullable Node] = new NProdChildren(self)
var filepath = args.shift
var text
if filepath == "-" then
- text = stdin.read_all
+ text = sys.stdin.read_all
else if filepath == "-e" then
if args.is_empty then
print "Error: -e need a text"
end
text = args.shift
else
- var f = new IFStream.open(filepath)
+ var f = new FileReader.open(filepath)
text = f.read_all
f.close
end
var tokout = "{name}.tokens.out"
print "TOKEN: {tokens.length} tokens (see {tokout})"
- var f = new OFStream.open(tokout)
+ var f = new FileWriter.open(tokout)
for t in tokens do
f.write "{t.to_s}\n"
end
var n = p.parse
var astout = "{name}.ast.out"
- f = new OFStream.open(astout)
+ f = new FileWriter.open(astout)
var tpv = new TreePrinterVisitor(f)
var astdotout = "{name}.ast.dot"
if n isa NError then
+ print "Syntax error: {n.message}"
print "ERROR: {n} (see {astout} and {astdotout})"
tpv.enter_visit(n)
n = n.error_tree
else
- print "ROOT: {n} (see {astout} and {astdotout})"
+ print "ROOT: {n}; {n.depth.length} nodes (see {astout} and {astdotout})"
end
tpv.enter_visit(n)
n.to_dot(astdotout)