-/* This file is part of NIT ( http://www.nitlanguage.org ).
- *
- * Copyright 2008 Jean Privat <jean@pryen.org>
- * Based on algorithms developped for ( http://www.sablecc.org/ ).
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+$ // This file is part of NIT ( http://www.nitlanguage.org ).
+$ //
+$ // Copyright 2008 Jean Privat <jean@pryen.org>
+$ // Based on algorithms developed for ( http://www.sablecc.org/ ).
+$ //
+$ // Licensed under the Apache License, Version 2.0 (the "License");
+$ // you may not use this file except in compliance with the License.
+$ // You may obtain a copy of the License at
+$ //
+$ // http://www.apache.org/licenses/LICENSE-2.0
+$ //
+$ // Unless required by applicable law or agreed to in writing, software
+$ // distributed under the License is distributed on an "AS IS" BASIS,
+$ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+$ // See the License for the specific language governing permissions and
+$ // limitations under the License.
$ template make_parser()
# State of the parser automata as stored in the parser stack.
private class State
# The internal state number
- readable writable attr _state: Int
+ readable writable var _state: Int
# The node stored with the state in the stack
- readable writable attr _nodes: Object
+ readable writable var _nodes: nullable Object
- init(state: Int, nodes: Object)
+ init(state: Int, nodes: nullable Object)
do
_state = state
_nodes = nodes
end
end
-redef class Parser
+class Parser
+special TablesCapable
# Associated lexer
- attr _lexer: Lexer
+ var _lexer: Lexer
# Stack of pushed states and productions
- attr _stack: Array[State]
+ var _stack: Array[State]
# Position in the stack
- attr _stack_pos: Int
+ var _stack_pos: Int
# Create a new parser based on a given lexer
init(lexer: Lexer)
_lexer = lexer
_stack = new Array[State]
_stack_pos = -1
- build_goto_table
- build_action_table
build_reduce_table
end
# Do a transition in the automata
- private meth go_to(index: Int): Int
+ private fun go_to(index: Int): Int
do
var state = state
- var table = _goto_table[index]
var low = 1
- var high = table.length/2 - 1
+ var high = parser_goto(index, 0) - 1
while low <= high do
var middle = (low + high) / 2
- var subindex = middle * 2
+ var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
- if state < table[subindex] then
+ var goal = parser_goto(index, subindex)
+ if state < goal then
high = middle - 1
- else if state > table[subindex] then
+ else if state > goal then
low = middle + 1
else
- return table[subindex + 1]
+ return parser_goto(index, subindex+1)
end
end
- return table[1] # Default value
+ return parser_goto(index, 2) # Default value
end
# Push someting in the state stack
- private meth push(numstate: Int, list_node: Object)
+ private fun push(numstate: Int, list_node: nullable Object)
do
var pos = _stack_pos + 1
_stack_pos = pos
end
# The current state
- private meth state: Int
+ private fun state: Int
do
return _stack[_stack_pos].state
end
# Pop something from the stack state
- private meth pop: Object
+ private fun pop: nullable Object
do
var res = _stack[_stack_pos].nodes
_stack_pos = _stack_pos -1
end
# Build and return a full AST.
- meth parse: Start
+ fun parse: Start
do
push(0, null)
- var ign: List[Token] = null
var lexer = _lexer
- while true do
+ loop
var token = lexer.peek
- var last_pos = token.pos
- var last_line = token.line
-
if token isa PError then
- assert token isa PError
return new Start(null, token)
end
var index = token.parser_index
- var table = _action_table[state]
- var action_type = table[1]
- var action_value = table[2]
+ var action_type = parser_action(state, 2)
+ var action_value = parser_action(state, 3)
var low = 1
- var high = table.length/3 - 1
+ var high = parser_action(state, 0) - 1
while low <= high do
var middle = (low + high) / 2
- var subindex = middle * 3
+ var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
- if index < table[subindex] then
+ var goal = parser_action(state, subindex)
+ if index < goal then
high = middle - 1
- else if index > table[subindex] then
+ else if index > goal then
low = middle + 1
else
- action_type = table[subindex + 1]
- action_value = table[subindex + 2]
- high = low -1 # break
+ action_type = parser_action(state, subindex+1)
+ action_value = parser_action(state, subindex+2)
+ break
end
end
var node1 = pop
assert node1 isa ${/parser/prods/prod/@ename}
var node = new Start(node1, node2)
- (new SearchTokensVisitor).visit(node)
+ (new ComputeProdLocationVisitor).enter_visit(node)
return node
else if action_type == 3 then # ERROR
- var node2 = new PError.init_error(lexer.filename, last_line, last_pos, error_messages[errors[action_value]])
+ var node2 = new PError.init_error("Syntax error: unexpected token.", token.location)
var node = new Start(null, node2)
return node
end
end
- return null
end
- attr _reduce_table: Array[ReduceAction]
- private meth build_reduce_table
+ var _reduce_table: Array[ReduceAction]
+ private fun build_reduce_table
do
- _reduce_table = new Array[ReduceAction].with(
+ _reduce_table = new Array[ReduceAction].with_items(
$ foreach {rules/rule}
- new ReduceAction@index[-sep ','-]
+ new ReduceAction@index(@leftside)[-sep ','-]
$ end foreach
)
end
end
-# Find first and last tokens of production nodes
-private class SearchTokensVisitor
+redef class Prod
+	# Location of the first token after the start of a production
+	# So outside the production for epsilon productions
+ var _first_location: nullable Location
+
+ # Location of the last token before the end of a production
+	# So outside the production for epsilon productions
+ var _last_location: nullable Location
+end
+
+# Find location of production nodes
+# Uses existing token locations to infer location of productions.
+private class ComputeProdLocationVisitor
special Visitor
- attr _untokenned_nodes: Array[Prod]
- attr _last_token: Token
- redef meth visit(n: PNode)
+	# Currently visited productions that need a first token
+ var _need_first_prods: Array[Prod] = new Array[Prod]
+
+ # Already visited epsilon productions that waits something after them
+ var _need_after_epsilons: Array[Prod] = new Array[Prod]
+
+ # Already visited epsilon production that waits something before them
+ var _need_before_epsilons: Array[Prod] = new Array[Prod]
+
+ # Location of the last visited token in the current production
+ var _last_location: nullable Location = null
+
+ redef fun visit(n: nullable PNode)
do
- if n isa Token then
- assert n isa Token
- _last_token = n
- for no in _untokenned_nodes do
- no.first_token = n
+ if n == null then
+ return
+ else if n isa Token then
+ var loc = n.location
+ _last_location = loc
+
+ # Add a first token to productions that need one
+ for no in _need_first_prods do
+ no._first_location = loc
end
- _untokenned_nodes.clear
+ _need_first_prods.clear
+
+ # Find location for already visited epsilon production that need one
+ for no in _need_after_epsilons do
+ # Epsilon production that is in the middle of a non-epsilon production
+ # The epsilon production has both a token before and after it
+ var endl = loc
+ var startl = no._last_location
+ no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
+ end
+ _need_after_epsilons.clear
else
assert n isa Prod
- _untokenned_nodes.add(n)
+ _need_first_prods.add(n)
+
+ var old_last = _last_location
+ _last_location = null
n.visit_all(self)
- n.last_token = _last_token
+ var endl = _last_location
+ if endl == null then _last_location = old_last
+
+ n._last_location = endl
+ var startl = n._first_location
+ if startl != null then
+ # Non-epsilon production
+ assert endl != null
+
+ n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
+
+ for no in _need_before_epsilons do
+ # Epsilon production that starts the current non-epsilon production
+ #var startl = n.location
+ no.location = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
+ end
+ _need_before_epsilons.clear
+
+ for no in _need_after_epsilons do
+ # Epsilon production that finishes the current non-epsilon production
+ #var endl = n.location
+ no.location = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
+ end
+ _need_after_epsilons.clear
+ else
+ # No first token means epsilon production (or "throw all my tokens" production)
+			# So, its location must be set later
+ if endl == null then
+ # Epsilon production that starts a parent non-epsilon production
+ _need_before_epsilons.add(n)
+ else
+ # Epsilon production in the middle or that finishes a parent non-epsilon production
+ _need_after_epsilons.add(n)
+ end
+ end
end
end
- init
- do
- _untokenned_nodes = new Array[Prod]
- end
+
+ init do end
end
# Each reduca action has its own class, this one is the root of the hierarchy.
private abstract class ReduceAction
- meth action(p: Parser) is abstract
+ fun action(p: Parser) is abstract
+ fun concat(l1, l2 : Array[Object]): Array[Object]
+ do
+ if l1.is_empty then return l2
+ l1.append(l2)
+ return l1
+ end
end
$ foreach {rules/rule}
private class ReduceAction@index
special ReduceAction
- redef meth action(p: Parser)
+ redef fun action(p: Parser)
do
- var node_list: Object = null
+ var node_list: nullable Object = null
$ foreach {action}
$ choose
$ when {@cmd='POP'}
var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = p.pop
$ end
$ when {@cmd='FETCHLIST'}
- var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
+ var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa Array[Object]
$ end
$ when {@cmd='FETCHNODE'}
var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
- assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa @etype
+ assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa nullable @etype
$ end
$ when {@cmd='ADDNODE'}
if ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
end
$ end
$ when {@cmd='ADDLIST'}
- if ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
- if ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.is_empty then
- ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
- else
- ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.append(${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
- end
- end
+ ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = concat(${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}, ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
$ end
$ when {@cmd='MAKELIST'}
var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
$ end
$ when {@cmd='MAKENODE'}
- var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
+ var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
$ foreach {arg}
$ if @null
null[-sep ','-]
$ end
$ end choose
$ end foreach
- p.push(p.go_to(@leftside), node_list)
+ p.push(p.go_to(_goto), node_list)
end
-init do end
+ var _goto: Int
+ init(g: Int) do _goto = g
end
$ end foreach
$ end template
-$ template make_parser_tables()
-# Parser that build a full AST
-class Parser
- attr _action_table: Array[Array[Int]]
- private meth build_action_table
- do
- _action_table = once [
+$ template make_parser_table()
$ foreach {parser_data/action_table/row}
- action_table_row${position()}[-sep ','-]
+static int parser_action_row${position()}[] = {
+ ${count(action)},
+$ foreach {action}
+ @from, @action, @to[-sep ','-]
+$ end foreach
+};
$ end foreach
- ]
- end
+const int* const parser_action_table[] = {
$ foreach {parser_data/action_table/row}
- private meth action_table_row${position()}: Array[Int]
- do
- return [
-$ foreach {action}
- @from, @action, @to [-sep ','-]
-$ end foreach
- ]
- end
+ parser_action_row${position()}[-sep ','-]
$ end foreach
+};
- attr _goto_table: Array[Array[Int]]
- private meth build_goto_table
- do
- _goto_table = once [
$ foreach {parser_data/goto_table/row}
- [
+static int parser_goto_row${position()}[] = {
+ ${count(goto)},
$ foreach {goto}
- @from, @to [-sep ','-]
+ @from, @to[-sep ','-]
$ end foreach
- ] [-sep ','-]
+};
$ end foreach
- ]
- end
-
- private meth error_messages: Array[String]
- do
- return once [
-$ foreach {parser_data/error_messages/msg}
- "${sablecc:string2escaped_unicode(.)}" [-sep ','-]
-$ end
- ]
- end
- private meth errors: Array[Int]
- do
- return once [
- [-foreach {parser_data/errors/i}-]${.} [-sep ','-] [-end-]
- ]
- end
-end
+const int* const parser_goto_table[] = {
+$ foreach {parser_data/goto_table/row}
+ parser_goto_row${position()}[-sep ','-]
+$ end foreach
+};
$ end template