Merge: parser: do not allocate a reduction table for each parser
[nit.git] / src / parser / xss / parser.xss
index c72018f..5b1d60e 100644 (file)
@@ -17,274 +17,20 @@ $ // limitations under the License.
 
 $ template make_parser()
 
-# State of the parser automata as stored in the parser stack.
-private class State
-       # The internal state number
-       readable writable var _state: Int
-
-       # The node stored with the state in the stack
-       readable writable var _nodes: nullable Object
-
-       init(state: Int, nodes: nullable Object)
-       do
-               _state = state
-               _nodes = nodes
-       end
-end
-
-class Parser
-special ParserTable
-       # Associated lexer
-       var _lexer: Lexer
-
-       # Stack of pushed states and productions
-       var _stack: Array[State]
-
-       # Position in the stack
-       var _stack_pos: Int
-
-       # Create a new parser based on a given lexer
-       init(lexer: Lexer)
-       do
-               _lexer = lexer
-               _stack = new Array[State]
-               _stack_pos = -1
-               build_goto_table
-               build_action_table
-               build_reduce_table
-       end
-
-       # Do a transition in the automata
-       private fun go_to(index: Int): Int
-       do
-               var state = state
-               var table = _goto_table[index]
-               var low = 1
-               var high = table.length/2 - 1
-
-               while low <= high do
-                       var middle = (low + high) / 2
-                       var subindex = middle * 2
-
-                       if state < table[subindex] then
-                               high = middle - 1
-                       else if state > table[subindex] then
-                               low = middle + 1
-                       else
-                               return table[subindex + 1]
-                       end
-               end
-
-               return table[1] # Default value
-       end
-
-       # Push someting in the state stack
-       private fun push(numstate: Int, list_node: nullable Object)
-       do
-               var pos = _stack_pos + 1
-               _stack_pos = pos
-               if pos < _stack.length then
-                       var state = _stack[pos]
-                       state.state = numstate
-                       state.nodes = list_node
-               else
-                       _stack.push(new State(numstate, list_node))
-               end
-       end
-
-       # The current state
-       private fun state: Int
-       do
-               return _stack[_stack_pos].state
-       end
-
-       # Pop something from the stack state
-       private fun pop: nullable Object
-       do
-               var res = _stack[_stack_pos].nodes
-               _stack_pos = _stack_pos -1
-               return res
-       end
-
-       # Build and return a full AST.
-       fun parse: Start
-       do
-               push(0, null)
-
-               var lexer = _lexer
-               loop
-                       var token = lexer.peek
-                       if token isa PError then
-                               return new Start(null, token)
-                       end
-
-                       var index = token.parser_index
-                       var table = _action_table[state]
-                       var action_type = table[1]
-                       var action_value = table[2]
-
-                       var low = 1
-                       var high = table.length/3 - 1
-
-                       while low <= high do
-                               var middle = (low + high) / 2
-                               var subindex = middle * 3
-
-                               if index < table[subindex] then
-                                       high = middle - 1
-                               else if index > table[subindex] then
-                                       low = middle + 1
-                               else
-                                       action_type = table[subindex + 1]
-                                       action_value = table[subindex + 2]
-                                       high = low -1 # break
-                               end
-                       end
-
-                       if action_type == 0 then # SHIFT
-                               push(action_value, lexer.next)
-                       else if action_type == 1 then # REDUCE
-                               _reduce_table[action_value].action(self)
-                       else if action_type == 2 then # ACCEPT
-                               var node2 = lexer.next
-                               assert node2 isa EOF
-                               var node1 = pop
-                               assert node1 isa ${/parser/prods/prod/@ename}
-                               var node = new Start(node1, node2)
-                               (new ComputeProdLocationVisitor).enter_visit(node)
-                               return node
-                       else if action_type == 3 then # ERROR
-                               var node2 = new PError.init_error("Syntax error: unexpected token.", token.location)
-                               var node = new Start(null, node2)
-                               return node
-                       end
-                       if false then break # FIXME remove once unreach loop exits are in c_src
-               end
-               abort # FIXME remove once unreach loop exits are in c_src
-       end
-
-       var _reduce_table: Array[ReduceAction]
-       private fun build_reduce_table
+redef class Parser
+       redef fun build_reduce_table
        do
-               _reduce_table = new Array[ReduceAction].with_items(
+               var reduce_table = new Array[ReduceAction].with_capacity(${count(rules/rule)})
 $ foreach {rules/rule}
-                       new ReduceAction@index[-sep ','-]
+               reduce_table.add new ReduceAction@index(@leftside)
 $ end foreach
-               )
-       end
-end
-
-redef class Prod
-       # Location on the first token after the start of a production
-       # So outside the production for epilon production
-       var _first_location: nullable Location
-
-       # Location of the last token before the end of a production
-       # So outside the production for epilon production
-       var _last_location: nullable Location
-end
-
-# Find location of production nodes
-# Uses existing token locations to infer location of productions.
-private class ComputeProdLocationVisitor
-special Visitor
-       # Currenlty visited productions that need a first token
-       var _need_first_prods: Array[Prod] = new Array[Prod]
-
-       # Already visited epsilon productions that waits something after them
-       var _need_after_epsilons: Array[Prod] = new Array[Prod]
-
-       # Already visited epsilon production that waits something before them
-       var _need_before_epsilons: Array[Prod] = new Array[Prod]
-
-       # Location of the last visited token in the current production
-       var _last_location: nullable Location = null
-
-       redef fun visit(n: nullable PNode)
-       do
-               if n == null then
-                       return
-               else if n isa Token then
-                       var loc = n.location
-                       _last_location = loc
-
-                       # Add a first token to productions that need one
-                       for no in _need_first_prods do
-                               no._first_location = loc
-                       end
-                       _need_first_prods.clear
-
-                       # Find location for already visited epsilon production that need one
-                       for no in _need_after_epsilons do
-                               # Epsilon production that is in the middle of a non-epsilon production
-                               # The epsilon production has both a token before and after it
-                               var endl = loc
-                               var startl = no._last_location
-                               no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
-                       end
-                       _need_after_epsilons.clear
-               else
-                       assert n isa Prod
-                       _need_first_prods.add(n)
-
-                       var old_last = _last_location
-                       _last_location = null
-                       n.visit_all(self)
-                       var endl = _last_location
-                       if endl == null then _last_location = old_last
-
-                       n._last_location = endl
-                       var startl = n._first_location
-                       if startl != null then
-                               # Non-epsilon production
-                               assert endl != null
-
-                               n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
-
-                               for no in _need_before_epsilons do
-                                       # Epsilon production that starts the current non-epsilon production
-                                       #var startl = n.location
-                                       no.location = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
-                               end
-                               _need_before_epsilons.clear
-
-                               for no in _need_after_epsilons do
-                                       # Epsilon production that finishes the current non-epsilon production
-                                       #var endl = n.location
-                                       no.location = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
-                               end
-                               _need_after_epsilons.clear
-                       else
-                               # No first token means epsilon production (or "throw all my tokens" production)
-                               # So, it must be located it later
-                               if endl == null then
-                                       # Epsilon production that starts a parent non-epsilon production
-                                       _need_before_epsilons.add(n)
-                               else
-                                       # Epsilon production in the middle or that finishes a parent non-epsilon production
-                                       _need_after_epsilons.add(n)
-                               end
-                       end
-               end
-       end
-
-       init do end
-end
-
-# Each reduca action has its own class, this one is the root of the hierarchy.
-private abstract class ReduceAction
-       fun action(p: Parser) is abstract
-       fun concat(l1, l2 : Array[Object]): Array[Object]
-       do
-               if l1.is_empty then return l2
-               l1.append(l2)
-               return l1
+               return reduce_table
        end
 end
 
 $ foreach {rules/rule}
 private class ReduceAction@index
-special ReduceAction
+       super ReduceAction
        redef fun action(p: Parser)
        do
                                        var node_list: nullable Object = null
@@ -313,6 +59,7 @@ $     when {@cmd='MAKELIST'}
                                        var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
 $     end
 $     when {@cmd='MAKENODE'}
+$      if {count(arg)!=0}
                                        var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
 $       foreach {arg}
 $           if @null
@@ -322,6 +69,9 @@ $           else
 $           end
 $       end foreach
                                        )
+$      else
+                                       var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
+$      end
 $     end
 $     when {@cmd='RETURNNODE'}
 $       if @null
@@ -335,51 +85,40 @@ $     when {@cmd='RETURNLIST'}
 $     end
 $   end choose
 $   end foreach
-                                       p.push(p.go_to(@leftside), node_list)
+                                       p.push(p.go_to(_goto), node_list)
        end
-init do end
 end
 $ end foreach
 $ end template
 
-$ template make_parser_tables()
-# Parser that build a full AST
-abstract class ParserTable
-       var _action_table: Array[Array[Int]]
-       private fun build_action_table
-       do
-               _action_table = once [
+$ template make_parser_table()
 $ foreach {parser_data/action_table/row}
-                       action_table_row${position()}[-sep ','-]
+static int parser_action_row${position()}[] = {
+       ${count(action)},
+$   foreach {action}
+       @from, @action, @to[-sep ','-]
+$   end foreach
+};
 $ end foreach
-               ]
-       end
 
+const int* const parser_action_table[] = {
 $ foreach {parser_data/action_table/row}
-       private fun action_table_row${position()}: Array[Int]
-       do
-               return [
-$   foreach {action}
-                               @from, @action, @to[-sep ','-]
-$   end foreach
-                       ]
-       end
+       parser_action_row${position()}[-sep ','-]
 $ end foreach
+};
 
-       var _goto_table: Array[Array[Int]]
-       private fun build_goto_table
-       do
-               _goto_table = once [
 $ foreach {parser_data/goto_table/row}
-                       [
+static int parser_goto_row${position()}[] = {
+       ${count(goto)},
 $   foreach {goto}
-                               @from, @to[-sep ','-]
+       @from, @to[-sep ','-]
 $   end foreach
-                       ][-sep ','-]
+};
 $ end foreach
-               ]
-       end
 
-       init do end
-end
+const int* const parser_goto_table[] = {
+$ foreach {parser_data/goto_table/row}
+       parser_goto_row${position()}[-sep ','-]
+$ end foreach
+};
 $ end template