Merge branch 'update_syntax' into next
[nit.git] / src / parser / xss / parser.xss
index 6fc5fa2..56c1dfd 100644 (file)
@@ -1,32 +1,31 @@
-/* This file is part of NIT ( http://www.nitlanguage.org ).
- *
- * Copyright 2008 Jean Privat <jean@pryen.org>
- * Based on algorithms developped for ( http://www.sablecc.org/ ).
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+$ // This file is part of NIT ( http://www.nitlanguage.org ).
+$ //
+$ // Copyright 2008 Jean Privat <jean@pryen.org>
+$ // Based on algorithms developed for ( http://www.sablecc.org/ ).
+$ //
+$ // Licensed under the Apache License, Version 2.0 (the "License");
+$ // you may not use this file except in compliance with the License.
+$ // You may obtain a copy of the License at
+$ //
+$ //     http://www.apache.org/licenses/LICENSE-2.0
+$ //
+$ // Unless required by applicable law or agreed to in writing, software
+$ // distributed under the License is distributed on an "AS IS" BASIS,
+$ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+$ // See the License for the specific language governing permissions and
+$ // limitations under the License.
 
 $ template make_parser()
 
 # State of the parser automata as stored in the parser stack.
 private class State
        # The internal state number
-       readable writable attr _state: Int
+       readable writable var _state: Int
 
        # The node stored with the state in the stack
-       readable writable attr _nodes: Object 
+       readable writable var _nodes: nullable Object
 
-       init(state: Int, nodes: Object)
+       init(state: Int, nodes: nullable Object)
        do
                _state = state
                _nodes = nodes
@@ -34,15 +33,15 @@ private class State
 end
 
 class Parser
-special ParserTable
+       super TablesCapable
        # Associated lexer
-       attr _lexer: Lexer
+       var _lexer: Lexer
 
        # Stack of pushed states and productions
-       attr _stack: Array[State]
+       var _stack: Array[State]
 
        # Position in the stack
-       attr _stack_pos: Int
+       var _stack_pos: Int
 
        # Create a new parser based on a given lexer
        init(lexer: Lexer)
@@ -50,37 +49,35 @@ special ParserTable
                _lexer = lexer
                _stack = new Array[State]
                _stack_pos = -1
-               build_goto_table
-               build_action_table
                build_reduce_table
        end
 
        # Do a transition in the automata
-       private meth go_to(index: Int): Int
+       private fun go_to(index: Int): Int
        do
                var state = state
-               var table = _goto_table[index]
                var low = 1
-               var high = table.length/2 - 1
+               var high = parser_goto(index, 0) - 1
 
                while low <= high do
                        var middle = (low + high) / 2
-                       var subindex = middle * 2
+                       var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
 
-                       if state < table[subindex] then
+                       var goal = parser_goto(index, subindex)
+                       if state < goal then
                                high = middle - 1
-                       else if state > table[subindex] then
+                       else if state > goal then
                                low = middle + 1
                        else
-                               return table[subindex + 1]
+                               return parser_goto(index, subindex+1)
                        end
                end
 
-               return table[1] # Default value
+               return parser_goto(index, 2) # Default value
        end
 
        # Push someting in the state stack
-       private meth push(numstate: Int, list_node: Object)
+       private fun push(numstate: Int, list_node: nullable Object)
        do
                var pos = _stack_pos + 1
                _stack_pos = pos
@@ -94,13 +91,13 @@ special ParserTable
        end
 
        # The current state
-       private meth state: Int
+       private fun state: Int
        do
                return _stack[_stack_pos].state
        end
 
        # Pop something from the stack state
-       private meth pop: Object
+       private fun pop: nullable Object
        do
                var res = _stack[_stack_pos].nodes
                _stack_pos = _stack_pos -1
@@ -108,42 +105,37 @@ special ParserTable
        end
 
        # Build and return a full AST.
-       meth parse: Start
+       fun parse: Start
        do
                push(0, null)
 
-               var ign: List[Token] = null
                var lexer = _lexer
-               while true do
+               loop
                        var token = lexer.peek
-                       var last_pos = token.pos
-                       var last_line = token.line
-
                        if token isa PError then
-                               assert token isa PError
                                return new Start(null, token)
                        end
 
                        var index = token.parser_index
-                       var table = _action_table[state]
-                       var action_type = table[1]
-                       var action_value = table[2]
+                       var action_type = parser_action(state, 2)
+                       var action_value = parser_action(state, 3)
 
                        var low = 1
-                       var high = table.length/3 - 1
+                       var high = parser_action(state, 0) - 1
 
                        while low <= high do
                                var middle = (low + high) / 2
-                               var subindex = middle * 3
+                               var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
 
-                               if index < table[subindex] then
+                               var goal = parser_action(state, subindex)
+                               if index < goal then
                                        high = middle - 1
-                               else if index > table[subindex] then
+                               else if index > goal then
                                        low = middle + 1
                                else
-                                       action_type = table[subindex + 1]
-                                       action_value = table[subindex + 2]
-                                       high = low -1 # break
+                                       action_type = parser_action(state, subindex+1)
+                                       action_value = parser_action(state, subindex+2)
+                                       break
                                end
                        end
 
@@ -157,78 +149,153 @@ special ParserTable
                                var node1 = pop
                                assert node1 isa ${/parser/prods/prod/@ename}
                                var node = new Start(node1, node2)
-                               (new SearchTokensVisitor).visit(node)
+                               (new ComputeProdLocationVisitor).enter_visit(node)
                                return node
                        else if action_type == 3 then # ERROR
-                               var node2 = new PError.init_error(lexer.filename, last_line, last_pos, error_messages[errors[action_value]])
+                               var node2 = new PError.init_error("Syntax error: unexpected token.", token.location)
                                var node = new Start(null, node2)
                                return node
                        end
                end
-               return null
        end
 
-       attr _reduce_table: Array[ReduceAction]
-       private meth build_reduce_table
+       var _reduce_table: Array[ReduceAction]
+       private fun build_reduce_table
        do
-               _reduce_table = new Array[ReduceAction].with(
+               _reduce_table = new Array[ReduceAction].with_items(
 $ foreach {rules/rule}
-                       new ReduceAction@index[-sep ','-]
+                       new ReduceAction@index(@leftside)[-sep ','-]
 $ end foreach
                )
        end
 end
 
-# Find first and last tokens of production nodes
-private class SearchTokensVisitor
-special Visitor
-       attr _untokenned_nodes: Array[Prod]
-       attr _last_token: Token
-       redef meth visit(n: PNode)
+redef class Prod
+       # Location of the first token after the start of the production.
+       # For an epsilon production, this token lies outside the production.
+       var _first_location: nullable Location
+
+       # Location of the last token before the end of the production.
+       # For an epsilon production, this token lies outside the production.
+       var _last_location: nullable Location
+end
+
+# Find location of production nodes
+# Uses existing token locations to infer location of productions.
+private class ComputeProdLocationVisitor
+       super Visitor
+       # Currently visited productions that need a first token
+       var _need_first_prods: Array[Prod] = new Array[Prod]
+
+       # Already visited epsilon productions that wait for something after them
+       var _need_after_epsilons: Array[Prod] = new Array[Prod]
+
+       # Already visited epsilon productions that wait for something before them
+       var _need_before_epsilons: Array[Prod] = new Array[Prod]
+
+       # Location of the last visited token in the current production
+       var _last_location: nullable Location = null
+
+       redef fun visit(n: nullable PNode)
        do
-               if n isa Token then
-                       assert n isa Token
-                       _last_token = n
-                       for no in _untokenned_nodes do
-                               no.first_token = n
+               if n == null then
+                       return
+               else if n isa Token then
+                       var loc = n.location
+                       _last_location = loc
+
+                       # Add a first token to productions that need one
+                       for no in _need_first_prods do
+                               no._first_location = loc
+                       end
+                       _need_first_prods.clear
+
+                       # Find locations for already visited epsilon productions that need one
+                       for no in _need_after_epsilons do
+                               # Epsilon production that is in the middle of a non-epsilon production
+                               # The epsilon production has both a token before and after it
+                               var endl = loc
+                               var startl = no._last_location
+                               no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
                        end
-                       _untokenned_nodes.clear
+                       _need_after_epsilons.clear
                else
                        assert n isa Prod
-                       _untokenned_nodes.add(n)
+                       _need_first_prods.add(n)
+
+                       var old_last = _last_location
+                       _last_location = null
                        n.visit_all(self)
-                       n.last_token = _last_token
+                       var endl = _last_location
+                       if endl == null then _last_location = old_last
+
+                       n._last_location = endl
+                       var startl = n._first_location
+                       if startl != null then
+                               # Non-epsilon production
+                               assert endl != null
+
+                               n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
+
+                               for no in _need_before_epsilons do
+                                       # Epsilon production that starts the current non-epsilon production
+                                       #var startl = n.location
+                                       no.location = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
+                               end
+                               _need_before_epsilons.clear
+
+                               for no in _need_after_epsilons do
+                                       # Epsilon production that finishes the current non-epsilon production
+                                       #var endl = n.location
+                                       no.location = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
+                               end
+                               _need_after_epsilons.clear
+                       else
+                               # No first token means epsilon production (or "throw all my tokens" production)
+                               # So, it must be located later
+                               if endl == null then
+                                       # Epsilon production that starts a parent non-epsilon production
+                                       _need_before_epsilons.add(n)
+                               else
+                                       # Epsilon production in the middle or that finishes a parent non-epsilon production
+                                       _need_after_epsilons.add(n)
+                               end
+                       end
                end
        end
-       init
-       do
-               _untokenned_nodes = new Array[Prod]
-       end
+
+       init do end
 end
 
 # Each reduca action has its own class, this one is the root of the hierarchy.
 private abstract class ReduceAction
-       meth action(p: Parser) is abstract
+       fun action(p: Parser) is abstract
+       fun concat(l1, l2 : Array[Object]): Array[Object]
+       do
+               if l1.is_empty then return l2
+               l1.append(l2)
+               return l1
+       end
 end
 
 $ foreach {rules/rule}
 private class ReduceAction@index
-special ReduceAction
-       redef meth action(p: Parser)
+       super ReduceAction
+       redef fun action(p: Parser)
        do
-                                       var node_list: Object = null
+                                       var node_list: nullable Object = null
 $   foreach {action}
 $   choose
 $     when {@cmd='POP'}
                                        var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = p.pop
 $     end
 $     when {@cmd='FETCHLIST'}
-                                       var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} 
+                                       var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
                                        assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa Array[Object]
 $     end
 $     when {@cmd='FETCHNODE'}
                                        var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
-                                       assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa @etype
+                                       assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa nullable @etype
 $     end
 $     when {@cmd='ADDNODE'}
                                        if ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
@@ -236,19 +303,13 @@ $     when {@cmd='ADDNODE'}
                                        end
 $     end
 $     when {@cmd='ADDLIST'}
-                                       if ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
-                                               if ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.is_empty then
-                                                       ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
-                                               else
-                                                       ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.append(${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
-                                               end
-                                       end
+                                       ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = concat(${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}, ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
 $     end
 $     when {@cmd='MAKELIST'}
                                        var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
 $     end
 $     when {@cmd='MAKENODE'}
-                                       var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
+                                       var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
 $       foreach {arg}
 $           if @null
                                                null[-sep ','-]
@@ -270,65 +331,42 @@ $     when {@cmd='RETURNLIST'}
 $     end
 $   end choose
 $   end foreach
-                                       p.push(p.go_to(@leftside), node_list)
+                                       p.push(p.go_to(_goto), node_list)
        end
-init do end
+       var _goto: Int
+       init(g: Int) do _goto = g
 end
 $ end foreach
 $ end template
 
-$ template make_parser_tables()
-# Parser that build a full AST
-abstract class ParserTable
-       attr _action_table: Array[Array[Int]] = null
-       private meth build_action_table
-       do
-               _action_table = once [ 
+$ template make_parser_table()
 $ foreach {parser_data/action_table/row}
-                       action_table_row${position()}[-sep ','-]
+static int parser_action_row${position()}[] = {
+       ${count(action)},
+$   foreach {action}
+       @from, @action, @to[-sep ','-]
+$   end foreach
+};
 $ end foreach
-               ]
-       end
 
+const int* const parser_action_table[] = {
 $ foreach {parser_data/action_table/row}
-       private meth action_table_row${position()}: Array[Int]
-       do
-               return [
-$   foreach {action}
-                               @from, @action, @to [-sep ','-]
-$   end foreach
-                       ]
-       end
+       parser_action_row${position()}[-sep ','-]
 $ end foreach
+};
 
-       attr _goto_table: Array[Array[Int]] = null
-       private meth build_goto_table
-       do
-               _goto_table = once [ 
 $ foreach {parser_data/goto_table/row}
-                       [
+static int parser_goto_row${position()}[] = {
+       ${count(goto)},
 $   foreach {goto}
-                               @from, @to [-sep ','-]
+       @from, @to[-sep ','-]
 $   end foreach
-                       ] [-sep ','-]
+};
 $ end foreach
-               ]
-       end
-
-       private meth error_messages: Array[String]
-       do
-               return once [
-$ foreach {parser_data/error_messages/msg}
-                       "${sablecc:string2escaped_unicode(.)}" [-sep ','-]
-$ end
-               ]
-       end
 
-       private meth errors: Array[Int]
-       do
-               return once [
-                       [-foreach {parser_data/errors/i}-]${.} [-sep ','-] [-end-]
-               ]
-       end
-end
+const int* const parser_goto_table[] = {
+$ foreach {parser_data/goto_table/row}
+       parser_goto_row${position()}[-sep ','-]
+$ end foreach
+};
 $ end template