X-Git-Url: http://nitlanguage.org

diff --git a/src/parser/xss/parser.xss b/src/parser/xss/parser.xss
index 49c742f..56c1dfd 100644
--- a/src/parser/xss/parser.xss
+++ b/src/parser/xss/parser.xss
@@ -33,7 +33,7 @@ private class State
 end
 
 class Parser
-special ParserTable
+	super TablesCapable
 
 	# Associated lexer
 	var _lexer: Lexer
@@ -49,8 +49,6 @@ special ParserTable
 		_lexer = lexer
 		_stack = new Array[State]
 		_stack_pos = -1
-		build_goto_table
-		build_action_table
 		build_reduce_table
 	end
 
@@ -58,24 +56,24 @@ special ParserTable
 	private fun go_to(index: Int): Int
 	do
 		var state = state
-		var table = _goto_table[index]
 		var low = 1
-		var high = table.length/2 - 1
+		var high = parser_goto(index, 0) - 1
 
 		while low <= high do
 			var middle = (low + high) / 2
-			var subindex = middle * 2
+			var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
 
-			if state < table[subindex] then
+			var goal = parser_goto(index, subindex)
+			if state < goal then
 				high = middle - 1
-			else if state > table[subindex] then
+			else if state > goal then
 				low = middle + 1
 			else
-				return table[subindex + 1]
+				return parser_goto(index, subindex+1)
 			end
 		end
 
-		return table[1] # Default value
+		return parser_goto(index, 2) # Default value
 	end
 
 	# Push someting in the state stack
@@ -112,35 +110,32 @@ special ParserTable
 		push(0, null)
 
 		var lexer = _lexer
-		while true do
+		loop
 			var token = lexer.peek
-			var last_pos = token.location.column_start
-			var last_line = token.location.line_start
-
 			if token isa PError then
 				return new Start(null, token)
 			end
 
 			var index = token.parser_index
-			var table = _action_table[state]
-			var action_type = table[1]
-			var action_value = table[2]
+			var action_type = parser_action(state, 2)
+			var action_value = parser_action(state, 3)
 
 			var low = 1
-			var high = table.length/3 - 1
+			var high = parser_action(state, 0) - 1
 
 			while low <= high do
				var middle = (low + high) / 2
-				var subindex = middle * 3
+				var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
 
-				if index < table[subindex] then
+				var goal = parser_action(state, subindex)
+				if index < goal then
 					high = middle - 1
-				else if index > table[subindex] then
+				else if index > goal then
 					low = middle + 1
 				else
-					action_type = table[subindex + 1]
-					action_value = table[subindex + 2]
-					high = low -1 # break
+					action_type = parser_action(state, subindex+1)
+					action_value = parser_action(state, subindex+2)
+					break
 				end
 			end
 
@@ -154,16 +149,14 @@ special ParserTable
 				var node1 = pop
 				assert node1 isa ${/parser/prods/prod/@ename}
 				var node = new Start(node1, node2)
-				(new SearchTokensVisitor).enter_visit(node)
+				(new ComputeProdLocationVisitor).enter_visit(node)
 				return node
 			else if action_type == 3 then # ERROR
-				var location = new Location(lexer.filename, last_line, last_line, last_pos, last_pos)
-				var node2 = new PError.init_error(error_messages[errors[action_value]],location)
+				var node2 = new PError.init_error("Syntax error: unexpected token.", token.location)
 				var node = new Start(null, node2)
 				return node
 			end
 		end
-		abort
 	end
 
 	var _reduce_table: Array[ReduceAction]
@@ -171,62 +164,123 @@ special ParserTable
 	do
 		_reduce_table = new Array[ReduceAction].with_items(
 $ foreach {rules/rule}
-			new ReduceAction@index[-sep ','-]
+			new ReduceAction@index(@leftside)[-sep ','-]
 $ end foreach
 		)
 	end
 end
 
-# Find first and last tokens of production nodes
-private class SearchTokensVisitor
-special Visitor
-	var _untokenned_nodes: Array[Prod]
-	var _last_token: nullable Token = null
+redef class Prod
+	# Location on the first token after the start of a production
+	# So outside the production for epilon production
+	var _first_location: nullable Location
+
+	# Location of the last token before the end of a production
+	# So outside the production for epilon production
+	var _last_location: nullable Location
+end
+
+# Find location of production nodes
+# Uses existing token locations to infer location of productions.
+private class ComputeProdLocationVisitor
+	super Visitor
+	# Currenlty visited productions that need a first token
+	var _need_first_prods: Array[Prod] = new Array[Prod]
+
+	# Already visited epsilon productions that waits something after them
+	var _need_after_epsilons: Array[Prod] = new Array[Prod]
+
+	# Already visited epsilon production that waits something before them
+	var _need_before_epsilons: Array[Prod] = new Array[Prod]
+
+	# Location of the last visited token in the current production
+	var _last_location: nullable Location = null
+
 	redef fun visit(n: nullable PNode)
 	do
 		if n == null then
 			return
 		else if n isa Token then
-			_last_token = n
-			for no in _untokenned_nodes do
-				no.first_token = n
+			var loc = n.location
+			_last_location = loc
+
+			# Add a first token to productions that need one
+			for no in _need_first_prods do
+				no._first_location = loc
 			end
-			_untokenned_nodes.clear
+			_need_first_prods.clear
+
+			# Find location for already visited epsilon production that need one
+			for no in _need_after_epsilons do
+				# Epsilon production that is in the middle of a non-epsilon production
+				# The epsilon production has both a token before and after it
+				var endl = loc
+				var startl = no._last_location
+				no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
+			end
+			_need_after_epsilons.clear
 		else
 			assert n isa Prod
-			_untokenned_nodes.add(n)
+			_need_first_prods.add(n)
+
+			var old_last = _last_location
+			_last_location = null
 			n.visit_all(self)
-			n.last_token = _last_token
-
-			if n.first_token != null then
-				var start_location = n.first_token.location
-				var end_location = _last_token.location
-
-				if start_location != null and end_location != null then
-					var file = end_location.file
-					var line_start = start_location.line_start
-					var line_end = end_location.line_end
-					var column_start = start_location.column_start
-					var column_end = end_location.column_end
-					n.location = new Location(file, line_start, line_end, column_start, column_end)
+
+			var endl = _last_location
+			if endl == null then _last_location = old_last
+
+			n._last_location = endl
+			var startl = n._first_location
+			if startl != null then
+				# Non-epsilon production
+				assert endl != null
+
+				n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
+
+				for no in _need_before_epsilons do
+					# Epsilon production that starts the current non-epsilon production
+					#var startl = n.location
+					no.location = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
+				end
+				_need_before_epsilons.clear
+
+				for no in _need_after_epsilons do
+					# Epsilon production that finishes the current non-epsilon production
+					#var endl = n.location
+					no.location = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
+				end
+				_need_after_epsilons.clear
+			else
+				# No first token means epsilon production (or "throw all my tokens" production)
+				# So, it must be located it later
+				if endl == null then
+					# Epsilon production that starts a parent non-epsilon production
+					_need_before_epsilons.add(n)
+				else
+					# Epsilon production in the middle or that finishes a parent non-epsilon production
+					_need_after_epsilons.add(n)
 				end
 			end
 		end
 	end
-	init
-	do
-		_untokenned_nodes = new Array[Prod]
-	end
+
+	init do end
 end
 
 # Each reduca action has its own class, this one is the root of the hierarchy.
 private abstract class ReduceAction
 	fun action(p: Parser) is abstract
+	fun concat(l1, l2 : Array[Object]): Array[Object]
+	do
+		if l1.is_empty then return l2
+		l1.append(l2)
+		return l1
+	end
 end
 
 $ foreach {rules/rule}
 private class ReduceAction@index
-special ReduceAction
+	super ReduceAction
 	redef fun action(p: Parser)
 	do
 		var node_list: nullable Object = null
@@ -249,13 +303,7 @@ $ when {@cmd='ADDNODE'}
 		end
 $ end
 $ when {@cmd='ADDLIST'}
-#	if ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
-		if ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.is_empty then
-			${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
-		else
-			${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.append(${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
-		end
-#	end
+		${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = concat(${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}, ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
$ end
 $ when {@cmd='MAKELIST'}
 		var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
@@ -283,67 +331,42 @@ $ when {@cmd='RETURNLIST'}
 $ end
 $ end choose
 $ end foreach
-		p.push(p.go_to(@leftside), node_list)
+		p.push(p.go_to(_goto), node_list)
 	end
-init do end
+	var _goto: Int
+	init(g: Int) do _goto = g
 end
 $ end foreach
 $ end template
 
-$ template make_parser_tables()
-# Parser that build a full AST
-abstract class ParserTable
-	var _action_table: Array[Array[Int]]
-	private fun build_action_table
-	do
-		_action_table = once [
+$ template make_parser_table()
 $ foreach {parser_data/action_table/row}
-			action_table_row${position()}[-sep ','-]
+static int parser_action_row${position()}[] = {
+	${count(action)},
+$ foreach {action}
+	@from, @action, @to[-sep ','-]
+$ end foreach
+};
 $ end foreach
-		]
-	end
+const int* const parser_action_table[] = {
 $ foreach {parser_data/action_table/row}
-	private fun action_table_row${position()}: Array[Int]
-	do
-		return [
-$ foreach {action}
-			@from, @action, @to[-sep ','-]
-$ end foreach
-		]
-	end
+	parser_action_row${position()}[-sep ','-]
 $ end foreach
+};
 
-	var _goto_table: Array[Array[Int]]
-	private fun build_goto_table
-	do
-		_goto_table = once [
 $ foreach {parser_data/goto_table/row}
-			[
+static int parser_goto_row${position()}[] = {
+	${count(goto)},
 $ foreach {goto}
-				@from, @to[-sep ','-]
+	@from, @to[-sep ','-]
 $ end foreach
-			][-sep ','-]
+};
 $ end foreach
-		]
-	end
-
-	private fun error_messages: Array[String]
-	do
-		return once [
-$ foreach {parser_data/error_messages/msg}
-			"${sablecc:string2escaped_unicode(.)}"[-sep ','-]
-$ end
-		]
-	end
-	private fun errors: Array[Int]
-	do
-		return once [
-			[-foreach {parser_data/errors/i}-]${.}[-sep ','-][-end-]
-		]
-	end
-
-	init do end
-end
+const int* const parser_goto_table[] = {
+$ foreach {parser_data/goto_table/row}
+	parser_goto_row${position()}[-sep ','-]
+$ end foreach
+};
$ end template
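
Note on the generated tables: this change drops the Nit-level _action_table and _goto_table (and their build_* methods) in favour of C arrays emitted by make_parser_table() and read through parser_action and parser_goto (provided via TablesCapable). Each generated row is length-prefixed: slot 0 holds the number of entries, slots 2 and 3 hold the default action and value, and the remaining (token, action, value) triples are sorted by token index, which is why the binary searches above index with middle * 3 + 1 (and middle * 2 + 1 for the goto pairs). The standalone C sketch below mimics that lookup; the helper name lookup_action and the sample row values are invented for illustration and are not part of the generated parser.

#include <stdio.h>

/* Hypothetical sample row mimicking the layout emitted by make_parser_table():
 * slot 0 = number of (token, action, value) triples, the first triple is the
 * default entry (read at slots 2 and 3), the rest are sorted by token index.
 * The concrete numbers here are made up. */
static int sample_action_row[] = {
	3,          /* number of triples that follow */
	-1, 1, 32,  /* default triple: default action 1, value 32 */
	4, 0, 7,    /* token 4: action 0, value 7 */
	9, 0, 12    /* token 9: action 0, value 12 */
};

/* Binary search over one row; mirrors the Nit loop in Parser::parse,
 * where subindex = middle * 3 + 1 because slot 0 holds the length. */
static void lookup_action(const int *row, int token, int *type, int *value)
{
	int low = 1, high = row[0] - 1;
	*type = row[2];   /* default action */
	*value = row[3];  /* default value */
	while (low <= high) {
		int middle = (low + high) / 2;
		int subindex = middle * 3 + 1;
		if (token < row[subindex]) {
			high = middle - 1;
		} else if (token > row[subindex]) {
			low = middle + 1;
		} else {
			*type = row[subindex + 1];
			*value = row[subindex + 2];
			return;
		}
	}
}

int main(void)
{
	int type, value;
	lookup_action(sample_action_row, 9, &type, &value);
	printf("token 9 -> action %d, value %d\n", type, value); /* exact match */
	lookup_action(sample_action_row, 5, &type, &value);
	printf("token 5 -> action %d, value %d\n", type, value); /* falls back to default */
	return 0;
}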