parser: remove some remaining old style accessors
[nit.git] / src / parser / parser_work.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Internal algorithm and data structures for the Nit parser
16 module parser_work
17
18 intrude import parser_prod
19
20 # State of the parser automata as stored in the parser stack.
21 private class State
22 # The internal state number
23 var _state: Int
24
25 # The node stored with the state in the stack
26 var _nodes: nullable Object
27
28 init(state: Int, nodes: nullable Object)
29 do
30 _state = state
31 _nodes = nodes
32 end
33 end
34
35 class Parser
36 super TablesCapable
37 # Associated lexer
38 var _lexer: Lexer
39
40 # Stack of pushed states and productions
41 var _stack: Array[State]
42
43 # Position in the stack
44 var _stack_pos: Int
45
46 # Create a new parser based on a given lexer
47 init(lexer: Lexer)
48 do
49 _lexer = lexer
50 _stack = new Array[State]
51 _stack_pos = -1
52 build_reduce_table
53 end
54
55 # Do a transition in the automata
56 private fun go_to(index: Int): Int
57 do
58 var state = state
59 var low = 1
60 var high = parser_goto(index, 0) - 1
61
62 while low <= high do
63 var middle = (low + high) / 2
64 var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
65
66 var goal = parser_goto(index, subindex)
67 if state < goal then
68 high = middle - 1
69 else if state > goal then
70 low = middle + 1
71 else
72 return parser_goto(index, subindex+1)
73 end
74 end
75
76 return parser_goto(index, 2) # Default value
77 end
78
79 # Push someting in the state stack
80 private fun push(numstate: Int, list_node: nullable Object)
81 do
82 var pos = _stack_pos + 1
83 _stack_pos = pos
84 if pos < _stack.length then
85 var state = _stack[pos]
86 state._state = numstate
87 state._nodes = list_node
88 else
89 _stack.push(new State(numstate, list_node))
90 end
91 end
92
93 # The current state
94 private fun state: Int
95 do
96 return _stack[_stack_pos]._state
97 end
98
99 # Pop something from the stack state
100 private fun pop: nullable Object
101 do
102 var res = _stack[_stack_pos]._nodes
103 _stack_pos = _stack_pos -1
104 return res
105 end
106
107 # Build and return a full AST.
108 fun parse: Start
109 do
110 push(0, null)
111
112 var lexer = _lexer
113 loop
114 var token = lexer.peek
115 if token isa AError then
116 return new Start(null, token)
117 end
118
119 var state = self.state
120 var index = token.parser_index
121 var action_type = parser_action(state, 2)
122 var action_value = parser_action(state, 3)
123
124 var low = 1
125 var high = parser_action(state, 0) - 1
126
127 while low <= high do
128 var middle = (low + high) / 2
129 var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
130
131 var goal = parser_action(state, subindex)
132 if index < goal then
133 high = middle - 1
134 else if index > goal then
135 low = middle + 1
136 else
137 action_type = parser_action(state, subindex+1)
138 action_value = parser_action(state, subindex+2)
139 break
140 end
141 end
142
143 if action_type == 0 then # SHIFT
144 push(action_value, lexer.next)
145 else if action_type == 1 then # REDUCE
146 _reduce_table[action_value].action(self)
147 else if action_type == 2 then # ACCEPT
148 var node2 = lexer.next
149 assert node2 isa EOF
150 var node1 = pop
151 assert node1 isa AModule
152 var node = new Start(node1, node2)
153 (new ComputeProdLocationVisitor).enter_visit(node)
154 return node
155 else if action_type == 3 then # ERROR
156 # skip injected tokens
157 while token._location == null do token = lexer.next
158 var node2 = new AParserError.init_parser_error("Syntax error: unexpected {token}.", token.location, token)
159 var node = new Start(null, node2)
160 return node
161 end
162 end
163 end
164
165 var _reduce_table: Array[ReduceAction]
166 private fun build_reduce_table is abstract
167 end
168
169 redef class Prod
170 # Location on the first token after the start of a production
171 # So outside the production for epilon production
172 var _first_location: nullable Location
173 end
174
175 # Find location of production nodes
176 # Uses existing token locations to infer location of productions.
177 private class ComputeProdLocationVisitor
178 super Visitor
179 # Currenlty visited productions that need a first token
180 var _need_first_prods: Array[Prod] = new Array[Prod]
181
182 # Already visited epsilon productions that waits something after them
183 var _need_after_epsilons: Array[Prod] = new Array[Prod]
184
185 # Location of the last visited token in the current production
186 var _last_location: nullable Location = null
187
188 redef fun visit(n: ANode)
189 do
190 if n isa Token then
191 var loc = n._location
192 if loc == null then return
193 _last_location = loc
194
195 # Add a first token to productions that need one
196 if not _need_first_prods.is_empty then
197 for no in _need_first_prods do
198 no._first_location = loc
199 end
200 _need_first_prods.clear
201 end
202
203 # Find location for already visited epsilon production that need one
204 if not _need_after_epsilons.is_empty then
205 var loco = new Location(loc.file, loc.line_start, loc.line_start, loc.column_start, loc.column_start)
206 for no in _need_after_epsilons do
207 no.location = loco
208 end
209 _need_after_epsilons.clear
210 end
211 else
212 assert n isa Prod
213 _need_first_prods.add(n)
214
215 n.visit_all(self)
216
217 var startl = n._first_location
218 if startl != null then
219 # Non-epsilon production
220 var endl = _last_location
221 assert endl != null
222
223 n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
224
225 if not _need_after_epsilons.is_empty then
226 var loc = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
227 for no in _need_after_epsilons do
228 # Epsilon production that finishes the current non-epsilon production
229 no.location = loc
230 end
231 _need_after_epsilons.clear
232 end
233 else
234 # Epsilon production in the middle or that finishes a parent non-epsilon production
235 _need_after_epsilons.add(n)
236 end
237 end
238 end
239
240 init do end
241 end
242
243 # Each reduca action has its own class, this one is the root of the hierarchy.
244 private abstract class ReduceAction
245 fun action(p: Parser) is abstract
246 fun concat(l1, l2 : Array[Object]): Array[Object]
247 do
248 if l1.is_empty then return l2
249 l1.append(l2)
250 return l1
251 end
252 var _goto: Int
253 init(g: Int) do _goto = g
254 end