src: update AST client to handle qualified identifiers (no semantic change)
[nit.git] / src / parser / parser_work.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Internal algorithm and data structures for the Nit parser
16 module parser_work
17
18 intrude import parser_prod
19
# Entry of the parser stack: an automaton state paired with the node pushed with it.
private class State
	# Number of the automaton state
	var state: Int

	# Node (or list of nodes) pushed on the stack together with the state
	var nodes: nullable Object
end
28
# Table-driven shift/reduce parser of the Nit language.
class Parser
	super TablesCapable

	# Lexer that provides the tokens to parse
	var lexer: Lexer

	# Stack of pushed states and productions.
	# Entries are never removed: those above `stack_pos` are recycled by `push`.
	private var stack = new Array[State]

	# Index of the top entry of `stack` (-1 while nothing is pushed)
	private var stack_pos: Int = -1

	init
	do
		build_reduce_table
	end

	# Do a transition in the automaton.
	#
	# The goto row `index` is binary-searched for the current state.
	# When no entry matches, the default value of the row is returned.
	private fun go_to(index: Int): Int
	do
		var current = self.state
		var lo = 1
		var hi = parser_goto(index, 0) - 1

		while lo <= hi do
			var mid = (lo + hi) / 2
			# Entries are pairs; +1 because parser_goto(index, 0) is the length
			var slot = mid * 2 + 1
			var key = parser_goto(index, slot)

			if key == current then return parser_goto(index, slot + 1)
			if current < key then
				hi = mid - 1
			else
				lo = mid + 1
			end
		end

		# No entry for the current state: default value
		return parser_goto(index, 2)
	end

	# Push something on the state stack.
	#
	# An existing `State` entry is reused when the stack already has one at
	# the new position; otherwise a fresh entry is appended.
	private fun push(numstate: Int, list_node: nullable Object)
	do
		var top = _stack_pos + 1
		_stack_pos = top
		if top >= _stack.length then
			_stack.push(new State(numstate, list_node))
			return
		end
		var slot = _stack[top]
		slot._state = numstate
		slot._nodes = list_node
	end

	# The current state (the one stored at the top of the stack)
	private fun state: Int do return _stack[_stack_pos]._state

	# Pop the top of the state stack and return the node stored with it.
	# The popped entry stays in `stack` so that `push` can reuse it.
	private fun pop: nullable Object
	do
		var top = _stack_pos
		var res = _stack[top]._nodes
		_stack_pos = top - 1
		return res
	end

	# Build and return a full AST.
	#
	# On a lexical or syntax error, the returned `Start` has no module and
	# holds the error node instead of the EOF token.
	fun parse: Start
	do
		push(0, null)

		var lexer = _lexer
		loop
			var token = lexer.peek
			if token isa AError then return new Start(null, token)

			var current = self.state
			var wanted = token.parser_index

			# Default action of the state, kept when no entry matches the token
			var kind = parser_action(current, 2)
			var arg = parser_action(current, 3)

			# Binary search of the action entry associated with the token
			var lo = 1
			var hi = parser_action(current, 0) - 1
			while lo <= hi do
				var mid = (lo + hi) / 2
				# Entries are triples; +1 because parser_action(state, 0) is the length
				var slot = mid * 3 + 1
				var key = parser_action(current, slot)

				if wanted == key then
					kind = parser_action(current, slot + 1)
					arg = parser_action(current, slot + 2)
					break
				end
				if wanted < key then
					hi = mid - 1
				else
					lo = mid + 1
				end
			end

			if kind == 0 then
				# SHIFT: consume the token and push it with the new state
				push(arg, lexer.next)
				continue
			end

			if kind == 1 then
				# REDUCE: delegate to the corresponding reduce action
				_reduce_table[arg].action(self)
				continue
			end

			if kind == 2 then
				# ACCEPT: the module is on the top of the stack, EOF is the next token
				var eof = lexer.next
				assert eof isa EOF
				var root = pop
				assert root isa AModule
				var start_node = new Start(root, eof)
				eof.parent = start_node
				var locator = new ComputeProdLocationVisitor(lexer.file.first_token)
				locator.enter_visit(start_node)
				return start_node
			end

			if kind == 3 then
				# ERROR
				# skip injected tokens
				while not isset token._location do token = lexer.next
				var err_node = new AParserError.init_parser_error("Syntax Error: unexpected {token}.", token.location, token)
				return new Start(null, err_node)
			end
		end
	end

	# Reduce actions, indexed by the value of a REDUCE action.
	# Not initialized at construction; presumably filled by `build_reduce_table`.
	private var reduce_table: Array[ReduceAction] is noinit

	# Set up `reduce_table`; called by the constructor.
	# The implementation is not visible in this module.
	private fun build_reduce_table is abstract
end
160
redef class Prod
	# Location of the first token found after the start of the production.
	# For an epsilon production, this token is therefore outside the production.
	var first_location: nullable Location

	# Concatenate the text of all the tokens visited from `self`.
	#
	# The collected text is asserted to be non-empty.
	fun collect_text: String
	do
		var collector = new TextCollectorVisitor
		collector.enter_visit(self)
		var joined = collector.text
		assert joined != ""
		return joined
	end
end
175
# Compute the location of production nodes.
# Locations of productions are inferred from the locations of the visited tokens.
private class ComputeProdLocationVisitor
	super Visitor

	# The current (or starting) cursor on the token sequence, used to collect loose tokens
	var token: nullable Token

	# Productions currently visited that still need a first token
	var need_first_prods = new Array[Prod]

	# Epsilon productions already visited that wait for a location
	var need_after_epsilons = new Array[Prod]

	# The last visited token in the current production
	var last_token: nullable Token = null

	redef fun visit(n: ANode)
	do
		if n isa Token then
			process_token(n)
		else
			assert n isa Prod
			process_prod(n)
		end
	end

	# Handle the token `n` of the AST.
	#
	# Attaches the loose tokens found before `n`, then gives a location to the
	# productions that were waiting for a token.
	private fun process_token(n: Token)
	do
		# Skip injected tokens
		if not isset n._location then return

		# Collect loose tokens (not in the AST) and attach them to tokens of the AST.
		# In order, the token sequence holds:
		# * `prev` the previously visited token of the AST (if any)
		# * then `loose` the loose tokens to attach
		# * then `n` the currently visited token of the AST
		var loose = token
		if n != loose then
			var prev = _last_token
			if prev != null then
				var prev_line = prev.location.line_end
				# Loose tokens on the same line as an AST token follow it
				while loose != null and loose != n and prev_line == loose.location.line_start do
					loose.is_loose = true
					prev.next_looses.add(loose)
					loose = loose.next_token
				end
			end
			# The other loose tokens precede the next AST token
			while loose != null and loose != n do
				loose.is_loose = true
				n.prev_looses.add(loose)
				loose = loose.next_token
			end
		end
		token = n.next_token

		var loc = n._location
		_last_token = n

		# Give a first token to the productions that need one
		for prod in _need_first_prods do
			prod._first_location = loc
		end
		_need_first_prods.clear

		# Already visited epsilon productions get an empty location at the start of `n`
		if _need_after_epsilons.is_empty then return
		var before = new Location(loc.file, loc.line_start, loc.line_start, loc.column_start, loc.column_start)
		for eps in _need_after_epsilons do
			eps.location = before
		end
		_need_after_epsilons.clear
	end

	# Handle the production `n`: visit its children, then compute its location.
	private fun process_prod(n: Prod)
	do
		_need_first_prods.add(n)

		n.visit_all(self)

		var startl = n._first_location
		if startl == null then
			# Epsilon production in the middle of, or finishing, a parent non-epsilon production
			_need_after_epsilons.add(n)
			return
		end

		# Non-epsilon production: spans from its first token to the last visited token
		var endl = _last_token.location
		if startl == endl then
			n.location = startl
		else
			n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
		end

		# Epsilon productions that finish the current non-epsilon production
		# get an empty location at its end
		if _need_after_epsilons.is_empty then return
		var after = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
		for eps in _need_after_epsilons do
			eps.location = after
		end
		_need_after_epsilons.clear
	end
end
278
# Visitor that concatenates the text of the tokens it visits.
private class TextCollectorVisitor
	super Visitor

	# Text collected so far, in visit order
	var text: String = ""

	redef fun visit(n)
	do
		if n isa Token then
			text = text + n.text
		end
		# Keep going through the children of `n`
		n.visit_all(self)
	end
end
288
289
# Root of the hierarchy of reduce actions: each reduce action has its own class.
private abstract class ReduceAction
	# Goto value of the action.
	# NOTE(review): presumably the index given to `Parser::go_to` after the reduction; confirm in the subclasses.
	var goto: Int

	# Perform the reduction with the parser `p`.
	fun action(p: Parser) is abstract

	# Concatenate two lists.
	#
	# Returns `l2` itself when `l1` is empty; otherwise `l2` is appended
	# to `l1` in place and `l1` is returned.
	fun concat(l1, l2 : Array[Object]): Array[Object]
	do
		if not l1.is_empty then
			l1.append(l2)
			return l1
		end
		return l2
	end
end
301
redef class AExpr

	# Get `self` as a single identifier.
	#
	# An `AMethidExpr` yields its collected text.
	# A call on the implicit `self` without arguments yields the text of the
	# identifier part of its `n_qid` (nothing else of `n_qid` is read).
	# Return null in all the other cases.
	fun as_id: nullable String
	do
		if self isa AMethidExpr then return collect_text

		if self isa ACallExpr then
			var is_bare = self.n_expr isa AImplicitSelfExpr and self.n_args.n_exprs.is_empty
			if is_bare then return self.n_qid.n_id.text
		end
		return null
	end
end