4d679086b940467b8a98316e9036115d56d5711d
[nit.git] / src / parser / parser_work.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Internal algorithm and data structures for the Nit parser
16 module parser_work
17
18 intrude import parser_prod
19
20 # State of the parser automata as stored in the parser stack.
21 private class State
22 # The internal state number
23 var state: Int
24
25 # The node stored with the state in the stack
26 var nodes: nullable Object
27
28 init(state: Int, nodes: nullable Object)
29 do
30 _state = state
31 _nodes = nodes
32 end
33 end
34
35 # The parser of the Nit language.
36 class Parser
37 super TablesCapable
38 # Associated lexer
39 var lexer: Lexer
40
41 # Stack of pushed states and productions
42 private var stack: Array[State]
43
44 # Position in the stack
45 private var stack_pos: Int
46
47 # Create a new parser based on a given lexer
48 init(lexer: Lexer)
49 do
50 _lexer = lexer
51 _stack = new Array[State]
52 _stack_pos = -1
53 build_reduce_table
54 end
55
56 # Do a transition in the automata
57 private fun go_to(index: Int): Int
58 do
59 var state = state
60 var low = 1
61 var high = parser_goto(index, 0) - 1
62
63 while low <= high do
64 var middle = (low + high) / 2
65 var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
66
67 var goal = parser_goto(index, subindex)
68 if state < goal then
69 high = middle - 1
70 else if state > goal then
71 low = middle + 1
72 else
73 return parser_goto(index, subindex+1)
74 end
75 end
76
77 return parser_goto(index, 2) # Default value
78 end
79
80 # Push someting in the state stack
81 private fun push(numstate: Int, list_node: nullable Object)
82 do
83 var pos = _stack_pos + 1
84 _stack_pos = pos
85 if pos < _stack.length then
86 var state = _stack[pos]
87 state._state = numstate
88 state._nodes = list_node
89 else
90 _stack.push(new State(numstate, list_node))
91 end
92 end
93
94 # The current state
95 private fun state: Int
96 do
97 return _stack[_stack_pos]._state
98 end
99
100 # Pop something from the stack state
101 private fun pop: nullable Object
102 do
103 var res = _stack[_stack_pos]._nodes
104 _stack_pos = _stack_pos -1
105 return res
106 end
107
108 # Build and return a full AST.
109 fun parse: Start
110 do
111 push(0, null)
112
113 var lexer = _lexer
114 loop
115 var token = lexer.peek
116 if token isa AError then
117 return new Start(null, token)
118 end
119
120 var state = self.state
121 var index = token.parser_index
122 var action_type = parser_action(state, 2)
123 var action_value = parser_action(state, 3)
124
125 var low = 1
126 var high = parser_action(state, 0) - 1
127
128 while low <= high do
129 var middle = (low + high) / 2
130 var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
131
132 var goal = parser_action(state, subindex)
133 if index < goal then
134 high = middle - 1
135 else if index > goal then
136 low = middle + 1
137 else
138 action_type = parser_action(state, subindex+1)
139 action_value = parser_action(state, subindex+2)
140 break
141 end
142 end
143
144 if action_type == 0 then # SHIFT
145 push(action_value, lexer.next)
146 else if action_type == 1 then # REDUCE
147 _reduce_table[action_value].action(self)
148 else if action_type == 2 then # ACCEPT
149 var node2 = lexer.next
150 assert node2 isa EOF
151 var node1 = pop
152 assert node1 isa AModule
153 var node = new Start(node1, node2)
154 (new ComputeProdLocationVisitor).enter_visit(node)
155 return node
156 else if action_type == 3 then # ERROR
157 # skip injected tokens
158 while not isset token._location do token = lexer.next
159 var node2 = new AParserError.init_parser_error("Syntax error: unexpected {token}.", token.location, token)
160 var node = new Start(null, node2)
161 return node
162 end
163 end
164 end
165
166 private var reduce_table: Array[ReduceAction]
167 private fun build_reduce_table is abstract
168 end
169
170 redef class Prod
171 # Location on the first token after the start of a production
172 # So outside the production for epsilon production
173 var first_location: nullable Location
174
175 # Join the text of all visited tokens
176 fun collect_text: String
177 do
178 var v = new TextCollectorVisitor
179 v.enter_visit(self)
180 assert v.text != ""
181 return v.text
182 end
183 end
184
185 # Find location of production nodes
186 # Uses existing token locations to infer location of productions.
187 private class ComputeProdLocationVisitor
188 super Visitor
189 # Currently visited productions that need a first token
190 var need_first_prods = new Array[Prod]
191
192 # Already visited epsilon productions that waits something after them
193 var need_after_epsilons = new Array[Prod]
194
195 # Location of the last visited token in the current production
196 var last_location: nullable Location = null
197
198 redef fun visit(n: ANode)
199 do
200 if n isa Token then
201 if not isset n._location then return
202 var loc = n._location
203 _last_location = loc
204
205 # Add a first token to productions that need one
206 if not _need_first_prods.is_empty then
207 for no in _need_first_prods do
208 no._first_location = loc
209 end
210 _need_first_prods.clear
211 end
212
213 # Find location for already visited epsilon production that need one
214 if not _need_after_epsilons.is_empty then
215 var loco = new Location(loc.file, loc.line_start, loc.line_start, loc.column_start, loc.column_start)
216 for no in _need_after_epsilons do
217 no.location = loco
218 end
219 _need_after_epsilons.clear
220 end
221 else
222 assert n isa Prod
223 _need_first_prods.add(n)
224
225 n.visit_all(self)
226
227 var startl = n._first_location
228 if startl != null then
229 # Non-epsilon production
230 var endl = _last_location
231 assert endl != null
232
233 n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
234
235 if not _need_after_epsilons.is_empty then
236 var loc = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
237 for no in _need_after_epsilons do
238 # Epsilon production that finishes the current non-epsilon production
239 no.location = loc
240 end
241 _need_after_epsilons.clear
242 end
243 else
244 # Epsilon production in the middle or that finishes a parent non-epsilon production
245 _need_after_epsilons.add(n)
246 end
247 end
248 end
249 end
250
251 private class TextCollectorVisitor
252 super Visitor
253 var text: String = ""
254 redef fun visit(n)
255 do
256 if n isa Token then text += n.text
257 n.visit_all(self)
258 end
259 end
260
261
262 # Each reduce action has its own class, this one is the root of the hierarchy.
263 private abstract class ReduceAction
264 fun action(p: Parser) is abstract
265 fun concat(l1, l2 : Array[Object]): Array[Object]
266 do
267 if l1.is_empty then return l2
268 l1.append(l2)
269 return l1
270 end
271 var goto: Int
272 init(g: Int) do _goto = g
273 end