ca423509a714ef586d0c342dec08b2d93a923fc7
[nit.git] / src / parser / xss / parser.xss
1 $ // This file is part of NIT ( http://www.nitlanguage.org ).
2 $ //
3 $ // Copyright 2008 Jean Privat <jean@pryen.org>
4 $ // Based on algorithms developed for ( http://www.sablecc.org/ ).
5 $ //
6 $ // Licensed under the Apache License, Version 2.0 (the "License");
7 $ // you may not use this file except in compliance with the License.
8 $ // You may obtain a copy of the License at
9 $ //
10 $ //     http://www.apache.org/licenses/LICENSE-2.0
11 $ //
12 $ // Unless required by applicable law or agreed to in writing, software
13 $ // distributed under the License is distributed on an "AS IS" BASIS,
14 $ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 $ // See the License for the specific language governing permissions and
16 $ // limitations under the License.
17
18 $ template make_parser()
19
# One cell of the parser stack: an automaton state number together
# with the object that was pushed along with it.
private class State
        # Number of the automaton state held by this cell
        readable writable var _state: Int

        # Object stored with the state in the stack (may be null)
        readable writable var _nodes: nullable Object

        # Fill a new cell with a state number and its attached object
        init(state: Int, nodes: nullable Object)
        do
                _nodes = nodes
                _state = state
        end
end
34
# Table-driven parser: reads tokens from a lexer, follows the action and
# goto tables inherited from ParserTable and runs the reduce actions that
# build the AST.
class Parser
special ParserTable
        # Associated lexer
        var _lexer: Lexer

        # Stack of pushed states and productions.
        # Cells above the current position are kept and reused by push.
        var _stack: Array[State]

        # Position of the top cell in the stack (-1 when nothing is pushed)
        var _stack_pos: Int

        # Create a new parser based on a given lexer
        init(lexer: Lexer)
        do
                _lexer = lexer
                _stack = new Array[State]
                _stack_pos = -1
                build_goto_table
                build_action_table
                build_reduce_table
        end

        # Do a transition in the automaton.
        # index selects the row of the goto table; the current state is
        # looked up in that row and the associated target state is returned.
        private fun go_to(index: Int): Int
        do
                var current = state
                var row = _goto_table[index]
                # A row is a flat list of pairs. Pair 0 carries the default
                # target, so the binary search only covers pairs 1 and up.
                var lo = 1
                var hi = row.length/2 - 1

                while lo <= hi do
                        var mid = (lo + hi) / 2
                        var pos = mid * 2
                        var key = row[pos]

                        if current < key then
                                hi = mid - 1
                        else if current > key then
                                lo = mid + 1
                        else
                                return row[pos + 1]
                        end
                end

                return row[1] # Default value
        end

        # Push something on the state stack
        private fun push(numstate: Int, list_node: nullable Object)
        do
                var pos = _stack_pos + 1
                _stack_pos = pos
                if pos >= _stack.length then
                        # No spare cell left: grow the stack
                        _stack.push(new State(numstate, list_node))
                else
                        # Recycle a cell left behind by a previous pop
                        var cell = _stack[pos]
                        cell.state = numstate
                        cell.nodes = list_node
                end
        end

        # The current state
        private fun state: Int
        do
                return _stack[_stack_pos].state
        end

        # Pop something from the state stack and return the object it held
        private fun pop: nullable Object
        do
                var top = _stack[_stack_pos]
                _stack_pos = _stack_pos - 1
                return top.nodes
        end

        # Build and return a full AST.
        # On a lexer error or a syntax error, the returned Start has no
        # production and carries the error token instead.
        fun parse: Start
        do
                push(0, null)

                var lexer = _lexer
                while true do
                        var token = lexer.peek
                        if token isa PError then
                                return new Start(null, token)
                        end

                        var tok_index = token.parser_index
                        var row = _action_table[state]
                        # A row is a flat list of triples. Triple 0 carries the
                        # default action, used when no other triple matches.
                        var action_type = row[1]
                        var action_value = row[2]

                        # Binary search of the token index among triples 1 and up
                        var lo = 1
                        var hi = row.length/3 - 1

                        while lo <= hi do
                                var mid = (lo + hi) / 2
                                var pos = mid * 3
                                var key = row[pos]

                                if tok_index < key then
                                        hi = mid - 1
                                else if tok_index > key then
                                        lo = mid + 1
                                else
                                        action_type = row[pos + 1]
                                        action_value = row[pos + 2]
                                        hi = lo - 1 # found: leave the loop
                                end
                        end

                        if action_type == 0 then # SHIFT
                                push(action_value, lexer.next)
                        else if action_type == 1 then # REDUCE
                                _reduce_table[action_value].action(self)
                        else if action_type == 2 then # ACCEPT
                                var node2 = lexer.next
                                assert node2 isa EOF
                                var node1 = pop
                                assert node1 isa ${/parser/prods/prod/@ename}
                                var node = new Start(node1, node2)
                                (new ComputeProdLocationVisitor).enter_visit(node)
                                return node
                        else if action_type == 3 then # ERROR
                                var node2 = new PError.init_error("Syntax error: unexpected token.", token.location)
                                var node = new Start(null, node2)
                                return node
                        else
                                # Unknown action code: the token was only peeked and the
                                # stack did not change, so going on would loop forever.
                                abort
                        end
                end
                abort
        end

        # One reduce action object per reduction, indexed by reduction number
        var _reduce_table: Array[ReduceAction]
        private fun build_reduce_table
        do
                _reduce_table = new Array[ReduceAction].with_items(
$ foreach {rules/rule}
                        new ReduceAction@index[-sep ','-]
$ end foreach
                )
        end
end
175
redef class Prod
        # Location of the last token seen before the end of the production.
        # Filled in by ComputeProdLocationVisitor; for an epsilon production
        # it is intended to designate a token outside the production.
        var _last_location: nullable Location

        # Location of the first token seen after the start of the production.
        # For an epsilon production that token lies outside the production.
        var _first_location: nullable Location
end
185
# Find location of production nodes
# Uses existing token locations to infer location of productions.
#
# Productions that contain no token (epsilon productions) cannot be located
# from their own content. They are queued and located later, from the
# tokens or productions found around them.
private class ComputeProdLocationVisitor
special Visitor
        # Currently visited productions that need a first token
        var _need_first_prods: Array[Prod] = new Array[Prod]

        # Already visited epsilon productions that wait for something after them
        var _need_after_epsilons: Array[Prod] = new Array[Prod]

        # Already visited epsilon productions that wait for something before them
        var _need_before_epsilons: Array[Prod] = new Array[Prod]

        # Location of the last visited token in the current production
        var _last_location: nullable Location = null

        redef fun visit(n: nullable PNode)
        do
                if n == null then
                        return
                else if n isa Token then
                        var loc = n.location
                        _last_location = loc

                        # Add a first token to productions that need one
                        for no in _need_first_prods do
                                no._first_location = loc
                        end
                        _need_first_prods.clear

                        # Find location for already visited epsilon productions that need one
                        for no in _need_after_epsilons do
                                # Epsilon production that is in the middle of a non-epsilon production
                                # The epsilon production has both a token before and after it
                                var endl = loc
                                var startl = no._last_location
                                # Productions are queued here only with a known previous token
                                assert startl != null
                                no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
                        end
                        _need_after_epsilons.clear
                else
                        assert n isa Prod
                        _need_first_prods.add(n)

                        # Visit the children with a fresh last location, so that
                        # what was seen inside n can be told from what came before
                        var old_last = _last_location
                        _last_location = null
                        n.visit_all(self)
                        var inner_last = _last_location
                        var endl = inner_last
                        if inner_last == null then
                                # No token inside n: restore the previous last location and
                                # use it as the last token before the end of n. Without this
                                # fallback, endl is always null for a production without
                                # token, so it could never wait for something after it.
                                _last_location = old_last
                                endl = old_last
                        end

                        n._last_location = endl
                        var startl = n._first_location
                        if startl != null then
                                # Non-epsilon production
                                assert endl != null

                                n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)

                                for no in _need_before_epsilons do
                                        # Epsilon production that starts the current non-epsilon production
                                        no.location = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
                                end
                                _need_before_epsilons.clear

                                for no in _need_after_epsilons do
                                        # Epsilon production that finishes the current non-epsilon production
                                        no.location = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
                                end
                                _need_after_epsilons.clear
                        else
                                # No first token means epsilon production (or "throw all my tokens" production)
                                # So, it must be located later
                                if endl == null then
                                        # Epsilon production that starts a parent non-epsilon production
                                        _need_before_epsilons.add(n)
                                else
                                        # Epsilon production in the middle or that finishes a parent non-epsilon production
                                        _need_after_epsilons.add(n)
                                end
                        end
                end
        end

        init do end
end
272
# Root of the reduce action hierarchy.
# Each reduce action has its own class that specializes this one.
private abstract class ReduceAction
        # Run the reduction on the parser p
        fun action(p: Parser) is abstract
end
277
$ // One private class is generated for each rule. Its action method replays
$ // the commands of the rule (POP, FETCHLIST, FETCHNODE, ADDNODE, ADDLIST,
$ // MAKELIST, MAKENODE, RETURNNODE, RETURNLIST) as Nit statements, then pushes
$ // the result on the parser stack with the state given by go_to.
$ // Names coming from the grammar are lowercased with translate to get
$ // Nit local variable names.
$ foreach {rules/rule}
private class ReduceAction@index
special ReduceAction
        redef fun action(p: Parser)
        do
                var node_list: nullable Object = null
$   foreach {action}
$   choose
$     when {@cmd='POP'}
                var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = p.pop
$     end
$     when {@cmd='FETCHLIST'}
                var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
                assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa Array[Object]
$     end
$     when {@cmd='FETCHNODE'}
                var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
                assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa nullable @etype
$     end
$     when {@cmd='ADDNODE'}
                if ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
                        ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.add(${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
                end
$     end
$     when {@cmd='ADDLIST'}
#               if ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
                if ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.is_empty then
                        ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
                else
                        ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.append(${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
                end
#               end
$     end
$     when {@cmd='MAKELIST'}
                var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
$     end
$     when {@cmd='MAKENODE'}
                var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
$       foreach {arg}
$           if @null
                        null[-sep ','-]
$           else
                        ${translate(.,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}[-sep ','-]
$           end
$       end foreach
                )
$     end
$     when {@cmd='RETURNNODE'}
$       if @null
                node_list = null
$       else
                node_list = ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
$       end
$     end
$     when {@cmd='RETURNLIST'}
                node_list = ${translate(@list,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
$     end
$   end choose
$   end foreach
                p.push(p.go_to(@leftside), node_list)
        end

        init do end
end
$ end foreach
342 $ end template
343
344 $ template make_parser_tables()
# Holder of the generated action and goto tables used by the parser
# that builds a full AST.
abstract class ParserTable
        # Action table, one row per entry of the generated action table.
        # Each row is a flat list of integer triples (from, action, to).
        var _action_table: Array[Array[Int]]

        # Set the action table; the array of rows is built only once
        private fun build_action_table
        do
                _action_table = once [
$ foreach {parser_data/action_table/row}
                        action_table_row${position()}[-sep ','-]
$ end foreach
                ]
        end

$ // Each row of the action table gets its own method
$ foreach {parser_data/action_table/row}
        private fun action_table_row${position()}: Array[Int]
        do
                return [
$   foreach {action}
                        @from, @action, @to[-sep ','-]
$   end foreach
                ]
        end
$ end foreach

        # Goto table, one row per entry of the generated goto table.
        # Each row is a flat list of integer pairs (from, to).
        var _goto_table: Array[Array[Int]]

        # Set the goto table; the array of rows is built only once
        private fun build_goto_table
        do
                _goto_table = once [
$ foreach {parser_data/goto_table/row}
                        [
$   foreach {goto}
                                @from, @to[-sep ','-]
$   end foreach
                        ][-sep ','-]
$ end foreach
                ]
        end

        init do end
end
384 $ end template