a46156397c9bb1ecef1f62390bf0c35ff0899258
[nit.git] / src / parser / xss / parser.xss
1 $ // This file is part of NIT ( http://www.nitlanguage.org ).
2 $ //
3 $ // Copyright 2008 Jean Privat <jean@pryen.org>
4 $ // Based on algorithms developed for ( http://www.sablecc.org/ ).
5 $ //
6 $ // Licensed under the Apache License, Version 2.0 (the "License");
7 $ // you may not use this file except in compliance with the License.
8 $ // You may obtain a copy of the License at
9 $ //
10 $ //     http://www.apache.org/licenses/LICENSE-2.0
11 $ //
12 $ // Unless required by applicable law or agreed to in writing, software
13 $ // distributed under the License is distributed on an "AS IS" BASIS,
14 $ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 $ // See the License for the specific language governing permissions and
16 $ // limitations under the License.
17
18 $ template make_parser()
19
20 # One slot of the parser stack: an automaton state number and the value pushed with it.
21 private class State
22         # Number of the automaton state held by this slot
23         readable writable var _state: Int
24
25         # Value pushed along with the state (may be null)
26         readable writable var _nodes: nullable Object
27
28         # Fill a new slot with `state` and `nodes`.
29         init(state: Int, nodes: nullable Object) do
30                 _nodes = nodes
31                 _state = state
32         end
33 end
34 # Table-driven shift/reduce parser: reads the tokens of a lexer and builds the AST.
35 class Parser
36         super TablesCapable
37         # Lexer the tokens are read from
38         var _lexer: Lexer
39
40         # Stack of states; slots above `_stack_pos` are kept and reused by `push`
41         var _stack: Array[State]
42
43         # Index of the top slot in `_stack` (-1 when nothing is pushed)
44         var _stack_pos: Int
45
46         # Create a new parser reading from `lexer`, with an empty stack
47         init(lexer: Lexer)
48         do
49                 _lexer = lexer
50                 _stack = new Array[State]
51                 _stack_pos = -1
52                 build_reduce_table
53         end
54
55         # Return the target state of the goto transition found in row `index` for the current state
56         private fun go_to(index: Int): Int
57         do
58                 var state = state
59                 var low = 1
60                 var high = parser_goto(index, 0) - 1
61                 # Binary search over the (state, target) pairs; pair 0 is skipped, it holds the default
62                 while low <= high do
63                         var middle = (low + high) / 2
64                         var subindex = middle * 2 + 1 # +1 because parser_goto(index, 0) is the length
65
66                         var goal = parser_goto(index, subindex)
67                         if state < goal then
68                                 high = middle - 1
69                         else if state > goal then
70                                 low = middle + 1
71                         else
72                                 return parser_goto(index, subindex+1)
73                         end
74                 end
75
76                 return parser_goto(index, 2) # No pair matched: default target, stored in pair 0
77         end
78
79         # Push a state number and its attached value, reusing an existing slot when there is one
80         private fun push(numstate: Int, list_node: nullable Object)
81         do
82                 var pos = _stack_pos + 1
83                 _stack_pos = pos
84                 if pos < _stack.length then
85                         var state = _stack[pos]
86                         state.state = numstate
87                         state.nodes = list_node
88                 else
89                         _stack.push(new State(numstate, list_node))
90                 end
91         end
92
93         # State number stored in the top slot of the stack
94         private fun state: Int
95         do
96                 return _stack[_stack_pos].state
97         end
98
99         # Pop the top slot and return its attached value (the slot itself stays in `_stack`)
100         private fun pop: nullable Object
101         do
102                 var res = _stack[_stack_pos].nodes
103                 _stack_pos = _stack_pos -1
104                 return res
105         end
106
107         # Build and return a full AST; on a lexer or syntax error the returned Start has no tree, only the error.
108         fun parse: Start
109         do
110                 push(0, null)
111
112                 var lexer = _lexer
113                 loop
114                         var token = lexer.peek
115                         if token isa PError then
116                                 return new Start(null, token)
117                         end
118                         # Start from the default action of the current state, stored in triple 0 of its row
119                         var state = self.state
120                         var index = token.parser_index
121                         var action_type = parser_action(state, 2)
122                         var action_value = parser_action(state, 3)
123
124                         var low = 1
125                         var high = parser_action(state, 0) - 1
126                         # Binary search of the token index among the other (index, type, value) triples
127                         while low <= high do
128                                 var middle = (low + high) / 2
129                                 var subindex = middle * 3 + 1 # +1 because parser_action(state, 0) is the length
130
131                                 var goal = parser_action(state, subindex)
132                                 if index < goal then
133                                         high = middle - 1
134                                 else if index > goal then
135                                         low = middle + 1
136                                 else
137                                         action_type = parser_action(state, subindex+1)
138                                         action_value = parser_action(state, subindex+2)
139                                         break
140                                 end
141                         end
142
143                         if action_type == 0 then # SHIFT: consume the token and push it with the new state
144                                 push(action_value, lexer.next)
145                         else if action_type == 1 then # REDUCE: run the reduce action numbered `action_value`
146                                 _reduce_table[action_value].action(self)
147                         else if action_type == 2 then # ACCEPT: wrap the popped root and the EOF token in a Start
148                                 var node2 = lexer.next
149                                 assert node2 isa EOF
150                                 var node1 = pop
151                                 assert node1 isa ${/parser/prods/prod/@ename}
152                                 var node = new Start(node1, node2)
153                                 (new ComputeProdLocationVisitor).enter_visit(node)
154                                 return node
155                         else if action_type == 3 then # ERROR: report the unexpected token
156                                 var node2 = new PParserError.init_parser_error("Syntax error: unexpected {token}.", token.location, token)
157                                 var node = new Start(null, node2)
158                                 return node
159                         end
160                 end
161         end
162         # Reduce actions, indexed by the action value of REDUCE entries
163         var _reduce_table: Array[ReduceAction]
164         private fun build_reduce_table # fills `_reduce_table` with one action per grammar rule
165         do
166                 _reduce_table = new Array[ReduceAction].with_items(
167 $ foreach {rules/rule}
168                         new ReduceAction@index(@leftside)[-sep ','-]
169 $ end foreach
170                 )
171         end
172 end
173
174 redef class Prod
175         # Location of the first token found at or after the start of the production.
176         # An epsilon production contains no token, so for it this token lies outside the production.
177         var _first_location: nullable Location
178
179         # Location of the last token before the end of the production.
180         # For an epsilon production this is meant to be a token outside the production.
181         var _last_location: nullable Location
182 end
183
184 # Infer the location of every production from the locations of the tokens it spans.
185 # Epsilon productions contain no token, so they are located from the tokens around them.
186 private class ComputeProdLocationVisitor
187         super Visitor
188         # Currently visited productions that still wait for their first token
189         var _need_first_prods: Array[Prod] = new Array[Prod]
190
191         # Already visited epsilon productions that have a token before them in their parent
192         var _need_after_epsilons: Array[Prod] = new Array[Prod]
193
194         # Already visited epsilon productions that have no token before them in their parent
195         var _need_before_epsilons: Array[Prod] = new Array[Prod]
196
197         # Location of the last visited token in the current production
198         var _last_location: nullable Location = null
199
200         redef fun visit(n: nullable PNode)
201         do
202                 if n == null then
203                         return
204                 else if n isa Token then
205                         var loc = n.location
206                         _last_location = loc
207
208                         # This token is the first one of every production still waiting for one
209                         if not _need_first_prods.is_empty then
210                                 for no in _need_first_prods do
211                                         no._first_location = loc
212                                 end
213                                 _need_first_prods.clear
214                         end
215
216                         # Locate the pending epsilon productions that were waiting for a following token
217                         if not _need_after_epsilons.is_empty then
218                                 for no in _need_after_epsilons do
219                                         # Epsilon production in the middle of a non-epsilon one: it lies between two tokens
220                                         var startl = no._last_location
221                                         assert startl != null
222                                         var endl = loc
223                                         no.location = new Location(endl.file, startl.line_end, endl.line_start, startl.column_end, endl.column_start)
224                                 end
225                                 _need_after_epsilons.clear
226                         end
227                 else
228                         assert n isa Prod
229                         _need_first_prods.add(n)
230
231                         var old_last = _last_location
232                         _last_location = null
233                         n.visit_all(self)
234                         # No token inside `n`: fall back, for `endl` too, to the last token seen before `n`
235                         if _last_location == null then _last_location = old_last
236                         var endl = _last_location
237                         n._last_location = endl
238                         var startl = n._first_location
239                         if startl != null then
240                                 # Non-epsilon production
241                                 assert endl != null
242
243                                 n.location = new Location(startl.file, startl.line_start, endl.line_end, startl.column_start, endl.column_end)
244
245                                 if not _need_before_epsilons.is_empty then
246                                         var loc = new Location(startl.file, startl.line_start, startl.line_start, startl.column_start, startl.column_start)
247                                         for no in _need_before_epsilons do
248                                                 # Epsilon production that starts the current non-epsilon production
249                                                 no.location = loc
250                                         end
251                                         _need_before_epsilons.clear
252                                 end
253
254                                 if not _need_after_epsilons.is_empty then
255                                         var loc = new Location(endl.file, endl.line_end, endl.line_end, endl.column_end, endl.column_end)
256                                         for no in _need_after_epsilons do
257                                                 # Epsilon production that finishes the current non-epsilon production
258                                                 no.location = loc
259                                         end
260                                         _need_after_epsilons.clear
261                                 end
262                         else
263                                 # No first token means epsilon production (or "throw all my tokens" production)
264                                 # So it must be located later, once a neighbouring token or production is known
265                                 if endl == null then
266                                         # Epsilon production that starts a parent non-epsilon production
267                                         _need_before_epsilons.add(n)
268                                 else
269                                         # Epsilon production in the middle or that finishes a parent non-epsilon production
270                                         _need_after_epsilons.add(n)
271                                 end
272                         end
273                 end
274         end
275
276         init do end
277 end
278
279 # Root of the reduce-action hierarchy; each reduce action has its own subclass.
280 private abstract class ReduceAction
281         # Index given to `Parser::go_to` once the reduction is done
282         var _goto: Int
283         init(g: Int) do _goto = g
284         fun action(p: Parser) is abstract
285         # Append `l2` to `l1` in place and return `l1`; when `l1` is empty, `l2` itself is returned
286         fun concat(l1, l2 : Array[Object]): Array[Object] do
287                 if not l1.is_empty then l1.append(l2) else return l2
288                 return l1
289         end
290 end
291 # Generated reduce actions, one class per grammar rule: each runs the rule's commands (pop values, build a node or list) and pushes the result with the goto state.
292 $ foreach {rules/rule}
293 private class ReduceAction@index
294         super ReduceAction
295         redef fun action(p: Parser)
296         do
297                                         var node_list: nullable Object = null
298 $   foreach {action}
299 $   choose
300 $     when {@cmd='POP'}
301                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = p.pop
302 $     end
303 $     when {@cmd='FETCHLIST'}
304                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
305                                         assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa Array[Object]
306 $     end
307 $     when {@cmd='FETCHNODE'}
308                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = ${translate(@from,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
309                                         assert ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} isa nullable @etype
310 $     end
311 $     when {@cmd='ADDNODE'}
312                                         if ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} != null then
313                                                 ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}.add(${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
314                                         end
315 $     end
316 $     when {@cmd='ADDLIST'}
317                                         ${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = concat(${translate(@tolist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}, ${translate(@fromlist,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")})
318 $     end
319 $     when {@cmd='MAKELIST'}
320                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")} = new Array[Object]
321 $     end
322 $     when {@cmd='MAKENODE'}
323 $      if {count(arg)!=0}
324                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}(
325 $       foreach {arg}
326 $           if @null
327                                                 null[-sep ','-]
328 $           else
329                                                 ${translate(.,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}[-sep ','-]
330 $           end
331 $       end foreach
332                                         )
333 $      else
334                                         var ${translate(@result,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: nullable @etype = new @etype.init_${translate(@etype,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
335 $      end
336 $     end
337 $     when {@cmd='RETURNNODE'}
338 $       if @null
339                                         node_list = null
340 $       else
341                                         node_list = ${translate(@node,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
342 $       end
343 $     end
344 $     when {@cmd='RETURNLIST'}
345                                         node_list = ${translate(@list,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
346 $     end
347 $   end choose
348 $   end foreach
349                                         p.push(p.go_to(_goto), node_list)
350         end
351 end
352 $ end foreach
353 $ end template
354 $ // make_parser_table: emit the C action and goto tables read by the parser; rows are const because they are only ever read.
355 $ template make_parser_table()
356 $ foreach {parser_data/action_table/row}
357 static const int parser_action_row${position()}[] = {
358         ${count(action)},
359 $   foreach {action}
360         @from, @action, @to[-sep ','-]
361 $   end foreach
362 };
363 $ end foreach
364 /* Action table: each row is its number of entries followed by (from, action, to) triples. */
365 const int* const parser_action_table[] = {
366 $ foreach {parser_data/action_table/row}
367         parser_action_row${position()}[-sep ','-]
368 $ end foreach
369 };
370
371 $ foreach {parser_data/goto_table/row}
372 static const int parser_goto_row${position()}[] = {
373         ${count(goto)},
374 $   foreach {goto}
375         @from, @to[-sep ','-]
376 $   end foreach
377 };
378 $ end foreach
379 /* Goto table: each row is its number of entries followed by (from, to) pairs. */
380 const int* const parser_goto_table[] = {
381 $ foreach {parser_data/goto_table/row}
382         parser_goto_row${position()}[-sep ','-]
383 $ end foreach
384 };
385 $ end template