nitcc: add Rejected tokens
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 var nprods = new Array[Nprod]
32
33 # Symbol table to associate things (prods and exprs) with their name
34 var names = new HashMap[String, Node]
35
36 # The associated grammar object
37 # Read the result of the visit here
38 var gram = new Gram
39
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa = new Automaton.empty
43
44 # Run the semantic analysis of the grammar
45 fun start(n: Node)
46 do
47 # First visit to collect names
48 enter_visit(n)
49
50 # Second visit to use collectec names and build rhings
51 var v2 = new CheckNameVisitor(self)
52 v2.enter_visit(n)
53
54 # Inline all the ?
55 gram.inline(v2.quesizes.values)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p in gram.prods do
58 if not p.name.has_suffix("_inline") then continue
59 print "inline {p}"
60 gram.inline([p])
61 end
62
63 # Build the NFA automaton
64 for t in gram.tokens do
65 var nexpr = t.nexpr
66 var nfa2
67 if nexpr == null then
68 nfa2 = new Automaton.epsilon
69 for c in t.text.as(not null) do
70 nfa2.concat(new Automaton.atom(c.ascii))
71 end
72 else
73 nfa2 = nexpr.build_nfa
74 end
75 nfa.states.add_all nfa2.states
76 nfa.start.add_trans(nfa2.start, null)
77 for s in nfa2.accept do nfa.add_tag(s, t)
78 nfa.accept.add_all nfa2.accept
79 end
80 end
81
82 redef fun visit(n) do n.accept_collect_prod(self)
83 end
84
85 private class CheckNameVisitor
86 super Visitor
87
88 # The collected altname, for the alternative
89 var altname: nullable String = null
90
91 # The collected elements, for the alternative
92 var elems = new Array[Element]
93
94 # The collected element names, for the alternative
95 var elems_names = new Array[nullable String]
96
97 # The collected elementname, for the nelem
98 var elemname: nullable String = null
99
100 # Is the alternative transformed, for the alternative
101 var trans = false
102
103 # Known rejected tokens
104 var rejecteds = new Array[Token]
105
106 # Pool of elements that are modified with + (reuse them!)
107 private var plusizes = new HashMap[Element, Production]
108
109 # Create a + version of an element
110 fun plusize(e: Element): Production
111 do
112 if plusizes.has_key(e) then return plusizes[e]
113 var name = "{e}+"
114 var prod = new Production(name)
115 prod.acname = "Nodes[{e.acname}]"
116 v1.gram.prods.add(prod)
117 prod.new_alt("{name}_one", e)
118 prod.new_alt("{name}_more", prod, e)
119 plusizes[e] = prod
120 return prod
121 end
122
123 # Pool of elements that are modified with ? (reuse them!)
124 private var quesizes = new HashMap[Element, Production]
125
126 # Create a ? version of an element
127 fun quesize(e: Element): Production
128 do
129 if quesizes.has_key(e) then return quesizes[e]
130 var name = "{e}?"
131 var prod = new Production(name)
132 prod.acname = "nullable {e.acname}"
133 v1.gram.prods.add(prod)
134 var a1 = prod.new_alt("{name}_one", e)
135 a1.codes = [new CodePop]
136 var a0 = prod.new_alt0("{name}_none")
137 a0.codes = [new CodeNull]
138 quesizes[e] = prod
139 return prod
140 end
141
142 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
143 var nexpr: nullable Nexpr
144
145 # The current production, used to initialize alternatives
146 var prod: nullable Production
147
148 # The main visitor, used to access the grammar of the symbol table
149 var v1: CollectNameVisitor
150 init(v1: CollectNameVisitor) do self.v1 = v1
151
152 redef fun visit(n) do n.accept_check_name_visitor(self)
153 end
154
155 redef class Node
156 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
157 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
158 end
159
160 redef class Nexpr
161 # The associated token
162 var token: nullable Token
163
164 # The associated name
165 var name: nullable String
166
167 # The required expression (Nexpr that are used inside)
168 var precs = new ArraySet[Nexpr]
169
170 redef fun accept_collect_prod(v) do
171 var id = children.first.as(Nid)
172 var name = id.text
173 v.names[name] = self
174 self.name = name
175 end
176 redef fun accept_check_name_visitor(v) do
177 v.nexpr = self
178 super
179 v.nexpr = null
180 end
181
182 # Flag to track circular regular expression
183 var is_building = false
184
185 # The associated NFA (cached, see `build_nfa`)
186 private var nfa: nullable Automaton
187
188 # Build the NFA, possibily building the NFA of required expressions
189 # Print an error if there is a circular dependency
190 # The result is cached
191 fun build_nfa: Automaton do
192 var res = nfa
193 if res != null then return res
194 if is_building then
195 print "Error: circular regular expression {name.to_s}."
196 exit(1)
197 abort
198 end
199 is_building = true
200 for p in precs do
201 p.build_nfa
202 end
203 is_building = false
204 var nre = self.children[2]
205 res = nre.make_rfa
206 nfa = res
207 return res
208 end
209 end
210
211 redef class Nre_id
212 # The named expression
213 var nexpr: nullable Nexpr
214
215 redef fun accept_check_name_visitor(v) do
216 var id = children.first.as(Nid)
217 var name = id.text
218 if not v.v1.names.has_key(name) then
219 print "{id.position} Error: unknown name {name}"
220 exit(1)
221 abort
222 end
223 var node = v.v1.names[name]
224 if node isa Nprod then
225 print "{id.position} Error: cannot use production {name} in a regular expression"
226 exit(1)
227 abort
228 else if not node isa Nexpr then
229 abort
230 end
231 nexpr = node
232 v.nexpr.precs.add(node)
233 end
234
235 redef fun make_rfa
236 do
237 return nexpr.nfa.dup
238 end
239 end
240
241 redef class Nign
242 # The named element
243 var elem: nullable Element
244
245 redef fun accept_check_name_visitor(v) do
246 var id = children[1].as(Nid)
247 var name = id.text
248 if not v.v1.names.has_key(name) then
249 print "{id.position} Error: unknown name {name}"
250 exit(1)
251 abort
252 end
253 var node = v.v1.names[name]
254 var elem: nullable Element
255 if node isa Nprod then
256 print "{id.position} Error: cannot ignore a production"
257 exit(1)
258 abort
259 else if node isa Nexpr then
260 elem = node.token
261 if elem == null then
262 elem = new Token("Ignored")
263 elem.nexpr = node
264 v.v1.gram.tokens.add(elem)
265 node.token = elem
266 end
267 else
268 abort
269 end
270 self.elem = elem
271 end
272 end
273
274 redef class Nrej
275 redef fun accept_check_name_visitor(v) do
276 v.elems = new Array[Element]
277 super
278 for e in v.elems do
279 if e isa Production then
280 print "Error: cannot reject {e}, it is a production"
281 exit(1)
282 abort
283 else if e isa Token then
284 # The token was build and registered during the visit
285 # Just add it to the rejected list
286 v.rejecteds.add(e)
287 else
288 abort
289 end
290 end
291 end
292 end
293
294 redef class Nprod
295 # The associated production
296 var prod: nullable Production
297
298 redef fun accept_collect_prod(v) do
299 var id = children.first.as(Nid)
300 var name = id.text
301 v.names[name] = self
302 v.nprods.add(self)
303 prod = new Production(name)
304 v.gram.prods.add(prod.as(not null))
305 end
306 redef fun accept_check_name_visitor(v) do
307 v.prod = prod
308 super
309 v.prod = null
310 end
311 end
312
313 redef class Nptrans
314 redef fun accept_check_name_visitor(v) do
315 var id = children[2].as(Nid)
316 var name = id.text
317
318 var node = v.v1.names[name]
319 if node isa Nprod then
320 v.prod.spe = node.prod.as(not null)
321 if v.prod.spe.spe != null then
322 print "Cannot transform into {name}, {name} is already transformed."
323 exit(1)
324 abort
325 end
326 else if node isa Nexpr then
327 print "Cannot transform into {name}, {name} is a token."
328 else
329 abort
330 end
331 end
332 end
333
334 redef class Natrans
335 redef fun accept_check_name_visitor(v) do
336 var id = children[2].as(Nid)
337 var name = id.text
338
339 v.trans = true
340 end
341 end
342
343 redef class Nalt
344 # The associated alternative
345 var alt: nullable Alternative
346
347 redef fun accept_check_name_visitor(v)
348 do
349 v.altname = null
350 v.trans = false
351 v.elems = new Array[Element]
352 v.elems_names = new Array[nullable String]
353 super
354 var prod = v.prod.as(not null)
355 var prodabs = prod.spe
356 if prodabs == null then prodabs = prod
357 var name = v.altname
358 if name == null then
359 if v.trans then
360 name = prod.name + "_" + prod.alts.length.to_s
361 else if prod.spe == null and prod.alts.is_empty then
362 name = prod.name
363 prod.altone = true
364 else
365 if prodabs.altone then
366 prodabs.altone = false
367 prodabs.alts.first.name = prodabs.name + "_0"
368 end
369 name = prod.name + "_" + prod.alts.length.to_s
370 end
371 else
372 if prodabs.altone then
373 prodabs.altone = false
374 prodabs.alts.first.name = prodabs.name + "_0"
375 end
376 name = prodabs.name + "_" + name
377 end
378 var alt = prod.new_alt2(name, v.elems)
379 alt.elems_names.add_all(v.elems_names)
380 self.alt = alt
381 if v.trans then
382 alt.trans = true
383 alt.codes = [new CodePop]
384 end
385 end
386 end
387
388 redef class Naltid
389 redef fun accept_check_name_visitor(v)
390 do
391 var id = children[1].as(Nid)
392 var name = id.text
393 v.altname = name
394 end
395 end
396
397 redef class Nelem
398 # The associated element
399 var elem: nullable Element
400
401 # Set the element and check things
402 fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
403 do
404 assert self.elem == null
405 self.elem = elem
406 v.elems.push(elem)
407 if elem isa Token and v.rejecteds.has(elem) then
408 if pos != null then
409 print "{pos} Error: {elem.name} is already a rejected token."
410 else
411 print "Error: {elem.name} is already a rejected token."
412 end
413 exit(1)
414 end
415
416 end
417
418 end
419
420 redef class Token
421 # The associated expression (if any, ie defined in the lexer part)
422 var nexpr: nullable Nexpr
423 # The associated text (if any, ie defined in the parser part)
424 var text: nullable String
425 end
426
427 redef class Nnelem
428 redef fun accept_check_name_visitor(v)
429 do
430 v.elemname = null
431 super
432 var elemname = v.elemname
433 if elemname != null and v.elems_names.has(elemname) then
434 var i = 2
435 while v.elems_names.has(elemname+i.to_s) do i += 1
436 elemname = elemname+i.to_s
437 end
438 v.elems_names.add elemname
439 end
440 end
441
442 redef class Nelemid
443 redef fun accept_check_name_visitor(v)
444 do
445 var id = children[1].as(Nid)
446 var name = id.text
447 v.elemname = name
448 end
449 end
450
451 redef class Nelem_id
452 redef fun accept_check_name_visitor(v) do
453 var id = children.first.as(Nid)
454 var name = id.text
455 if not v.v1.names.has_key(name) then
456 print "{id.position} Error: unknown name {name}"
457 exit(1)
458 abort
459 end
460 var node = v.v1.names[name]
461 var elem: nullable Element
462 if node isa Nprod then
463 elem = node.prod
464 else if node isa Nexpr then
465 elem = node.token
466 if elem == null then
467 elem = new Token(name)
468 elem.nexpr = node
469 v.v1.gram.tokens.add(elem)
470 node.token = elem
471 end
472 else
473 abort
474 end
475 assert elem != null
476 set_elem(v, id.position, elem)
477 if v.elemname == null then v.elemname = name
478 end
479 end
480
481 redef class Nelem_str
482 redef fun accept_check_name_visitor(v) do
483 var str = children.first.as(Nstr)
484 var text = str.value
485 var name = str.text
486 var elem: nullable Element
487 if v.v1.names.has_key(name) then
488 elem = v.v1.names[name].as(Nelem_str).elem
489 assert elem != null
490 else
491 elem = new Token(name)
492 elem.text = text
493 v.v1.gram.tokens.add(elem)
494 v.v1.names[name] = self
495 end
496 set_elem(v, str.position, elem)
497 end
498 end
499
500 redef class Nelem_star
501 redef fun accept_check_name_visitor(v) do
502 super
503 var elem = v.elems.pop
504 elem = v.plusize(elem)
505 elem = v.quesize(elem)
506 set_elem(v, null, elem)
507 end
508 end
509
510 redef class Nelem_ques
511 redef fun accept_check_name_visitor(v) do
512 super
513 var elem = v.elems.pop
514 elem = v.quesize(elem)
515 set_elem(v, null, elem)
516 end
517 end
518
519 redef class Nelem_plus
520 redef fun accept_check_name_visitor(v) do
521 super
522 var elem = v.elems.pop
523 elem = v.plusize(elem)
524 set_elem(v, null, elem)
525 end
526 end