nitcc: error on duplicated alternative name
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 var nprods = new Array[Nprod]
32
33 # Symbol table to associate things (prods and exprs) with their name
34 var names = new HashMap[String, Node]
35
36 # The associated grammar object
37 # Read the result of the visit here
38 var gram = new Gram
39
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa = new Automaton.empty
43
44 # Run the semantic analysis of the grammar
45 fun start(n: Node)
46 do
47 # First visit to collect names
48 enter_visit(n)
49
50 # Second visit to use collectec names and build rhings
51 var v2 = new CheckNameVisitor(self)
52 v2.enter_visit(n)
53
54 # Inline all the ?
55 gram.inline(v2.quesizes.values)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p in gram.prods do
58 if not p.name.has_suffix("_inline") then continue
59 print "inline {p}"
60 gram.inline([p])
61 end
62
63 # Build the NFA automaton
64 for t in gram.tokens do
65 var nexpr = t.nexpr
66 var nfa2
67 if nexpr == null then
68 nfa2 = new Automaton.epsilon
69 for c in t.text.as(not null) do
70 nfa2.concat(new Automaton.atom(c.ascii))
71 end
72 else
73 nfa2 = nexpr.build_nfa
74 end
75 nfa.states.add_all nfa2.states
76 nfa.start.add_trans(nfa2.start, null)
77 for s in nfa2.accept do nfa.add_tag(s, t)
78 nfa.accept.add_all nfa2.accept
79 end
80 end
81
82 redef fun visit(n) do n.accept_collect_prod(self)
83 end
84
85 private class CheckNameVisitor
86 super Visitor
87
88 # The collected altname, for the alternative
89 var altname: nullable String = null
90
91 # The collected elements, for the alternative
92 var elems = new Array[Element]
93
94 # The collected element names, for the alternative
95 var elems_names = new Array[nullable String]
96
97 # The collected elementname, for the nelem
98 var elemname: nullable String = null
99
100 # Is the alternative transformed, for the alternative
101 var trans = false
102
103 # Known rejected tokens
104 var rejecteds = new Array[Element]
105
106 # Pool of elements that are modified with + (reuse them!)
107 private var plusizes = new HashMap[Element, Production]
108
109 # Create a + version of an element
110 fun plusize(e: Element): Production
111 do
112 if plusizes.has_key(e) then return plusizes[e]
113 var name = "{e}+"
114 var prod = new Production(name)
115 prod.acname = "Nodes[{e.acname}]"
116 v1.gram.prods.add(prod)
117 prod.new_alt("{name}_one", e)
118 prod.new_alt("{name}_more", prod, e)
119 plusizes[e] = prod
120 return prod
121 end
122
123 # Pool of elements that are modified with ? (reuse them!)
124 private var quesizes = new HashMap[Element, Production]
125
126 # Create a ? version of an element
127 fun quesize(e: Element): Production
128 do
129 if quesizes.has_key(e) then return quesizes[e]
130 var name = "{e}?"
131 var prod = new Production(name)
132 prod.acname = "nullable {e.acname}"
133 v1.gram.prods.add(prod)
134 var a1 = prod.new_alt("{name}_one", e)
135 a1.codes = [new CodePop]
136 var a0 = prod.new_alt0("{name}_none")
137 a0.codes = [new CodeNull]
138 quesizes[e] = prod
139 return prod
140 end
141
142 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
143 var nexpr: nullable Nexpr
144
145 # The current production, used to initialize alternatives
146 var prod: nullable Production
147
148 # The main visitor, used to access the grammar of the symbol table
149 var v1: CollectNameVisitor
150 init(v1: CollectNameVisitor) do self.v1 = v1
151
152 redef fun visit(n) do n.accept_check_name_visitor(self)
153 end
154
155 redef class Node
156 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
157 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
158 end
159
160 redef class Nexpr
161 # The associated token
162 var token: nullable Token
163
164 # The associated name
165 var name: nullable String
166
167 # The required expression (Nexpr that are used inside)
168 var precs = new ArraySet[Nexpr]
169
170 redef fun accept_collect_prod(v) do
171 var id = children.first.as(Nid)
172 var name = id.text
173 if v.names.has_key(name) then
174 print "{id.position} Error {name} already defined."
175 exit(1)
176 end
177 v.names[name] = self
178 self.name = name
179 end
180 redef fun accept_check_name_visitor(v) do
181 v.nexpr = self
182 super
183 v.nexpr = null
184 end
185
186 # Flag to track circular regular expression
187 var is_building = false
188
189 # The associated NFA (cached, see `build_nfa`)
190 private var nfa: nullable Automaton
191
192 # Build the NFA, possibily building the NFA of required expressions
193 # Print an error if there is a circular dependency
194 # The result is cached
195 fun build_nfa: Automaton do
196 var res = nfa
197 if res != null then return res
198 if is_building then
199 print "Error: circular regular expression {name.to_s}."
200 exit(1)
201 abort
202 end
203 is_building = true
204 for p in precs do
205 p.build_nfa
206 end
207 is_building = false
208 var nre = self.children[2]
209 res = nre.make_rfa
210 nfa = res
211 return res
212 end
213 end
214
215 redef class Nre_id
216 # The named expression
217 var nexpr: nullable Nexpr
218
219 redef fun accept_check_name_visitor(v) do
220 var id = children.first.as(Nid)
221 var name = id.text
222 if not v.v1.names.has_key(name) then
223 print "{id.position} Error: unknown name {name}"
224 exit(1)
225 abort
226 end
227 var node = v.v1.names[name]
228 if node isa Nprod then
229 print "{id.position} Error: cannot use production {name} in a regular expression"
230 exit(1)
231 abort
232 else if not node isa Nexpr then
233 abort
234 end
235 nexpr = node
236 v.nexpr.precs.add(node)
237 end
238
239 redef fun make_rfa
240 do
241 return nexpr.nfa.dup
242 end
243 end
244
245 redef class Nign
246 # The named element
247 var elem: nullable Element
248
249 redef fun accept_check_name_visitor(v) do
250 var id = children[1].as(Nid)
251 var name = id.text
252 if not v.v1.names.has_key(name) then
253 print "{id.position} Error: unknown name {name}"
254 exit(1)
255 abort
256 end
257 var node = v.v1.names[name]
258 var elem: nullable Element
259 if node isa Nprod then
260 print "{id.position} Error: cannot ignore a production"
261 exit(1)
262 abort
263 else if node isa Nexpr then
264 elem = node.token
265 if elem == null then
266 elem = new Token("Ignored")
267 elem.nexpr = node
268 v.v1.gram.tokens.add(elem)
269 node.token = elem
270 end
271 else
272 abort
273 end
274 self.elem = elem
275 end
276 end
277
278 redef class Nrej
279 redef fun accept_check_name_visitor(v) do
280 # Add elements to the rejected list
281 v.elems = v.rejecteds
282 super
283 for e in v.elems do
284 if e isa Production then
285 print "Error: cannot reject {e}, it is a production"
286 exit(1)
287 abort
288 else if e isa Token then
289 # The token was build and registered during the visit
290 else
291 abort
292 end
293 end
294 end
295 end
296
297 redef class Nprod
298 # The associated production
299 var prod: nullable Production
300
301 redef fun accept_collect_prod(v) do
302 var id = children.first.as(Nid)
303 var name = id.text
304 if v.names.has_key(name) then
305 print "{id.position} Error {name} already defined."
306 exit(1)
307 end
308 v.names[name] = self
309 v.nprods.add(self)
310 prod = new Production(name)
311 v.gram.prods.add(prod.as(not null))
312 end
313 redef fun accept_check_name_visitor(v) do
314 v.prod = prod
315 super
316 v.prod = null
317 end
318 end
319
320 redef class Nptrans
321 redef fun accept_check_name_visitor(v) do
322 var id = children[2].as(Nid)
323 var name = id.text
324
325 var node = v.v1.names[name]
326 if node isa Nprod then
327 v.prod.spe = node.prod.as(not null)
328 if v.prod.spe.spe != null then
329 print "Cannot transform into {name}, {name} is already transformed."
330 exit(1)
331 abort
332 end
333 else if node isa Nexpr then
334 print "Cannot transform into {name}, {name} is a token."
335 else
336 abort
337 end
338 end
339 end
340
341 redef class Natrans
342 redef fun accept_check_name_visitor(v) do
343 var id = children[2].as(Nid)
344 var name = id.text
345
346 v.trans = true
347 end
348 end
349
350 redef class Alternative
351 var short_name: nullable String
352 end
353
354 redef class Nalt
355 # The associated alternative
356 var alt: nullable Alternative
357
358 redef fun accept_check_name_visitor(v)
359 do
360 v.altname = null
361 v.trans = false
362 v.elems = new Array[Element]
363 v.elems_names = new Array[nullable String]
364 super
365 var prod = v.prod.as(not null)
366 var prodabs = prod.spe
367 if prodabs == null then prodabs = prod
368 var name = v.altname
369 if name == null then
370 if v.trans then
371 name = prod.name + "_" + prod.alts.length.to_s
372 else if prod.spe == null and prod.alts.is_empty then
373 name = prod.name
374 prod.altone = true
375 else
376 if prodabs.altone then
377 prodabs.altone = false
378 prodabs.alts.first.name = prodabs.name + "_0"
379 end
380 name = prod.name + "_" + prod.alts.length.to_s
381 end
382 else
383 if prodabs.altone then
384 prodabs.altone = false
385 prodabs.alts.first.name = prodabs.name + "_0"
386 end
387 name = prodabs.name + "_" + name
388 end
389 var alt = prod.new_alt2(name, v.elems)
390 alt.short_name = v.altname
391 alt.elems_names.add_all(v.elems_names)
392 self.alt = alt
393 if v.trans then
394 alt.trans = true
395 alt.codes = [new CodePop]
396 end
397 end
398 end
399
400 redef class Naltid
401 redef fun accept_check_name_visitor(v)
402 do
403 var id = children[1].as(Nid)
404 var name = id.text
405 v.altname = name
406 var prod = v.prod.as(not null)
407 var prodabs = prod.spe
408 if prodabs == null then prodabs = prod
409 for x in prodabs.alts do
410 if x.short_name == name then
411 print "{id.position} Error: already an alternative named {name}"
412 exit(1)
413 end
414 end
415 end
416 end
417
418 redef class Nelem
419 # The associated element
420 var elem: nullable Element
421
422 # Set the element and check things
423 fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
424 do
425 assert self.elem == null
426 self.elem = elem
427 if elem isa Token and v.rejecteds.has(elem) then
428 if pos != null then
429 print "{pos} Error: {elem.name} is already a rejected token."
430 else
431 print "Error: {elem.name} is already a rejected token."
432 end
433 exit(1)
434 end
435 v.elems.push(elem)
436 end
437 end
438
439 redef class Token
440 # The associated expression (if any, ie defined in the lexer part)
441 var nexpr: nullable Nexpr
442 # The associated text (if any, ie defined in the parser part)
443 var text: nullable String
444 end
445
446 redef class Nnelem
447 redef fun accept_check_name_visitor(v)
448 do
449 v.elemname = null
450 super
451 var elemname = v.elemname
452 if elemname != null and v.elems_names.has(elemname) then
453 var i = 2
454 while v.elems_names.has(elemname+i.to_s) do i += 1
455 elemname = elemname+i.to_s
456 end
457 v.elems_names.add elemname
458 end
459 end
460
461 redef class Nelemid
462 redef fun accept_check_name_visitor(v)
463 do
464 var id = children[1].as(Nid)
465 var name = id.text
466 for n2 in v.elems_names do
467 if name == n2 then
468 print "{id.position} Error: already an element named {name}."
469 exit(1)
470 end
471 end
472 v.elemname = name
473 end
474 end
475
476 redef class Nelem_id
477 redef fun accept_check_name_visitor(v) do
478 var id = children.first.as(Nid)
479 var name = id.text
480 if not v.v1.names.has_key(name) then
481 print "{id.position} Error: unknown name {name}"
482 exit(1)
483 abort
484 end
485 var node = v.v1.names[name]
486 var elem: nullable Element
487 if node isa Nprod then
488 elem = node.prod
489 else if node isa Nexpr then
490 elem = node.token
491 if elem == null then
492 elem = new Token(name)
493 elem.nexpr = node
494 v.v1.gram.tokens.add(elem)
495 node.token = elem
496 end
497 else
498 abort
499 end
500 assert elem != null
501 set_elem(v, id.position, elem)
502 if v.elemname == null then v.elemname = name
503 end
504 end
505
506 redef class Nelem_str
507 redef fun accept_check_name_visitor(v) do
508 var str = children.first.as(Nstr)
509 var text = str.value
510 var name = str.text
511 var elem: nullable Element
512 if v.v1.names.has_key(name) then
513 elem = v.v1.names[name].as(Nelem_str).elem
514 assert elem != null
515 else
516 elem = new Token(name)
517 elem.text = text
518 v.v1.gram.tokens.add(elem)
519 v.v1.names[name] = self
520 end
521 set_elem(v, str.position, elem)
522 end
523 end
524
525 redef class Nelem_star
526 redef fun accept_check_name_visitor(v) do
527 super
528 var elem = v.elems.pop
529 elem = v.plusize(elem)
530 elem = v.quesize(elem)
531 set_elem(v, null, elem)
532 end
533 end
534
535 redef class Nelem_ques
536 redef fun accept_check_name_visitor(v) do
537 super
538 var elem = v.elems.pop
539 elem = v.quesize(elem)
540 set_elem(v, null, elem)
541 end
542 end
543
544 redef class Nelem_plus
545 redef fun accept_check_name_visitor(v) do
546 super
547 var elem = v.elems.pop
548 elem = v.plusize(elem)
549 set_elem(v, null, elem)
550 end
551 end