af417d48b9c5a28e6f42c4ddd18d6a3b7236a967
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 # All the productions
32 var nprods = new Array[Nprod]
33
34 # Symbol table to associate things (prods and exprs) with their name
35 var names = new HashMap[String, Node]
36
37 # The associated grammar object
38 # Read the result of the visit here
39 var gram = new Gram
40
41 # The associated NFA object
42 # Read the result of the visit here
43 var nfa = new Automaton.empty
44
45 # The current production, used to initialize priorities
46 var prod: nullable Production = null
47
48 # The current priority counter to name transformed productions
49 var pricpt: Int = 0
50
51 # Run the semantic analysis of the grammar
52 fun start(n: Node)
53 do
54 # First visit to collect names
55 enter_visit(n)
56
57 # Second visit to use collected names and build things
58 var v2 = new CheckNameVisitor(self)
59 v2.enter_visit(n)
60
61 # Inline all the `?`
62 gram.inline(v2.quesizes.values)
63 # Inline all the prods suffixed by '_inline' #TODO use a real keyword
64 for p in gram.prods do
65 if not p.name.has_suffix("_inline") then continue
66 print "inline {p}"
67 gram.inline([p])
68 end
69
70 # Build the NFA automaton
71 for t in gram.tokens do
72 var nfa2 = t.build_nfa
73 nfa2.tag_accept(t)
74 nfa.absorb(nfa2)
75 end
76
77 if not v2.ignoreds.is_empty then
78 var ign = new Token("Ignored")
79 var nfa2 = new Automaton.empty
80 for t in v2.ignoreds do
81 assert t isa Token
82 var nfa3 = t.build_nfa
83 nfa2.alternate(nfa3)
84 end
85 nfa2.tag_accept(ign)
86 nfa.absorb(nfa2)
87 end
88
89 end
90
91 redef fun visit(n) do n.accept_collect_prod(self)
92 end
93
94 private class CheckNameVisitor
95 super Visitor
96
97 # The collected altname, for the alternative
98 var altname: nullable String = null
99
100 # The collected elements, for the alternative
101 var elems = new Array[Element]
102
103 # The collected element names, for the alternative
104 var elems_names = new Array[nullable String]
105
106 # The collected element names, for the nelem
107 var elemname: nullable String = null
108
109 # Is the alternative transformed, for the alternative
110 var trans = false
111
112 # The current priority class
113 # Used the check, and transform the grammar
114 var pri: nullable Npriority = null
115
116 # Known ignored tokens
117 var ignoreds = new Array[Element]
118
119 # Known rejected tokens
120 var rejecteds = new Array[Element]
121
122 # Pool of elements that are modified with + (reuse them!)
123 var plusizes = new HashMap[Element, Production]
124
125 # Create a + version of an element
126 fun plusize(e: Element): Production
127 do
128 if plusizes.has_key(e) then return plusizes[e]
129 var name = "{e}+"
130 var prod = new Production(name)
131 prod.acname = "Nodes[{e.acname}]"
132 v1.gram.prods.add(prod)
133 prod.new_alt("{name}_one", e)
134 prod.new_alt("{name}_more", prod, e)
135 plusizes[e] = prod
136 return prod
137 end
138
139 # Pool of elements that are modified with ? (reuse them!)
140 var quesizes = new HashMap[Element, Production]
141
142 # Create a ? version of an element
143 fun quesize(e: Element): Production
144 do
145 if quesizes.has_key(e) then return quesizes[e]
146 var name = "{e}?"
147 var prod = new Production(name)
148 prod.acname = "nullable {e.acname}"
149 v1.gram.prods.add(prod)
150 var a1 = prod.new_alt("{name}_one", e)
151 a1.codes = [new CodePop]
152 var a0 = prod.new_alt0("{name}_none")
153 a0.codes = [new CodeNull]
154 quesizes[e] = prod
155 return prod
156 end
157
158 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
159 var nexpr: nullable Nexpr = null
160
161 # The current production, used to initialize alternatives
162 var prod: nullable Production = null
163
164 # The main visitor, used to access the grammar of the symbol table
165 var v1: CollectNameVisitor
166
167 redef fun visit(n) do n.accept_check_name_visitor(self)
168 end
169
170 redef class Node
171 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
172 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
173 end
174
175 redef class Nexpr
176 # The associated token
177 var token: nullable Token
178
179 # The associated name
180 var name: nullable String
181
182 # The required expression (Nexpr that are used inside)
183 var precs = new ArraySet[Nexpr]
184
185 redef fun accept_collect_prod(v) do
186 var id = children.first.as(Nid)
187 var name = id.text
188 if v.names.has_key(name) then
189 print "{id.position.to_s} Error {name} already defined."
190 exit(1)
191 end
192 v.names[name] = self
193 self.name = name
194 end
195 redef fun accept_check_name_visitor(v) do
196 v.nexpr = self
197 super
198 v.nexpr = null
199 end
200
201 # Flag to track circular regular expression
202 var is_building = false
203
204 # The associated NFA (cached, see `build_nfa`)
205 private var nfa: nullable Automaton
206
207 # Build the NFA, possibly building the NFA of required expressions
208 # Print an error if there is a circular dependency
209 # The result is cached
210 fun build_nfa: Automaton do
211 var res = nfa
212 if res != null then return res
213 if is_building then
214 print "Error: circular regular expression {name.to_s}."
215 exit(1)
216 abort
217 end
218 is_building = true
219 for p in precs do
220 p.build_nfa
221 end
222 is_building = false
223 var nre = self.children[2]
224 res = nre.make_rfa
225 nfa = res
226 return res
227 end
228 end
229
230 redef class Nre_id
231 # The named expression
232 var nexpr: nullable Nexpr
233
234 redef fun accept_check_name_visitor(v) do
235 var id = children.first.as(Nid)
236 var name = id.text
237 if not v.v1.names.has_key(name) then
238 print "{id.position.to_s} Error: unknown name {name}"
239 exit(1)
240 abort
241 end
242 var node = v.v1.names[name]
243 if node isa Nprod then
244 print "{id.position.to_s} Error: cannot use production {name} in a regular expression"
245 exit(1)
246 abort
247 else if not node isa Nexpr then
248 abort
249 end
250 nexpr = node
251 v.nexpr.precs.add(node)
252 end
253
254 redef fun make_rfa
255 do
256 return nexpr.nfa.dup
257 end
258 end
259
260 redef class Nign
261 redef fun accept_check_name_visitor(v) do
262 # Add elements to the ignored list
263 v.elems = v.ignoreds
264 super
265 for e in v.elems do
266 if e isa Production then
267 print "Error: cannot ignore {e}, it is a production"
268 exit(1)
269 abort
270 else if e isa Token then
271 # The token was build and registered during the visit
272 # So, unregister them, the big Ignored token will be build later
273 v.v1.gram.tokens.remove(e)
274 else
275 abort
276 end
277 end
278 end
279 end
280
281 redef class Nrej
282 redef fun accept_check_name_visitor(v) do
283 # Add elements to the rejected list
284 v.elems = v.rejecteds
285 super
286 for e in v.elems do
287 if e isa Production then
288 print "Error: cannot reject {e}, it is a production"
289 exit(1)
290 abort
291 else if e isa Token then
292 # The token was build and registered during the visit
293 else
294 abort
295 end
296 end
297 end
298 end
299
300 redef class Nprod
301 # The associated main production.
302 # i.e. the last priority class.
303 var prod: nullable Production
304
305 # The associated most-priority production.
306 # i.e. the first priority class.
307 # If there is no priority then `sub_prod == prod`.
308 var sub_prod: nullable Production
309
310 redef fun accept_collect_prod(v) do
311 var id = children.first.as(Nid)
312 var name = id.text
313 if v.names.has_key(name) then
314 print "{id.position.to_s} Error {name} already defined."
315 exit(1)
316 end
317 v.names[name] = self
318 v.nprods.add(self)
319 var prod = new Production(name)
320 self.prod = prod
321 v.gram.prods.add(prod)
322
323 # Check priority block
324 var pris = children[4]
325 if pris isa Nodes[Npriority] then
326 var lastpri = pris.children.last
327 lastpri.is_last = true
328
329 v.pricpt = pris.children.length
330
331 # Create a new production for the first priority class
332 # The main production will be used for the last priority class
333 var spe = prod
334 prod = new Production(name + "$0")
335 prod.spe = spe
336 v.gram.prods.add(prod)
337 end
338 self.sub_prod = prod
339
340 v.prod = prod
341 super
342 v.prod = null
343 end
344 redef fun accept_check_name_visitor(v) do
345 v.prod = sub_prod
346 super
347 v.prod = null
348 end
349 end
350
351 redef class Nptrans
352 redef fun accept_check_name_visitor(v) do
353 var id = children[2].as(Nid)
354 var name = id.text
355
356 var node = v.v1.names[name]
357 if node isa Nprod then
358 v.prod.spe = node.prod.as(not null)
359 if v.prod.spe.spe != null then
360 print "Cannot transform into {name}, {name} is already transformed."
361 exit(1)
362 abort
363 end
364 else if node isa Nexpr then
365 print "Cannot transform into {name}, {name} is a token."
366 else
367 abort
368 end
369 end
370 end
371
372 redef class Natrans
373 redef fun accept_check_name_visitor(v) do
374 v.trans = true
375 end
376 end
377
378 redef class Npriority
379 # It is the last priority group?
380 var is_last = false
381
382 # The associated production
383 var prod: nullable Production
384
385 # The production in the with the next less priority class.
386 # `null` if there is no priority or if the first priority class.
387 var next: nullable Production
388
389 redef fun accept_collect_prod(v) do
390 var old = v.prod
391 assert old != null
392 var spe = old.spe
393 assert spe != null
394 if is_last then
395 prod = spe
396 else
397 v.pricpt -= 1
398 prod = new Production(spe.name + "${v.pricpt}")
399 prod.spe = spe
400 v.gram.prods.add(prod.as(not null))
401 end
402 next = old
403 v.prod = prod
404 super
405
406 end
407 redef fun accept_check_name_visitor(v) do
408 var old = v.prod
409 v.prod = prod
410 v.pri = self
411 super
412
413 # Inject a new alternative that goes to the next less priority class
414 var alt = prod.new_alt2(prod.name + "_" + prod.alts.length.to_s, [next.as(not null)])
415 alt.trans = true
416 alt.codes = [new CodePop]
417
418 v.pri = null
419 v.prod = old
420 end
421
422 # Check and transform `v.elems` for priority
423 private fun check_priority(v: CheckNameVisitor) is abstract
424 end
425
426 redef class Npriority_left
427 redef fun check_priority(v) do
428 var p = prod.spe or else prod
429 assert p != null
430 if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
431 print("Error: in a Left priority class, left and right must be the production")
432 exit(1)
433 end
434 v.elems.first = prod.as(not null)
435 v.elems.last = next.as(not null)
436 end
437 end
438
439 redef class Npriority_right
440 redef fun check_priority(v) do
441 var p = prod.spe or else prod
442 assert p != null
443 if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
444 print("Error: in a Right priority class, left and right must be the production")
445 exit(1)
446 end
447 v.elems.first = next.as(not null)
448 v.elems.last = prod.as(not null)
449 end
450 end
451
452 redef class Npriority_unary
453 redef fun check_priority(v) do
454 var p = prod.spe or else prod
455 assert p != null
456 if v.elems.length < 2 or (v.elems.first != p and v.elems.last != p) then
457 print("Error: in a Unary priority class, left or right must be the production")
458 exit(1)
459 end
460 if v.elems.first == p then v.elems.first = prod.as(not null)
461 if v.elems.last == p then v.elems.last = prod.as(not null)
462 end
463 end
464
465 redef class Alternative
466 # The short name of the alternative.
467 # Used for errors
468 var short_name: nullable String
469 end
470
471 redef class Nalt
472 # The associated alternative
473 var alt: nullable Alternative
474
475 redef fun accept_check_name_visitor(v)
476 do
477 v.altname = null
478 v.trans = false
479 v.elems = new Array[Element]
480 v.elems_names = new Array[nullable String]
481
482 super
483
484 var pri = v.pri
485 if pri != null then pri.check_priority(v)
486
487 var prod = v.prod.as(not null)
488 var prodabs = prod.spe
489 if prodabs == null then prodabs = prod
490 var name = v.altname
491 if name == null then
492 if v.trans then
493 name = prod.name + "_" + prod.alts.length.to_s
494 else if prod.spe == null and prod.alts.is_empty and pri == null then
495 name = prod.name
496 prod.altone = true
497 else
498 if prodabs.altone then
499 prodabs.altone = false
500 prodabs.alts.first.name = prodabs.name + "_0"
501 end
502 name = prod.name + "_" + prod.alts.length.to_s
503 end
504 else
505 if prodabs.altone then
506 prodabs.altone = false
507 prodabs.alts.first.name = prodabs.name + "_0"
508 end
509 name = prodabs.name + "_" + name
510 end
511
512 var alt = prod.new_alt2(name, v.elems)
513 alt.short_name = v.altname
514 alt.elems_names.add_all(v.elems_names)
515 self.alt = alt
516 if v.trans then
517 alt.trans = true
518 alt.codes = [new CodePop]
519 end
520 end
521 end
522
523 redef class Naltid
524 redef fun accept_check_name_visitor(v)
525 do
526 var id = children[1].as(Nid)
527 var name = id.text
528 v.altname = name
529 var prod = v.prod.as(not null)
530 var prodabs = prod.spe
531 if prodabs == null then prodabs = prod
532 for x in prodabs.alts do
533 if x.short_name == name then
534 print "{id.position.to_s} Error: already an alternative named {name}"
535 exit(1)
536 end
537 end
538 end
539 end
540
541 redef class Nelem
542 # The associated element
543 var elem: nullable Element
544
545 # Set the element and check things
546 private fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
547 do
548 assert self.elem == null
549 self.elem = elem
550 if elem isa Token then
551 if v.ignoreds.has(elem) then
552 if pos != null then
553 print "{pos} Error: {elem.name} is already an ignored token."
554 else
555 print "Error: {elem.name} is already an ignored token."
556 end
557 exit(1)
558 end
559 if v.rejecteds.has(elem) then
560 if pos != null then
561 print "{pos} Error: {elem.name} is already a rejected token."
562 else
563 print "Error: {elem.name} is already a rejected token."
564 end
565 exit(1)
566 end
567 end
568 v.elems.push(elem)
569 end
570 end
571
572 redef class Token
573 # The associated expression (if any, ie defined in the lexer part)
574 var nexpr: nullable Nexpr
575 # The associated text (if any, ie defined in the parser part)
576 var text: nullable String
577
578 # Build a NFA according to nexpr or text
579 # Does not tag it!
580 fun build_nfa: Automaton
581 do
582 var nexpr = self.nexpr
583 if nexpr != null then
584 assert self.text == null
585 return nexpr.build_nfa
586 end
587 var text = self.text
588 if text != null then
589 var nfa = new Automaton.epsilon
590 for c in text.chars do
591 nfa.concat(new Automaton.atom(c.ascii))
592 end
593 return nfa
594 end
595 abort
596 end
597 end
598
599 redef class Nnelem
600 redef fun accept_check_name_visitor(v)
601 do
602 v.elemname = null
603 super
604 var elemname = v.elemname
605 if elemname != null and v.elems_names.has(elemname) then
606 var i = 2
607 while v.elems_names.has(elemname+i.to_s) do i += 1
608 elemname = elemname+i.to_s
609 end
610 v.elems_names.add elemname
611 end
612 end
613
614 redef class Nelemid
615 redef fun accept_check_name_visitor(v)
616 do
617 var id = children[1].as(Nid)
618 var name = id.text
619 for n2 in v.elems_names do
620 if name == n2 then
621 print "{id.position.to_s} Error: already an element named {name}."
622 exit(1)
623 end
624 end
625 v.elemname = name
626 end
627 end
628
629 redef class Nelem_id
630 redef fun accept_check_name_visitor(v) do
631 var id = children.first.as(Nid)
632 var name = id.text
633 if not v.v1.names.has_key(name) then
634 print "{id.position.to_s} Error: unknown name {name}"
635 exit(1)
636 abort
637 end
638 var node = v.v1.names[name]
639 var elem: nullable Element
640 if node isa Nprod then
641 elem = node.prod
642 else if node isa Nexpr then
643 elem = node.token
644 if elem == null then
645 elem = new Token(name)
646 elem.nexpr = node
647 v.v1.gram.tokens.add(elem)
648 node.token = elem
649 end
650 else
651 abort
652 end
653 assert elem != null
654 set_elem(v, id.position, elem)
655 if v.elemname == null then v.elemname = name
656 end
657 end
658
659 redef class Nelem_str
660 redef fun accept_check_name_visitor(v) do
661 var str = children.first.children.first.as(NToken)
662 var text = str.value
663 var name = str.text
664 var elem: nullable Element
665 if v.v1.names.has_key(name) then
666 elem = v.v1.names[name].as(Nelem_str).elem
667 assert elem != null
668 else
669 elem = new Token(name)
670 elem.text = text
671 v.v1.gram.tokens.add(elem)
672 v.v1.names[name] = self
673 end
674 set_elem(v, str.position, elem)
675 end
676 end
677
678 redef class Nelem_star
679 redef fun accept_check_name_visitor(v) do
680 super
681 var elem = v.elems.pop
682 elem = v.plusize(elem)
683 elem = v.quesize(elem)
684 set_elem(v, null, elem)
685 end
686 end
687
688 redef class Nelem_ques
689 redef fun accept_check_name_visitor(v) do
690 super
691 var elem = v.elems.pop
692 elem = v.quesize(elem)
693 set_elem(v, null, elem)
694 end
695 end
696
697 redef class Nelem_plus
698 redef fun accept_check_name_visitor(v) do
699 super
700 var elem = v.elems.pop
701 elem = v.plusize(elem)
702 set_elem(v, null, elem)
703 end
704 end
705
706 redef class Ntree_part
707 redef fun accept_collect_prod(v) do
708 print "NOT YET IMPLEMENTED: `Tree` part; it is ignored"
709 # SKIP by doing nothing
710 end
711
712 redef fun accept_check_name_visitor(v) do
713 # SKIP by doing nothing
714 end
715 end