73baa05a17ed70b35aa00026f05a84771bac4606
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 var nprods = new Array[Nprod]
32
33 # Symbol table to associate things (prods and exprs) with their name
34 var names = new HashMap[String, Node]
35
36 # The associated grammar object
37 # Read the result of the visit here
38 var gram = new Gram
39
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa = new Automaton.empty
43
44 # The current production, used to initialize priorities
45 var prod: nullable Production = null
46
47 # The current priority counter to name tranformed productions
48 var pricpt: Int = 0
49
50 # Run the semantic analysis of the grammar
51 fun start(n: Node)
52 do
53 # First visit to collect names
54 enter_visit(n)
55
56 # Second visit to use collectec names and build rhings
57 var v2 = new CheckNameVisitor(self)
58 v2.enter_visit(n)
59
60 # Inline all the ?
61 gram.inline(v2.quesizes.values)
62 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
63 for p in gram.prods do
64 if not p.name.has_suffix("_inline") then continue
65 print "inline {p}"
66 gram.inline([p])
67 end
68
69 # Build the NFA automaton
70 for t in gram.tokens do
71 var nfa2 = t.build_nfa
72 nfa2.tag_accept(t)
73 nfa.absorb(nfa2)
74 end
75
76 if not v2.ignoreds.is_empty then
77 var ign = new Token("Ignored")
78 var nfa2 = new Automaton.empty
79 for t in v2.ignoreds do
80 assert t isa Token
81 var nfa3 = t.build_nfa
82 nfa2.alternate(nfa3)
83 end
84 nfa2.tag_accept(ign)
85 nfa.absorb(nfa2)
86 end
87
88 end
89
90 redef fun visit(n) do n.accept_collect_prod(self)
91 end
92
93 private class CheckNameVisitor
94 super Visitor
95
96 # The collected altname, for the alternative
97 var altname: nullable String = null
98
99 # The collected elements, for the alternative
100 var elems = new Array[Element]
101
102 # The collected element names, for the alternative
103 var elems_names = new Array[nullable String]
104
105 # The collected elementname, for the nelem
106 var elemname: nullable String = null
107
108 # Is the alternative transformed, for the alternative
109 var trans = false
110
111 # The current priority class
112 # Used the check, and tranform the grammar
113 var pri: nullable Npriority = null
114
115 # Known ignored tokens
116 var ignoreds = new Array[Element]
117
118 # Known rejected tokens
119 var rejecteds = new Array[Element]
120
121 # Pool of elements that are modified with + (reuse them!)
122 private var plusizes = new HashMap[Element, Production]
123
124 # Create a + version of an element
125 fun plusize(e: Element): Production
126 do
127 if plusizes.has_key(e) then return plusizes[e]
128 var name = "{e}+"
129 var prod = new Production(name)
130 prod.acname = "Nodes[{e.acname}]"
131 v1.gram.prods.add(prod)
132 prod.new_alt("{name}_one", e)
133 prod.new_alt("{name}_more", prod, e)
134 plusizes[e] = prod
135 return prod
136 end
137
138 # Pool of elements that are modified with ? (reuse them!)
139 private var quesizes = new HashMap[Element, Production]
140
141 # Create a ? version of an element
142 fun quesize(e: Element): Production
143 do
144 if quesizes.has_key(e) then return quesizes[e]
145 var name = "{e}?"
146 var prod = new Production(name)
147 prod.acname = "nullable {e.acname}"
148 v1.gram.prods.add(prod)
149 var a1 = prod.new_alt("{name}_one", e)
150 a1.codes = [new CodePop]
151 var a0 = prod.new_alt0("{name}_none")
152 a0.codes = [new CodeNull]
153 quesizes[e] = prod
154 return prod
155 end
156
157 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
158 var nexpr: nullable Nexpr
159
160 # The current production, used to initialize alternatives
161 var prod: nullable Production
162
163 # The main visitor, used to access the grammar of the symbol table
164 var v1: CollectNameVisitor
165 init(v1: CollectNameVisitor) do self.v1 = v1
166
167 redef fun visit(n) do n.accept_check_name_visitor(self)
168 end
169
170 redef class Node
171 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
172 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
173 end
174
175 redef class Nexpr
176 # The associated token
177 var token: nullable Token
178
179 # The associated name
180 var name: nullable String
181
182 # The required expression (Nexpr that are used inside)
183 var precs = new ArraySet[Nexpr]
184
185 redef fun accept_collect_prod(v) do
186 var id = children.first.as(Nid)
187 var name = id.text
188 if v.names.has_key(name) then
189 print "{id.position.to_s} Error {name} already defined."
190 exit(1)
191 end
192 v.names[name] = self
193 self.name = name
194 end
195 redef fun accept_check_name_visitor(v) do
196 v.nexpr = self
197 super
198 v.nexpr = null
199 end
200
201 # Flag to track circular regular expression
202 var is_building = false
203
204 # The associated NFA (cached, see `build_nfa`)
205 private var nfa: nullable Automaton
206
207 # Build the NFA, possibily building the NFA of required expressions
208 # Print an error if there is a circular dependency
209 # The result is cached
210 fun build_nfa: Automaton do
211 var res = nfa
212 if res != null then return res
213 if is_building then
214 print "Error: circular regular expression {name.to_s}."
215 exit(1)
216 abort
217 end
218 is_building = true
219 for p in precs do
220 p.build_nfa
221 end
222 is_building = false
223 var nre = self.children[2]
224 res = nre.make_rfa
225 nfa = res
226 return res
227 end
228 end
229
230 redef class Nre_id
231 # The named expression
232 var nexpr: nullable Nexpr
233
234 redef fun accept_check_name_visitor(v) do
235 var id = children.first.as(Nid)
236 var name = id.text
237 if not v.v1.names.has_key(name) then
238 print "{id.position.to_s} Error: unknown name {name}"
239 exit(1)
240 abort
241 end
242 var node = v.v1.names[name]
243 if node isa Nprod then
244 print "{id.position.to_s} Error: cannot use production {name} in a regular expression"
245 exit(1)
246 abort
247 else if not node isa Nexpr then
248 abort
249 end
250 nexpr = node
251 v.nexpr.precs.add(node)
252 end
253
254 redef fun make_rfa
255 do
256 return nexpr.nfa.dup
257 end
258 end
259
260 redef class Nign
261 redef fun accept_check_name_visitor(v) do
262 # Add elements to the ignored list
263 v.elems = v.ignoreds
264 super
265 for e in v.elems do
266 if e isa Production then
267 print "Error: cannot ignore {e}, it is a production"
268 exit(1)
269 abort
270 else if e isa Token then
271 # The token was build and registered during the visit
272 # So, unregister then, the bit Ignred token will be build later
273 v.v1.gram.tokens.remove(e)
274 else
275 abort
276 end
277 end
278 end
279 end
280
281 redef class Nrej
282 redef fun accept_check_name_visitor(v) do
283 # Add elements to the rejected list
284 v.elems = v.rejecteds
285 super
286 for e in v.elems do
287 if e isa Production then
288 print "Error: cannot reject {e}, it is a production"
289 exit(1)
290 abort
291 else if e isa Token then
292 # The token was build and registered during the visit
293 else
294 abort
295 end
296 end
297 end
298 end
299
300 redef class Nprod
301 # The associated main production
302 # ie the last priority class
303 var prod: nullable Production
304
305 # The associated most-priority production
306 # ie the first priority class
307 # If there is no priority then `sub_prod == prod`
308 var sub_prod: nullable Production
309
310 redef fun accept_collect_prod(v) do
311 var id = children.first.as(Nid)
312 var name = id.text
313 if v.names.has_key(name) then
314 print "{id.position.to_s} Error {name} already defined."
315 exit(1)
316 end
317 v.names[name] = self
318 v.nprods.add(self)
319 var prod = new Production(name)
320 self.prod = prod
321 v.gram.prods.add(prod)
322
323 # Check priority block
324 var pris = children[4]
325 if pris isa Nodes[Npriority] then
326 var lastpri = pris.children.last
327 lastpri.is_last = true
328
329 v.pricpt = pris.children.length
330
331 # Create a new production for the first priority class
332 # The main production will be used for the last priority class
333 var spe = prod
334 prod = new Production(name + "$0")
335 prod.spe = spe
336 v.gram.prods.add(prod)
337 end
338 self.sub_prod = prod
339
340 v.prod = prod
341 super
342 v.prod = null
343 end
344 redef fun accept_check_name_visitor(v) do
345 v.prod = sub_prod
346 super
347 v.prod = null
348 end
349 end
350
351 redef class Nptrans
352 redef fun accept_check_name_visitor(v) do
353 var id = children[2].as(Nid)
354 var name = id.text
355
356 var node = v.v1.names[name]
357 if node isa Nprod then
358 v.prod.spe = node.prod.as(not null)
359 if v.prod.spe.spe != null then
360 print "Cannot transform into {name}, {name} is already transformed."
361 exit(1)
362 abort
363 end
364 else if node isa Nexpr then
365 print "Cannot transform into {name}, {name} is a token."
366 else
367 abort
368 end
369 end
370 end
371
372 redef class Natrans
373 redef fun accept_check_name_visitor(v) do
374 v.trans = true
375 end
376 end
377
378 redef class Npriority
379 var is_last = false
380
381 # The associated production
382 var prod: nullable Production
383
384 # The production in the with the next less priority class
385 # null is there is no priority or if the first priority class
386 var next: nullable Production
387
388 redef fun accept_collect_prod(v) do
389 var old = v.prod
390 assert old != null
391 var spe = old.spe
392 assert spe != null
393 if is_last then
394 prod = spe
395 else
396 v.pricpt -= 1
397 prod = new Production(spe.name + "${v.pricpt}")
398 prod.spe = spe
399 v.gram.prods.add(prod.as(not null))
400 end
401 next = old
402 v.prod = prod
403 super
404
405 end
406 redef fun accept_check_name_visitor(v) do
407 var old = v.prod
408 v.prod = prod
409 v.pri = self
410 super
411
412 # Inject a new alternative that goes to the next less prioty class
413 var alt = prod.new_alt2(prod.name + "_" + prod.alts.length.to_s, [next.as(not null)])
414 alt.trans = true
415 alt.codes = [new CodePop]
416
417 v.pri = null
418 v.prod = old
419 end
420
421 # Check and transform `v.elems` for priority
422 private fun check_priority(v: CheckNameVisitor) is abstract
423 end
424
425 redef class Npriority_left
426 redef fun check_priority(v) do
427 var p = prod.spe or else prod
428 assert p != null
429 if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
430 print("Error: in a Left priority class, left and right must be the production")
431 exit(1)
432 end
433 v.elems.first = prod.as(not null)
434 v.elems.last = next.as(not null)
435 end
436 end
437
438 redef class Npriority_right
439 redef fun check_priority(v) do
440 var p = prod.spe or else prod
441 assert p != null
442 if v.elems.length < 2 or v.elems.first != p or v.elems.last != p then
443 print("Error: in a Right priority class, left and right must be the production")
444 exit(1)
445 end
446 v.elems.first = next.as(not null)
447 v.elems.last = prod.as(not null)
448 end
449 end
450
451 redef class Npriority_unary
452 redef fun check_priority(v) do
453 var p = prod.spe or else prod
454 assert p != null
455 if v.elems.length < 2 or (v.elems.first != p and v.elems.last != p) then
456 print("Error: in a Unary priority class, left or right must be the production")
457 exit(1)
458 end
459 if v.elems.first == p then v.elems.first = prod.as(not null)
460 if v.elems.last == p then v.elems.last = prod.as(not null)
461 end
462 end
463
464 redef class Alternative
465 var short_name: nullable String
466 end
467
468 redef class Nalt
469 # The associated alternative
470 var alt: nullable Alternative
471
472 redef fun accept_check_name_visitor(v)
473 do
474 v.altname = null
475 v.trans = false
476 v.elems = new Array[Element]
477 v.elems_names = new Array[nullable String]
478
479 super
480
481 var pri = v.pri
482 if pri != null then pri.check_priority(v)
483
484 var prod = v.prod.as(not null)
485 var prodabs = prod.spe
486 if prodabs == null then prodabs = prod
487 var name = v.altname
488 if name == null then
489 if v.trans then
490 name = prod.name + "_" + prod.alts.length.to_s
491 else if prod.spe == null and prod.alts.is_empty and pri == null then
492 name = prod.name
493 prod.altone = true
494 else
495 if prodabs.altone then
496 prodabs.altone = false
497 prodabs.alts.first.name = prodabs.name + "_0"
498 end
499 name = prod.name + "_" + prod.alts.length.to_s
500 end
501 else
502 if prodabs.altone then
503 prodabs.altone = false
504 prodabs.alts.first.name = prodabs.name + "_0"
505 end
506 name = prodabs.name + "_" + name
507 end
508
509 var alt = prod.new_alt2(name, v.elems)
510 alt.short_name = v.altname
511 alt.elems_names.add_all(v.elems_names)
512 self.alt = alt
513 if v.trans then
514 alt.trans = true
515 alt.codes = [new CodePop]
516 end
517 end
518 end
519
520 redef class Naltid
521 redef fun accept_check_name_visitor(v)
522 do
523 var id = children[1].as(Nid)
524 var name = id.text
525 v.altname = name
526 var prod = v.prod.as(not null)
527 var prodabs = prod.spe
528 if prodabs == null then prodabs = prod
529 for x in prodabs.alts do
530 if x.short_name == name then
531 print "{id.position.to_s} Error: already an alternative named {name}"
532 exit(1)
533 end
534 end
535 end
536 end
537
538 redef class Nelem
539 # The associated element
540 var elem: nullable Element
541
542 # Set the element and check things
543 private fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
544 do
545 assert self.elem == null
546 self.elem = elem
547 if elem isa Token then
548 if v.ignoreds.has(elem) then
549 if pos != null then
550 print "{pos} Error: {elem.name} is already an ignored token."
551 else
552 print "Error: {elem.name} is already an ignored token."
553 end
554 exit(1)
555 end
556 if v.rejecteds.has(elem) then
557 if pos != null then
558 print "{pos} Error: {elem.name} is already a rejected token."
559 else
560 print "Error: {elem.name} is already a rejected token."
561 end
562 exit(1)
563 end
564 end
565 v.elems.push(elem)
566 end
567 end
568
569 redef class Token
570 # The associated expression (if any, ie defined in the lexer part)
571 var nexpr: nullable Nexpr
572 # The associated text (if any, ie defined in the parser part)
573 var text: nullable String
574
575 # Build a NFA according to nexpr or text
576 # Does not tag it!
577 fun build_nfa: Automaton
578 do
579 var nexpr = self.nexpr
580 if nexpr != null then
581 assert self.text == null
582 return nexpr.build_nfa
583 end
584 var text = self.text
585 if text != null then
586 var nfa = new Automaton.epsilon
587 for c in text.chars do
588 nfa.concat(new Automaton.atom(c.ascii))
589 end
590 return nfa
591 end
592 abort
593 end
594 end
595
596 redef class Nnelem
597 redef fun accept_check_name_visitor(v)
598 do
599 v.elemname = null
600 super
601 var elemname = v.elemname
602 if elemname != null and v.elems_names.has(elemname) then
603 var i = 2
604 while v.elems_names.has(elemname+i.to_s) do i += 1
605 elemname = elemname+i.to_s
606 end
607 v.elems_names.add elemname
608 end
609 end
610
611 redef class Nelemid
612 redef fun accept_check_name_visitor(v)
613 do
614 var id = children[1].as(Nid)
615 var name = id.text
616 for n2 in v.elems_names do
617 if name == n2 then
618 print "{id.position.to_s} Error: already an element named {name}."
619 exit(1)
620 end
621 end
622 v.elemname = name
623 end
624 end
625
626 redef class Nelem_id
627 redef fun accept_check_name_visitor(v) do
628 var id = children.first.as(Nid)
629 var name = id.text
630 if not v.v1.names.has_key(name) then
631 print "{id.position.to_s} Error: unknown name {name}"
632 exit(1)
633 abort
634 end
635 var node = v.v1.names[name]
636 var elem: nullable Element
637 if node isa Nprod then
638 elem = node.prod
639 else if node isa Nexpr then
640 elem = node.token
641 if elem == null then
642 elem = new Token(name)
643 elem.nexpr = node
644 v.v1.gram.tokens.add(elem)
645 node.token = elem
646 end
647 else
648 abort
649 end
650 assert elem != null
651 set_elem(v, id.position, elem)
652 if v.elemname == null then v.elemname = name
653 end
654 end
655
656 redef class Nelem_str
657 redef fun accept_check_name_visitor(v) do
658 var str = children.first.children.first.as(NToken)
659 var text = str.value
660 var name = str.text
661 var elem: nullable Element
662 if v.v1.names.has_key(name) then
663 elem = v.v1.names[name].as(Nelem_str).elem
664 assert elem != null
665 else
666 elem = new Token(name)
667 elem.text = text
668 v.v1.gram.tokens.add(elem)
669 v.v1.names[name] = self
670 end
671 set_elem(v, str.position, elem)
672 end
673 end
674
675 redef class Nelem_star
676 redef fun accept_check_name_visitor(v) do
677 super
678 var elem = v.elems.pop
679 elem = v.plusize(elem)
680 elem = v.quesize(elem)
681 set_elem(v, null, elem)
682 end
683 end
684
685 redef class Nelem_ques
686 redef fun accept_check_name_visitor(v) do
687 super
688 var elem = v.elems.pop
689 elem = v.quesize(elem)
690 set_elem(v, null, elem)
691 end
692 end
693
694 redef class Nelem_plus
695 redef fun accept_check_name_visitor(v) do
696 super
697 var elem = v.elems.pop
698 elem = v.plusize(elem)
699 set_elem(v, null, elem)
700 end
701 end
702
703 redef class Ntree_part
704 redef fun accept_collect_prod(v) do
705 print "NOT YET IMPLEMENTED: `Tree` part; it is ignored"
706 # SKIP by doing nothing
707 end
708
709 redef fun accept_check_name_visitor(v) do
710 # SKIP by doing nothing
711 end
712 end