1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
32 var nprods
= new Array[Nprod]
34 # Symbol table to associate things (prods and exprs) with their name
35 var names
= new HashMap[String, Node]
37 # The associated grammar object
38 # Read the result of the visit here
41 # The associated NFA object
42 # Read the result of the visit here
43 var nfa
= new Automaton.empty
45 # The current production, used to initialize priorities
46 var prod
: nullable Production = null
48 # The current priority counter to name transformed productions
51 # Run the semantic analysis of the grammar
54 # First visit to collect names
57 # Second visit to use collected names and build things
58 var v2
= new CheckNameVisitor(self)
62 gram
.inline
(v2
.quesizes
.values
)
63 # Inline all the prods suffixed by '_inline' #TODO use a real keyword
64 for p
in gram
.prods
do
65 if not p
.name
.has_suffix
("_inline") then continue
70 # Build the NFA automaton
71 for t
in gram
.tokens
do
72 var nfa2
= t
.build_nfa
77 if not v2
.ignoreds
.is_empty
then
78 var ign
= new Token("Ignored")
79 var nfa2
= new Automaton.empty
80 for t
in v2
.ignoreds
do
82 var nfa3
= t
.build_nfa
91 redef fun visit
(n
) do n
.accept_collect_prod
(self)
94 private class CheckNameVisitor
97 # The collected altname, for the alternative
98 var altname
: nullable String = null
100 # The collected elements, for the alternative
101 var elems
= new Array[Element]
103 # The collected element names, for the alternative
104 var elems_names
= new Array[nullable String]
106 # The collected element names, for the nelem
107 var elemname
: nullable String = null
109 # Is the alternative transformed, for the alternative
112 # The current priority class
113 # Used the check, and transform the grammar
114 var pri
: nullable Npriority = null
116 # Known ignored tokens
117 var ignoreds
= new Array[Element]
119 # Known rejected tokens
120 var rejecteds
= new Array[Element]
122 # Pool of elements that are modified with + (reuse them!)
123 var plusizes
= new HashMap[Element, Production]
125 # Create a + version of an element
126 fun plusize
(e
: Element): Production
128 if plusizes
.has_key
(e
) then return plusizes
[e
]
130 var prod
= new Production(name
)
131 prod
.acname
= "Nodes[{e.acname}]"
132 v1
.gram
.prods
.add
(prod
)
133 prod
.new_alt
("{name}_one", e
)
134 prod
.new_alt
("{name}_more", prod
, e
)
139 # Pool of elements that are modified with ? (reuse them!)
140 var quesizes
= new HashMap[Element, Production]
142 # Create a ? version of an element
143 fun quesize
(e
: Element): Production
145 if quesizes
.has_key
(e
) then return quesizes
[e
]
147 var prod
= new Production(name
)
148 prod
.acname
= "nullable {e.acname}"
149 v1
.gram
.prods
.add
(prod
)
150 var a1
= prod
.new_alt
("{name}_one", e
)
151 a1
.codes
= [new CodePop]
152 var a0
= prod
.new_alt0
("{name}_none")
153 a0
.codes
= [new CodeNull]
158 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
159 var nexpr
: nullable Nexpr = null
161 # The current production, used to initialize alternatives
162 var prod
: nullable Production = null
164 # The main visitor, used to access the grammar of the symbol table
165 var v1
: CollectNameVisitor
167 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
171 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
172 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
176 # The associated token
177 var token
: nullable Token
179 # The associated name
180 var name
: nullable String
182 # The required expression (Nexpr that are used inside)
183 var precs
= new ArraySet[Nexpr]
185 redef fun accept_collect_prod
(v
) do
186 var id
= children
.first
.as(Nid)
188 if v
.names
.has_key
(name
) then
189 print
"{id.position.to_s} Error {name} already defined."
195 redef fun accept_check_name_visitor
(v
) do
201 # Flag to track circular regular expression
202 var is_building
= false
204 # The associated NFA (cached, see `build_nfa`)
205 private var nfa
: nullable Automaton
207 # Build the NFA, possibly building the NFA of required expressions
208 # Print an error if there is a circular dependency
209 # The result is cached
210 fun build_nfa
: Automaton do
212 if res
!= null then return res
214 print
"Error: circular regular expression {name.to_s}."
223 var nre
= self.children
[2]
231 # The named expression
232 var nexpr
: nullable Nexpr
234 redef fun accept_check_name_visitor
(v
) do
235 var id
= children
.first
.as(Nid)
237 if not v
.v1
.names
.has_key
(name
) then
238 print
"{id.position.to_s} Error: unknown name {name}"
242 var node
= v
.v1
.names
[name
]
243 if node
isa Nprod then
244 print
"{id.position.to_s} Error: cannot use production {name} in a regular expression"
247 else if not node
isa Nexpr then
251 v
.nexpr
.precs
.add
(node
)
261 redef fun accept_check_name_visitor
(v
) do
262 # Add elements to the ignored list
266 if e
isa Production then
267 print
"Error: cannot ignore {e}, it is a production"
270 else if e
isa Token then
271 # The token was build and registered during the visit
272 # So, unregister them, the big Ignored token will be build later
273 v
.v1
.gram
.tokens
.remove
(e
)
282 redef fun accept_check_name_visitor
(v
) do
283 # Add elements to the rejected list
284 v
.elems
= v
.rejecteds
287 if e
isa Production then
288 print
"Error: cannot reject {e}, it is a production"
291 else if e
isa Token then
292 # The token was build and registered during the visit
301 # The associated main production.
302 # i.e. the last priority class.
303 var prod
: nullable Production
305 # The associated most-priority production.
306 # i.e. the first priority class.
307 # If there is no priority then `sub_prod == prod`.
308 var sub_prod
: nullable Production
310 redef fun accept_collect_prod
(v
) do
311 var id
= children
.first
.as(Nid)
313 if v
.names
.has_key
(name
) then
314 print
"{id.position.to_s} Error {name} already defined."
319 var prod
= new Production(name
)
321 v
.gram
.prods
.add
(prod
)
323 # Check priority block
324 var pris
= children
[4]
325 if pris
isa Nodes[Npriority] then
326 var lastpri
= pris
.children
.last
327 lastpri
.is_last
= true
329 v
.pricpt
= pris
.children
.length
331 # Create a new production for the first priority class
332 # The main production will be used for the last priority class
334 prod
= new Production(name
+ "$0")
336 v
.gram
.prods
.add
(prod
)
344 redef fun accept_check_name_visitor
(v
) do
352 redef fun accept_check_name_visitor
(v
) do
353 var id
= children
[2].as(Nid)
356 var node
= v
.v1
.names
[name
]
357 if node
isa Nprod then
358 v
.prod
.spe
= node
.prod
.as(not null)
359 if v
.prod
.spe
.spe
!= null then
360 print
"Cannot transform into {name}, {name} is already transformed."
364 else if node
isa Nexpr then
365 print
"Cannot transform into {name}, {name} is a token."
373 redef fun accept_check_name_visitor
(v
) do
378 redef class Npriority
379 # It is the last priority group?
382 # The associated production
383 var prod
: nullable Production
385 # The production in the with the next less priority class.
386 # `null` if there is no priority or if the first priority class.
387 var next
: nullable Production
389 redef fun accept_collect_prod
(v
) do
398 prod
= new Production(spe
.name
+ "${v.pricpt}")
400 v
.gram
.prods
.add
(prod
.as(not null))
407 redef fun accept_check_name_visitor
(v
) do
413 # Inject a new alternative that goes to the next less priority class
414 var alt
= prod
.new_alt2
(prod
.name
+ "_" + prod
.alts
.length
.to_s
, [next
.as(not null)])
416 alt
.codes
= [new CodePop]
422 # Check and transform `v.elems` for priority
423 private fun check_priority
(v
: CheckNameVisitor) is abstract
426 redef class Npriority_left
427 redef fun check_priority
(v
) do
428 var p
= prod
.spe
or else prod
430 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
431 print
("Error: in a Left priority class, left and right must be the production")
434 v
.elems
.first
= prod
.as(not null)
435 v
.elems
.last
= next
.as(not null)
439 redef class Npriority_right
440 redef fun check_priority
(v
) do
441 var p
= prod
.spe
or else prod
443 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
444 print
("Error: in a Right priority class, left and right must be the production")
447 v
.elems
.first
= next
.as(not null)
448 v
.elems
.last
= prod
.as(not null)
452 redef class Npriority_unary
453 redef fun check_priority
(v
) do
454 var p
= prod
.spe
or else prod
456 if v
.elems
.length
< 2 or (v
.elems
.first
!= p
and v
.elems
.last
!= p
) then
457 print
("Error: in a Unary priority class, left or right must be the production")
460 if v
.elems
.first
== p
then v
.elems
.first
= prod
.as(not null)
461 if v
.elems
.last
== p
then v
.elems
.last
= prod
.as(not null)
465 redef class Alternative
466 # The short name of the alternative.
468 var short_name
: nullable String
472 # The associated alternative
473 var alt
: nullable Alternative
475 redef fun accept_check_name_visitor
(v
)
479 v
.elems
= new Array[Element]
480 v
.elems_names
= new Array[nullable String]
485 if pri
!= null then pri
.check_priority
(v
)
487 var prod
= v
.prod
.as(not null)
488 var prodabs
= prod
.spe
489 if prodabs
== null then prodabs
= prod
493 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
494 else if prod
.spe
== null and prod
.alts
.is_empty
and pri
== null then
498 if prodabs
.altone
then
499 prodabs
.altone
= false
500 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
502 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
505 if prodabs
.altone
then
506 prodabs
.altone
= false
507 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
509 name
= prodabs
.name
+ "_" + name
512 var alt
= prod
.new_alt2
(name
, v
.elems
)
513 alt
.short_name
= v
.altname
514 alt
.elems_names
.add_all
(v
.elems_names
)
518 alt
.codes
= [new CodePop]
524 redef fun accept_check_name_visitor
(v
)
526 var id
= children
[1].as(Nid)
529 var prod
= v
.prod
.as(not null)
530 var prodabs
= prod
.spe
531 if prodabs
== null then prodabs
= prod
532 for x
in prodabs
.alts
do
533 if x
.short_name
== name
then
534 print
"{id.position.to_s} Error: already an alternative named {name}"
542 # The associated element
543 var elem
: nullable Element
545 # Set the element and check things
546 private fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
548 assert self.elem
== null
550 if elem
isa Token then
551 if v
.ignoreds
.has
(elem
) then
553 print
"{pos} Error: {elem.name} is already an ignored token."
555 print
"Error: {elem.name} is already an ignored token."
559 if v
.rejecteds
.has
(elem
) then
561 print
"{pos} Error: {elem.name} is already a rejected token."
563 print
"Error: {elem.name} is already a rejected token."
573 # The associated expression (if any, ie defined in the lexer part)
574 var nexpr
: nullable Nexpr
576 # Build a NFA according to nexpr or text
578 fun build_nfa
: Automaton
580 var nexpr
= self.nexpr
581 if nexpr
!= null then
582 assert self.text
== null
583 return nexpr
.build_nfa
587 var nfa
= new Automaton.epsilon
588 for c
in text
.chars
do
589 nfa
.concat
(new Automaton.atom
(c
.code_point
))
598 redef fun accept_check_name_visitor
(v
)
602 var elemname
= v
.elemname
603 if elemname
!= null and v
.elems_names
.has
(elemname
) then
605 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
606 elemname
= elemname
+i
.to_s
608 v
.elems_names
.add elemname
613 redef fun accept_check_name_visitor
(v
)
615 var id
= children
[1].as(Nid)
617 for n2
in v
.elems_names
do
619 print
"{id.position.to_s} Error: already an element named {name}."
628 redef fun accept_check_name_visitor
(v
) do
629 var id
= children
.first
.as(Nid)
631 if not v
.v1
.names
.has_key
(name
) then
632 print
"{id.position.to_s} Error: unknown name {name}"
636 var node
= v
.v1
.names
[name
]
637 var elem
: nullable Element
638 if node
isa Nprod then
640 else if node
isa Nexpr then
643 elem
= new Token(name
)
645 v
.v1
.gram
.tokens
.add
(elem
)
652 set_elem
(v
, id
.position
, elem
)
653 if v
.elemname
== null then v
.elemname
= name
657 redef class Nelem_str
658 redef fun accept_check_name_visitor
(v
) do
659 var str
= children
.first
.children
.first
.as(NToken)
662 var elem
: nullable Element
663 if v
.v1
.names
.has_key
(name
) then
664 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
667 elem
= new Token(name
)
669 v
.v1
.gram
.tokens
.add
(elem
)
670 v
.v1
.names
[name
] = self
672 set_elem
(v
, str
.position
, elem
)
676 redef class Nelem_star
677 redef fun accept_check_name_visitor
(v
) do
679 var elem
= v
.elems
.pop
680 elem
= v
.plusize
(elem
)
681 elem
= v
.quesize
(elem
)
682 set_elem
(v
, null, elem
)
686 redef class Nelem_ques
687 redef fun accept_check_name_visitor
(v
) do
689 var elem
= v
.elems
.pop
690 elem
= v
.quesize
(elem
)
691 set_elem
(v
, null, elem
)
695 redef class Nelem_plus
696 redef fun accept_check_name_visitor
(v
) do
698 var elem
= v
.elems
.pop
699 elem
= v
.plusize
(elem
)
700 set_elem
(v
, null, elem
)
704 redef class Ntree_part
705 redef fun accept_collect_prod
(v
) do
706 print
"NOT YET IMPLEMENTED: `Tree` part; it is ignored"
707 # SKIP by doing nothing
710 redef fun accept_check_name_visitor
(v
) do
711 # SKIP by doing nothing