1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
32 var nprods
= new Array[Nprod]
34 # Symbol table to associate things (prods and exprs) with their name
35 var names
= new HashMap[String, Node]
37 # The associated grammar object
38 # Read the result of the visit here
41 # The associated NFA object
42 # Read the result of the visit here
43 var nfa
= new Automaton.empty
45 # The current production, used to initialize priorities
46 var prod
: nullable Production = null
48 # The current priority counter to name transformed productions
51 # Run the semantic analysis of the grammar
54 # First visit to collect names
57 # Second visit to use collected names and build things
58 var v2
= new CheckNameVisitor(self)
62 gram
.inline
(v2
.quesizes
.values
)
63 # Inline all the prods suffixed by '_inline' #TODO use a real keyword
64 for p
in gram
.prods
do
65 if not p
.name
.has_suffix
("_inline") then continue
70 # Build the NFA automaton
71 for t
in gram
.tokens
do
72 var nfa2
= t
.build_nfa
77 if not v2
.ignoreds
.is_empty
then
78 var ign
= new Token("Ignored")
79 var nfa2
= new Automaton.empty
80 for t
in v2
.ignoreds
do
82 var nfa3
= t
.build_nfa
91 redef fun visit
(n
) do n
.accept_collect_prod
(self)
94 private class CheckNameVisitor
97 # The collected altname, for the alternative
98 var altname
: nullable String = null
100 # The collected elements, for the alternative
101 var elems
= new Array[Element]
103 # The collected element names, for the alternative
104 var elems_names
= new Array[nullable String]
106 # The collected element names, for the nelem
107 var elemname
: nullable String = null
109 # Is the alternative transformed, for the alternative
112 # The current priority class
113 # Used the check, and transform the grammar
114 var pri
: nullable Npriority = null
116 # Known ignored tokens
117 var ignoreds
= new Array[Element]
119 # Known rejected tokens
120 var rejecteds
= new Array[Element]
122 # Pool of elements that are modified with + (reuse them!)
123 var plusizes
= new HashMap[Element, Production]
125 # Create a + version of an element
126 fun plusize
(e
: Element): Production
128 if plusizes
.has_key
(e
) then return plusizes
[e
]
130 var prod
= new Production(name
)
131 prod
.acname
= "Nodes[{e.acname}]"
132 v1
.gram
.prods
.add
(prod
)
133 prod
.new_alt
("{name}_one", e
)
134 prod
.new_alt
("{name}_more", prod
, e
)
139 # Pool of elements that are modified with ? (reuse them!)
140 var quesizes
= new HashMap[Element, Production]
142 # Create a ? version of an element
143 fun quesize
(e
: Element): Production
145 if quesizes
.has_key
(e
) then return quesizes
[e
]
147 var prod
= new Production(name
)
148 prod
.acname
= "nullable {e.acname}"
149 v1
.gram
.prods
.add
(prod
)
150 var a1
= prod
.new_alt
("{name}_one", e
)
151 a1
.codes
= [new CodePop]
152 var a0
= prod
.new_alt0
("{name}_none")
153 a0
.codes
= [new CodeNull]
158 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
159 var nexpr
: nullable Nexpr = null
161 # The current production, used to initialize alternatives
162 var prod
: nullable Production = null
164 # The main visitor, used to access the grammar of the symbol table
165 var v1
: CollectNameVisitor
167 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
171 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
172 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
176 # The associated token
177 var token
: nullable Token
179 # The associated name
180 var name
: nullable String
182 # The required expression (Nexpr that are used inside)
183 var precs
= new ArraySet[Nexpr]
185 redef fun accept_collect_prod
(v
) do
186 var id
= children
.first
.as(Nid)
188 if v
.names
.has_key
(name
) then
189 print
"{id.position.to_s} Error {name} already defined."
195 redef fun accept_check_name_visitor
(v
) do
201 # Flag to track circular regular expression
202 var is_building
= false
204 # The associated NFA (cached, see `build_nfa`)
205 private var nfa
: nullable Automaton
207 # Build the NFA, possibly building the NFA of required expressions
208 # Print an error if there is a circular dependency
209 # The result is cached
210 fun build_nfa
: Automaton do
212 if res
!= null then return res
214 print
"Error: circular regular expression {name.to_s}."
223 var nre
= self.children
[2]
231 # The named expression
232 var nexpr
: nullable Nexpr
234 redef fun accept_check_name_visitor
(v
) do
235 var id
= children
.first
.as(Nid)
237 if not v
.v1
.names
.has_key
(name
) then
238 print
"{id.position.to_s} Error: unknown name {name}"
242 var node
= v
.v1
.names
[name
]
243 if node
isa Nprod then
244 print
"{id.position.to_s} Error: cannot use production {name} in a regular expression"
247 else if not node
isa Nexpr then
251 v
.nexpr
.precs
.add
(node
)
261 redef fun accept_check_name_visitor
(v
) do
262 # Add elements to the ignored list
266 if e
isa Production then
267 print
"Error: cannot ignore {e}, it is a production"
270 else if e
isa Token then
271 # The token was build and registered during the visit
272 # So, unregister them, the big Ignored token will be build later
273 v
.v1
.gram
.tokens
.remove
(e
)
282 redef fun accept_check_name_visitor
(v
) do
283 # Add elements to the rejected list
284 v
.elems
= v
.rejecteds
287 if e
isa Production then
288 print
"Error: cannot reject {e}, it is a production"
291 else if e
isa Token then
292 # The token was build and registered during the visit
301 # The associated main production.
302 # i.e. the last priority class.
303 var prod
: nullable Production
305 # The associated most-priority production.
306 # i.e. the first priority class.
307 # If there is no priority then `sub_prod == prod`.
308 var sub_prod
: nullable Production
310 redef fun accept_collect_prod
(v
) do
311 var id
= children
.first
.as(Nid)
313 if v
.names
.has_key
(name
) then
314 print
"{id.position.to_s} Error {name} already defined."
319 var prod
= new Production(name
)
321 v
.gram
.prods
.add
(prod
)
323 # Check priority block
324 var pris
= children
[4]
325 if pris
isa Nodes[Npriority] then
326 var lastpri
= pris
.children
.last
327 lastpri
.is_last
= true
329 v
.pricpt
= pris
.children
.length
331 # Create a new production for the first priority class
332 # The main production will be used for the last priority class
334 prod
= new Production(name
+ "$0")
336 v
.gram
.prods
.add
(prod
)
344 redef fun accept_check_name_visitor
(v
) do
352 redef fun accept_check_name_visitor
(v
) do
353 var id
= children
[2].as(Nid)
356 var node
= v
.v1
.names
[name
]
357 if node
isa Nprod then
358 v
.prod
.spe
= node
.prod
.as(not null)
359 if v
.prod
.spe
.spe
!= null then
360 print
"Cannot transform into {name}, {name} is already transformed."
364 else if node
isa Nexpr then
365 print
"Cannot transform into {name}, {name} is a token."
373 redef fun accept_check_name_visitor
(v
) do
378 redef class Npriority
379 # It is the last priority group?
382 # The associated production
383 var prod
: nullable Production
385 # The production in the with the next less priority class.
386 # `null` if there is no priority or if the first priority class.
387 var next
: nullable Production
389 redef fun accept_collect_prod
(v
) do
398 prod
= new Production(spe
.name
+ "${v.pricpt}")
400 v
.gram
.prods
.add
(prod
.as(not null))
407 redef fun accept_check_name_visitor
(v
) do
413 # Inject a new alternative that goes to the next less priority class
414 var alt
= prod
.new_alt2
(prod
.name
+ "_" + prod
.alts
.length
.to_s
, [next
.as(not null)])
416 alt
.codes
= [new CodePop]
422 # Check and transform `v.elems` for priority
423 private fun check_priority
(v
: CheckNameVisitor) is abstract
426 redef class Npriority_left
427 redef fun check_priority
(v
) do
428 var p
= prod
.spe
or else prod
430 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
431 print
("Error: in a Left priority class, left and right must be the production")
434 v
.elems
.first
= prod
.as(not null)
435 v
.elems
.last
= next
.as(not null)
439 redef class Npriority_right
440 redef fun check_priority
(v
) do
441 var p
= prod
.spe
or else prod
443 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
444 print
("Error: in a Right priority class, left and right must be the production")
447 v
.elems
.first
= next
.as(not null)
448 v
.elems
.last
= prod
.as(not null)
452 redef class Npriority_unary
453 redef fun check_priority
(v
) do
454 var p
= prod
.spe
or else prod
456 if v
.elems
.length
< 2 or (v
.elems
.first
!= p
and v
.elems
.last
!= p
) then
457 print
("Error: in a Unary priority class, left or right must be the production")
460 if v
.elems
.first
== p
then v
.elems
.first
= prod
.as(not null)
461 if v
.elems
.last
== p
then v
.elems
.last
= prod
.as(not null)
465 redef class Alternative
466 # The short name of the alternative.
468 var short_name
: nullable String
472 # The associated alternative
473 var alt
: nullable Alternative
475 redef fun accept_check_name_visitor
(v
)
479 v
.elems
= new Array[Element]
480 v
.elems_names
= new Array[nullable String]
485 if pri
!= null then pri
.check_priority
(v
)
487 var prod
= v
.prod
.as(not null)
488 var prodabs
= prod
.spe
489 if prodabs
== null then prodabs
= prod
493 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
494 else if prod
.spe
== null and prod
.alts
.is_empty
and pri
== null then
498 if prodabs
.altone
then
499 prodabs
.altone
= false
500 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
502 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
505 if prodabs
.altone
then
506 prodabs
.altone
= false
507 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
509 name
= prodabs
.name
+ "_" + name
512 var alt
= prod
.new_alt2
(name
, v
.elems
)
513 alt
.short_name
= v
.altname
514 alt
.elems_names
.add_all
(v
.elems_names
)
518 alt
.codes
= [new CodePop]
524 redef fun accept_check_name_visitor
(v
)
526 var id
= children
[1].as(Nid)
529 var prod
= v
.prod
.as(not null)
530 var prodabs
= prod
.spe
531 if prodabs
== null then prodabs
= prod
532 for x
in prodabs
.alts
do
533 if x
.short_name
== name
then
534 print
"{id.position.to_s} Error: already an alternative named {name}"
542 # The associated element
543 var elem
: nullable Element
545 # Set the element and check things
546 private fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
548 assert self.elem
== null
550 if elem
isa Token then
551 if v
.ignoreds
.has
(elem
) then
553 print
"{pos} Error: {elem.name} is already an ignored token."
555 print
"Error: {elem.name} is already an ignored token."
559 if v
.rejecteds
.has
(elem
) then
561 print
"{pos} Error: {elem.name} is already a rejected token."
563 print
"Error: {elem.name} is already a rejected token."
573 # The associated expression (if any, ie defined in the lexer part)
574 var nexpr
: nullable Nexpr
575 # The associated text (if any, ie defined in the parser part)
576 var text
: nullable String
578 # Build a NFA according to nexpr or text
580 fun build_nfa
: Automaton
582 var nexpr
= self.nexpr
583 if nexpr
!= null then
584 assert self.text
== null
585 return nexpr
.build_nfa
589 var nfa
= new Automaton.epsilon
590 for c
in text
.chars
do
591 nfa
.concat
(new Automaton.atom
(c
.ascii
))
600 redef fun accept_check_name_visitor
(v
)
604 var elemname
= v
.elemname
605 if elemname
!= null and v
.elems_names
.has
(elemname
) then
607 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
608 elemname
= elemname
+i
.to_s
610 v
.elems_names
.add elemname
615 redef fun accept_check_name_visitor
(v
)
617 var id
= children
[1].as(Nid)
619 for n2
in v
.elems_names
do
621 print
"{id.position.to_s} Error: already an element named {name}."
630 redef fun accept_check_name_visitor
(v
) do
631 var id
= children
.first
.as(Nid)
633 if not v
.v1
.names
.has_key
(name
) then
634 print
"{id.position.to_s} Error: unknown name {name}"
638 var node
= v
.v1
.names
[name
]
639 var elem
: nullable Element
640 if node
isa Nprod then
642 else if node
isa Nexpr then
645 elem
= new Token(name
)
647 v
.v1
.gram
.tokens
.add
(elem
)
654 set_elem
(v
, id
.position
, elem
)
655 if v
.elemname
== null then v
.elemname
= name
659 redef class Nelem_str
660 redef fun accept_check_name_visitor
(v
) do
661 var str
= children
.first
.children
.first
.as(NToken)
664 var elem
: nullable Element
665 if v
.v1
.names
.has_key
(name
) then
666 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
669 elem
= new Token(name
)
671 v
.v1
.gram
.tokens
.add
(elem
)
672 v
.v1
.names
[name
] = self
674 set_elem
(v
, str
.position
, elem
)
678 redef class Nelem_star
679 redef fun accept_check_name_visitor
(v
) do
681 var elem
= v
.elems
.pop
682 elem
= v
.plusize
(elem
)
683 elem
= v
.quesize
(elem
)
684 set_elem
(v
, null, elem
)
688 redef class Nelem_ques
689 redef fun accept_check_name_visitor
(v
) do
691 var elem
= v
.elems
.pop
692 elem
= v
.quesize
(elem
)
693 set_elem
(v
, null, elem
)
697 redef class Nelem_plus
698 redef fun accept_check_name_visitor
(v
) do
700 var elem
= v
.elems
.pop
701 elem
= v
.plusize
(elem
)
702 set_elem
(v
, null, elem
)
706 redef class Ntree_part
707 redef fun accept_collect_prod
(v
) do
708 print
"NOT YET IMPLEMENTED: `Tree` part; it is ignored"
709 # SKIP by doing nothing
712 redef fun accept_check_name_visitor
(v
) do
713 # SKIP by doing nothing