1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
31 var nprods
= new Array[Nprod]
33 # Symbol table to associate things (prods and exprs) with their name
34 var names
= new HashMap[String, Node]
36 # The associated grammar object
37 # Read the result of the visit here
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa
= new Automaton.empty
44 # The current production, used to initialize priorities
45 var prod
: nullable Production = null
47 # The current priority counter to name tranformed productions
50 # Run the semantic analysis of the grammar
53 # First visit to collect names
56 # Second visit to use collectec names and build rhings
57 var v2
= new CheckNameVisitor(self)
61 gram
.inline
(v2
.quesizes
.values
)
62 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
63 for p
in gram
.prods
do
64 if not p
.name
.has_suffix
("_inline") then continue
69 # Build the NFA automaton
70 for t
in gram
.tokens
do
71 var nfa2
= t
.build_nfa
76 if not v2
.ignoreds
.is_empty
then
77 var ign
= new Token("Ignored")
78 var nfa2
= new Automaton.empty
79 for t
in v2
.ignoreds
do
81 var nfa3
= t
.build_nfa
90 redef fun visit
(n
) do n
.accept_collect_prod
(self)
93 private class CheckNameVisitor
96 # The collected altname, for the alternative
97 var altname
: nullable String = null
99 # The collected elements, for the alternative
100 var elems
= new Array[Element]
102 # The collected element names, for the alternative
103 var elems_names
= new Array[nullable String]
105 # The collected elementname, for the nelem
106 var elemname
: nullable String = null
108 # Is the alternative transformed, for the alternative
111 # The current priority class
112 # Used the check, and tranform the grammar
113 var pri
: nullable Npriority = null
115 # Known ignored tokens
116 var ignoreds
= new Array[Element]
118 # Known rejected tokens
119 var rejecteds
= new Array[Element]
121 # Pool of elements that are modified with + (reuse them!)
122 private var plusizes
= new HashMap[Element, Production]
124 # Create a + version of an element
125 fun plusize
(e
: Element): Production
127 if plusizes
.has_key
(e
) then return plusizes
[e
]
129 var prod
= new Production(name
)
130 prod
.acname
= "Nodes[{e.acname}]"
131 v1
.gram
.prods
.add
(prod
)
132 prod
.new_alt
("{name}_one", e
)
133 prod
.new_alt
("{name}_more", prod
, e
)
138 # Pool of elements that are modified with ? (reuse them!)
139 private var quesizes
= new HashMap[Element, Production]
141 # Create a ? version of an element
142 fun quesize
(e
: Element): Production
144 if quesizes
.has_key
(e
) then return quesizes
[e
]
146 var prod
= new Production(name
)
147 prod
.acname
= "nullable {e.acname}"
148 v1
.gram
.prods
.add
(prod
)
149 var a1
= prod
.new_alt
("{name}_one", e
)
150 a1
.codes
= [new CodePop]
151 var a0
= prod
.new_alt0
("{name}_none")
152 a0
.codes
= [new CodeNull]
157 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
158 var nexpr
: nullable Nexpr
160 # The current production, used to initialize alternatives
161 var prod
: nullable Production
163 # The main visitor, used to access the grammar of the symbol table
164 var v1
: CollectNameVisitor
165 init(v1
: CollectNameVisitor) do self.v1
= v1
167 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
171 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
172 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
176 # The associated token
177 var token
: nullable Token
179 # The associated name
180 var name
: nullable String
182 # The required expression (Nexpr that are used inside)
183 var precs
= new ArraySet[Nexpr]
185 redef fun accept_collect_prod
(v
) do
186 var id
= children
.first
.as(Nid)
188 if v
.names
.has_key
(name
) then
189 print
"{id.position.to_s} Error {name} already defined."
195 redef fun accept_check_name_visitor
(v
) do
201 # Flag to track circular regular expression
202 var is_building
= false
204 # The associated NFA (cached, see `build_nfa`)
205 private var nfa
: nullable Automaton
207 # Build the NFA, possibily building the NFA of required expressions
208 # Print an error if there is a circular dependency
209 # The result is cached
210 fun build_nfa
: Automaton do
212 if res
!= null then return res
214 print
"Error: circular regular expression {name.to_s}."
223 var nre
= self.children
[2]
231 # The named expression
232 var nexpr
: nullable Nexpr
234 redef fun accept_check_name_visitor
(v
) do
235 var id
= children
.first
.as(Nid)
237 if not v
.v1
.names
.has_key
(name
) then
238 print
"{id.position.to_s} Error: unknown name {name}"
242 var node
= v
.v1
.names
[name
]
243 if node
isa Nprod then
244 print
"{id.position.to_s} Error: cannot use production {name} in a regular expression"
247 else if not node
isa Nexpr then
251 v
.nexpr
.precs
.add
(node
)
261 redef fun accept_check_name_visitor
(v
) do
262 # Add elements to the ignored list
266 if e
isa Production then
267 print
"Error: cannot ignore {e}, it is a production"
270 else if e
isa Token then
271 # The token was build and registered during the visit
272 # So, unregister then, the bit Ignred token will be build later
273 v
.v1
.gram
.tokens
.remove
(e
)
282 redef fun accept_check_name_visitor
(v
) do
283 # Add elements to the rejected list
284 v
.elems
= v
.rejecteds
287 if e
isa Production then
288 print
"Error: cannot reject {e}, it is a production"
291 else if e
isa Token then
292 # The token was build and registered during the visit
301 # The associated main production
302 # ie the last priority class
303 var prod
: nullable Production
305 # The associated most-priority production
306 # ie the first priority class
307 # If there is no priority then `sub_prod == prod`
308 var sub_prod
: nullable Production
310 redef fun accept_collect_prod
(v
) do
311 var id
= children
.first
.as(Nid)
313 if v
.names
.has_key
(name
) then
314 print
"{id.position.to_s} Error {name} already defined."
319 var prod
= new Production(name
)
321 v
.gram
.prods
.add
(prod
)
323 # Check priority block
324 var pris
= children
[4]
325 if pris
isa Nodes[Npriority] then
326 var lastpri
= pris
.children
.last
327 lastpri
.is_last
= true
329 v
.pricpt
= pris
.children
.length
331 # Create a new production for the first priority class
332 # The main production will be used for the last priority class
334 prod
= new Production(name
+ "$0")
336 v
.gram
.prods
.add
(prod
)
344 redef fun accept_check_name_visitor
(v
) do
352 redef fun accept_check_name_visitor
(v
) do
353 var id
= children
[2].as(Nid)
356 var node
= v
.v1
.names
[name
]
357 if node
isa Nprod then
358 v
.prod
.spe
= node
.prod
.as(not null)
359 if v
.prod
.spe
.spe
!= null then
360 print
"Cannot transform into {name}, {name} is already transformed."
364 else if node
isa Nexpr then
365 print
"Cannot transform into {name}, {name} is a token."
373 redef fun accept_check_name_visitor
(v
) do
378 redef class Npriority
381 # The associated production
382 var prod
: nullable Production
384 # The production in the with the next less priority class
385 # null is there is no priority or if the first priority class
386 var next
: nullable Production
388 redef fun accept_collect_prod
(v
) do
397 prod
= new Production(spe
.name
+ "${v.pricpt}")
399 v
.gram
.prods
.add
(prod
.as(not null))
406 redef fun accept_check_name_visitor
(v
) do
412 # Inject a new alternative that goes to the next less prioty class
413 var alt
= prod
.new_alt2
(prod
.name
+ "_" + prod
.alts
.length
.to_s
, [next
.as(not null)])
415 alt
.codes
= [new CodePop]
421 # Check and transform `v.elems` for priority
422 private fun check_priority
(v
: CheckNameVisitor) is abstract
425 redef class Npriority_left
426 redef fun check_priority
(v
) do
427 var p
= prod
.spe
or else prod
429 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
430 print
("Error: in a Left priority class, left and right must be the production")
433 v
.elems
.first
= prod
.as(not null)
434 v
.elems
.last
= next
.as(not null)
438 redef class Npriority_right
439 redef fun check_priority
(v
) do
440 var p
= prod
.spe
or else prod
442 if v
.elems
.length
< 2 or v
.elems
.first
!= p
or v
.elems
.last
!= p
then
443 print
("Error: in a Right priority class, left and right must be the production")
446 v
.elems
.first
= next
.as(not null)
447 v
.elems
.last
= prod
.as(not null)
451 redef class Npriority_unary
452 redef fun check_priority
(v
) do
453 var p
= prod
.spe
or else prod
455 if v
.elems
.length
< 2 or (v
.elems
.first
!= p
and v
.elems
.last
!= p
) then
456 print
("Error: in a Unary priority class, left or right must be the production")
459 if v
.elems
.first
== p
then v
.elems
.first
= prod
.as(not null)
460 if v
.elems
.last
== p
then v
.elems
.last
= prod
.as(not null)
464 redef class Alternative
465 var short_name
: nullable String
469 # The associated alternative
470 var alt
: nullable Alternative
472 redef fun accept_check_name_visitor
(v
)
476 v
.elems
= new Array[Element]
477 v
.elems_names
= new Array[nullable String]
482 if pri
!= null then pri
.check_priority
(v
)
484 var prod
= v
.prod
.as(not null)
485 var prodabs
= prod
.spe
486 if prodabs
== null then prodabs
= prod
490 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
491 else if prod
.spe
== null and prod
.alts
.is_empty
and pri
== null then
495 if prodabs
.altone
then
496 prodabs
.altone
= false
497 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
499 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
502 if prodabs
.altone
then
503 prodabs
.altone
= false
504 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
506 name
= prodabs
.name
+ "_" + name
509 var alt
= prod
.new_alt2
(name
, v
.elems
)
510 alt
.short_name
= v
.altname
511 alt
.elems_names
.add_all
(v
.elems_names
)
515 alt
.codes
= [new CodePop]
521 redef fun accept_check_name_visitor
(v
)
523 var id
= children
[1].as(Nid)
526 var prod
= v
.prod
.as(not null)
527 var prodabs
= prod
.spe
528 if prodabs
== null then prodabs
= prod
529 for x
in prodabs
.alts
do
530 if x
.short_name
== name
then
531 print
"{id.position.to_s} Error: already an alternative named {name}"
539 # The associated element
540 var elem
: nullable Element
542 # Set the element and check things
543 private fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
545 assert self.elem
== null
547 if elem
isa Token then
548 if v
.ignoreds
.has
(elem
) then
550 print
"{pos} Error: {elem.name} is already an ignored token."
552 print
"Error: {elem.name} is already an ignored token."
556 if v
.rejecteds
.has
(elem
) then
558 print
"{pos} Error: {elem.name} is already a rejected token."
560 print
"Error: {elem.name} is already a rejected token."
570 # The associated expression (if any, ie defined in the lexer part)
571 var nexpr
: nullable Nexpr
572 # The associated text (if any, ie defined in the parser part)
573 var text
: nullable String
575 # Build a NFA according to nexpr or text
577 fun build_nfa
: Automaton
579 var nexpr
= self.nexpr
580 if nexpr
!= null then
581 assert self.text
== null
582 return nexpr
.build_nfa
586 var nfa
= new Automaton.epsilon
587 for c
in text
.chars
do
588 nfa
.concat
(new Automaton.atom
(c
.ascii
))
597 redef fun accept_check_name_visitor
(v
)
601 var elemname
= v
.elemname
602 if elemname
!= null and v
.elems_names
.has
(elemname
) then
604 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
605 elemname
= elemname
+i
.to_s
607 v
.elems_names
.add elemname
612 redef fun accept_check_name_visitor
(v
)
614 var id
= children
[1].as(Nid)
616 for n2
in v
.elems_names
do
618 print
"{id.position.to_s} Error: already an element named {name}."
627 redef fun accept_check_name_visitor
(v
) do
628 var id
= children
.first
.as(Nid)
630 if not v
.v1
.names
.has_key
(name
) then
631 print
"{id.position.to_s} Error: unknown name {name}"
635 var node
= v
.v1
.names
[name
]
636 var elem
: nullable Element
637 if node
isa Nprod then
639 else if node
isa Nexpr then
642 elem
= new Token(name
)
644 v
.v1
.gram
.tokens
.add
(elem
)
651 set_elem
(v
, id
.position
, elem
)
652 if v
.elemname
== null then v
.elemname
= name
656 redef class Nelem_str
657 redef fun accept_check_name_visitor
(v
) do
658 var str
= children
.first
.children
.first
.as(NToken)
661 var elem
: nullable Element
662 if v
.v1
.names
.has_key
(name
) then
663 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
666 elem
= new Token(name
)
668 v
.v1
.gram
.tokens
.add
(elem
)
669 v
.v1
.names
[name
] = self
671 set_elem
(v
, str
.position
, elem
)
675 redef class Nelem_star
676 redef fun accept_check_name_visitor
(v
) do
678 var elem
= v
.elems
.pop
679 elem
= v
.plusize
(elem
)
680 elem
= v
.quesize
(elem
)
681 set_elem
(v
, null, elem
)
685 redef class Nelem_ques
686 redef fun accept_check_name_visitor
(v
) do
688 var elem
= v
.elems
.pop
689 elem
= v
.quesize
(elem
)
690 set_elem
(v
, null, elem
)
694 redef class Nelem_plus
695 redef fun accept_check_name_visitor
(v
) do
697 var elem
= v
.elems
.pop
698 elem
= v
.plusize
(elem
)
699 set_elem
(v
, null, elem
)
703 redef class Ntree_part
704 redef fun accept_collect_prod
(v
) do
705 print
"NOT YET IMPLEMENTED: `Tree` part; it is ignored"
706 # SKIP by doing nothing
709 redef fun accept_check_name_visitor
(v
) do
710 # SKIP by doing nothing