1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
31 var nprods
= new Array[Nprod]
33 # Symbol table to associate things (prods and exprs) with their name
34 var names
= new HashMap[String, Node]
36 # The associated grammar object
37 # Read the result of the visit here
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa
= new Automaton.empty
44 # Run the semantic analysis of the grammar
47 # First visit to collect names
50 # Second visit to use collectec names and build rhings
51 var v2
= new CheckNameVisitor(self)
55 gram
.inline
(v2
.quesizes
.values
)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p
in gram
.prods
do
58 if not p
.name
.has_suffix
("_inline") then continue
63 # Build the NFA automaton
64 for t
in gram
.tokens
do
65 var nfa2
= t
.build_nfa
70 if not v2
.ignoreds
.is_empty
then
71 var ign
= new Token("Ignored")
72 var nfa2
= new Automaton.empty
73 for t
in v2
.ignoreds
do
75 var nfa3
= t
.build_nfa
84 redef fun visit
(n
) do n
.accept_collect_prod
(self)
87 private class CheckNameVisitor
90 # The collected altname, for the alternative
91 var altname
: nullable String = null
93 # The collected elements, for the alternative
94 var elems
= new Array[Element]
96 # The collected element names, for the alternative
97 var elems_names
= new Array[nullable String]
99 # The collected elementname, for the nelem
100 var elemname
: nullable String = null
102 # Is the alternative transformed, for the alternative
105 # Known ignored tokens
106 var ignoreds
= new Array[Element]
108 # Known rejected tokens
109 var rejecteds
= new Array[Element]
111 # Pool of elements that are modified with + (reuse them!)
112 private var plusizes
= new HashMap[Element, Production]
114 # Create a + version of an element
115 fun plusize
(e
: Element): Production
117 if plusizes
.has_key
(e
) then return plusizes
[e
]
119 var prod
= new Production(name
)
120 prod
.acname
= "Nodes[{e.acname}]"
121 v1
.gram
.prods
.add
(prod
)
122 prod
.new_alt
("{name}_one", e
)
123 prod
.new_alt
("{name}_more", prod
, e
)
128 # Pool of elements that are modified with ? (reuse them!)
129 private var quesizes
= new HashMap[Element, Production]
131 # Create a ? version of an element
132 fun quesize
(e
: Element): Production
134 if quesizes
.has_key
(e
) then return quesizes
[e
]
136 var prod
= new Production(name
)
137 prod
.acname
= "nullable {e.acname}"
138 v1
.gram
.prods
.add
(prod
)
139 var a1
= prod
.new_alt
("{name}_one", e
)
140 a1
.codes
= [new CodePop]
141 var a0
= prod
.new_alt0
("{name}_none")
142 a0
.codes
= [new CodeNull]
147 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
148 var nexpr
: nullable Nexpr
150 # The current production, used to initialize alternatives
151 var prod
: nullable Production
153 # The main visitor, used to access the grammar of the symbol table
154 var v1
: CollectNameVisitor
155 init(v1
: CollectNameVisitor) do self.v1
= v1
157 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
161 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
162 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
166 # The associated token
167 var token
: nullable Token
169 # The associated name
170 var name
: nullable String
172 # The required expression (Nexpr that are used inside)
173 var precs
= new ArraySet[Nexpr]
175 redef fun accept_collect_prod
(v
) do
176 var id
= children
.first
.as(Nid)
178 if v
.names
.has_key
(name
) then
179 print
"{id.position.to_s} Error {name} already defined."
185 redef fun accept_check_name_visitor
(v
) do
191 # Flag to track circular regular expression
192 var is_building
= false
194 # The associated NFA (cached, see `build_nfa`)
195 private var nfa
: nullable Automaton
197 # Build the NFA, possibily building the NFA of required expressions
198 # Print an error if there is a circular dependency
199 # The result is cached
200 fun build_nfa
: Automaton do
202 if res
!= null then return res
204 print
"Error: circular regular expression {name.to_s}."
213 var nre
= self.children
[2]
221 # The named expression
222 var nexpr
: nullable Nexpr
224 redef fun accept_check_name_visitor
(v
) do
225 var id
= children
.first
.as(Nid)
227 if not v
.v1
.names
.has_key
(name
) then
228 print
"{id.position.to_s} Error: unknown name {name}"
232 var node
= v
.v1
.names
[name
]
233 if node
isa Nprod then
234 print
"{id.position.to_s} Error: cannot use production {name} in a regular expression"
237 else if not node
isa Nexpr then
241 v
.nexpr
.precs
.add
(node
)
251 redef fun accept_check_name_visitor
(v
) do
252 # Add elements to the ignored list
256 if e
isa Production then
257 print
"Error: cannot ignore {e}, it is a production"
260 else if e
isa Token then
261 # The token was build and registered during the visit
262 # So, unregister then, the bit Ignred token will be build later
263 v
.v1
.gram
.tokens
.remove
(e
)
272 redef fun accept_check_name_visitor
(v
) do
273 # Add elements to the rejected list
274 v
.elems
= v
.rejecteds
277 if e
isa Production then
278 print
"Error: cannot reject {e}, it is a production"
281 else if e
isa Token then
282 # The token was build and registered during the visit
291 # The associated production
292 var prod
: nullable Production
294 redef fun accept_collect_prod
(v
) do
295 var id
= children
.first
.as(Nid)
297 if v
.names
.has_key
(name
) then
298 print
"{id.position.to_s} Error {name} already defined."
303 prod
= new Production(name
)
304 v
.gram
.prods
.add
(prod
.as(not null))
306 redef fun accept_check_name_visitor
(v
) do
314 redef fun accept_check_name_visitor
(v
) do
315 var id
= children
[2].as(Nid)
318 var node
= v
.v1
.names
[name
]
319 if node
isa Nprod then
320 v
.prod
.spe
= node
.prod
.as(not null)
321 if v
.prod
.spe
.spe
!= null then
322 print
"Cannot transform into {name}, {name} is already transformed."
326 else if node
isa Nexpr then
327 print
"Cannot transform into {name}, {name} is a token."
335 redef fun accept_check_name_visitor
(v
) do
340 redef class Alternative
341 var short_name
: nullable String
345 # The associated alternative
346 var alt
: nullable Alternative
348 redef fun accept_check_name_visitor
(v
)
352 v
.elems
= new Array[Element]
353 v
.elems_names
= new Array[nullable String]
355 var prod
= v
.prod
.as(not null)
356 var prodabs
= prod
.spe
357 if prodabs
== null then prodabs
= prod
361 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
362 else if prod
.spe
== null and prod
.alts
.is_empty
then
366 if prodabs
.altone
then
367 prodabs
.altone
= false
368 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
370 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
373 if prodabs
.altone
then
374 prodabs
.altone
= false
375 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
377 name
= prodabs
.name
+ "_" + name
379 var alt
= prod
.new_alt2
(name
, v
.elems
)
380 alt
.short_name
= v
.altname
381 alt
.elems_names
.add_all
(v
.elems_names
)
385 alt
.codes
= [new CodePop]
391 redef fun accept_check_name_visitor
(v
)
393 var id
= children
[1].as(Nid)
396 var prod
= v
.prod
.as(not null)
397 var prodabs
= prod
.spe
398 if prodabs
== null then prodabs
= prod
399 for x
in prodabs
.alts
do
400 if x
.short_name
== name
then
401 print
"{id.position.to_s} Error: already an alternative named {name}"
409 # The associated element
410 var elem
: nullable Element
412 # Set the element and check things
413 private fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
415 assert self.elem
== null
417 if elem
isa Token then
418 if v
.ignoreds
.has
(elem
) then
420 print
"{pos} Error: {elem.name} is already an ignored token."
422 print
"Error: {elem.name} is already an ignored token."
426 if v
.rejecteds
.has
(elem
) then
428 print
"{pos} Error: {elem.name} is already a rejected token."
430 print
"Error: {elem.name} is already a rejected token."
440 # The associated expression (if any, ie defined in the lexer part)
441 var nexpr
: nullable Nexpr
442 # The associated text (if any, ie defined in the parser part)
443 var text
: nullable String
445 # Build a NFA according to nexpr or text
447 fun build_nfa
: Automaton
449 var nexpr
= self.nexpr
450 if nexpr
!= null then
451 assert self.text
== null
452 return nexpr
.build_nfa
456 var nfa
= new Automaton.epsilon
457 for c
in text
.chars
do
458 nfa
.concat
(new Automaton.atom
(c
.ascii
))
467 redef fun accept_check_name_visitor
(v
)
471 var elemname
= v
.elemname
472 if elemname
!= null and v
.elems_names
.has
(elemname
) then
474 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
475 elemname
= elemname
+i
.to_s
477 v
.elems_names
.add elemname
482 redef fun accept_check_name_visitor
(v
)
484 var id
= children
[1].as(Nid)
486 for n2
in v
.elems_names
do
488 print
"{id.position.to_s} Error: already an element named {name}."
497 redef fun accept_check_name_visitor
(v
) do
498 var id
= children
.first
.as(Nid)
500 if not v
.v1
.names
.has_key
(name
) then
501 print
"{id.position.to_s} Error: unknown name {name}"
505 var node
= v
.v1
.names
[name
]
506 var elem
: nullable Element
507 if node
isa Nprod then
509 else if node
isa Nexpr then
512 elem
= new Token(name
)
514 v
.v1
.gram
.tokens
.add
(elem
)
521 set_elem
(v
, id
.position
, elem
)
522 if v
.elemname
== null then v
.elemname
= name
526 redef class Nelem_str
527 redef fun accept_check_name_visitor
(v
) do
528 var str
= children
.first
.children
.first
.as(NToken)
531 var elem
: nullable Element
532 if v
.v1
.names
.has_key
(name
) then
533 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
536 elem
= new Token(name
)
538 v
.v1
.gram
.tokens
.add
(elem
)
539 v
.v1
.names
[name
] = self
541 set_elem
(v
, str
.position
, elem
)
545 redef class Nelem_star
546 redef fun accept_check_name_visitor
(v
) do
548 var elem
= v
.elems
.pop
549 elem
= v
.plusize
(elem
)
550 elem
= v
.quesize
(elem
)
551 set_elem
(v
, null, elem
)
555 redef class Nelem_ques
556 redef fun accept_check_name_visitor
(v
) do
558 var elem
= v
.elems
.pop
559 elem
= v
.quesize
(elem
)
560 set_elem
(v
, null, elem
)
564 redef class Nelem_plus
565 redef fun accept_check_name_visitor
(v
) do
567 var elem
= v
.elems
.pop
568 elem
= v
.plusize
(elem
)
569 set_elem
(v
, null, elem
)