1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
31 var nprods
= new Array[Nprod]
33 # Symbol table to associate things (prods and exprs) with their name
34 var names
= new HashMap[String, Node]
36 # The associated grammar object
37 # Read the result of the visit here
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa
= new Automaton.empty
44 # Run the semantic analysis of the grammar
47 # First visit to collect names
50 # Second visit to use collectec names and build rhings
51 var v2
= new CheckNameVisitor(self)
55 gram
.inline
(v2
.quesizes
.values
)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p
in gram
.prods
do
58 if not p
.name
.has_suffix
("_inline") then continue
63 # Build the NFA automaton
64 for t
in gram
.tokens
do
68 nfa2
= new Automaton.epsilon
69 for c
in t
.text
.as(not null) do
70 nfa2
.concat
(new Automaton.atom
(c
.ascii
))
73 nfa2
= nexpr
.build_nfa
75 nfa
.states
.add_all nfa2
.states
76 nfa
.start
.add_trans
(nfa2
.start
, null)
77 for s
in nfa2
.accept
do nfa
.add_tag
(s
, t
)
78 nfa
.accept
.add_all nfa2
.accept
82 redef fun visit
(n
) do n
.accept_collect_prod
(self)
85 private class CheckNameVisitor
88 # The collected altname, for the alternative
89 var altname
: nullable String = null
91 # The collected elements, for the alternative
92 var elems
= new Array[Element]
94 # The collected element names, for the alternative
95 var elems_names
= new Array[nullable String]
97 # The collected elementname, for the nelem
98 var elemname
: nullable String = null
100 # Is the alternative transformed, for the alternative
103 # Known rejected tokens
104 var rejecteds
= new Array[Element]
106 # Pool of elements that are modified with + (reuse them!)
107 private var plusizes
= new HashMap[Element, Production]
109 # Create a + version of an element
110 fun plusize
(e
: Element): Production
112 if plusizes
.has_key
(e
) then return plusizes
[e
]
114 var prod
= new Production(name
)
115 prod
.acname
= "Nodes[{e.acname}]"
116 v1
.gram
.prods
.add
(prod
)
117 prod
.new_alt
("{name}_one", e
)
118 prod
.new_alt
("{name}_more", prod
, e
)
123 # Pool of elements that are modified with ? (reuse them!)
124 private var quesizes
= new HashMap[Element, Production]
126 # Create a ? version of an element
127 fun quesize
(e
: Element): Production
129 if quesizes
.has_key
(e
) then return quesizes
[e
]
131 var prod
= new Production(name
)
132 prod
.acname
= "nullable {e.acname}"
133 v1
.gram
.prods
.add
(prod
)
134 var a1
= prod
.new_alt
("{name}_one", e
)
135 a1
.codes
= [new CodePop]
136 var a0
= prod
.new_alt0
("{name}_none")
137 a0
.codes
= [new CodeNull]
142 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
143 var nexpr
: nullable Nexpr
145 # The current production, used to initialize alternatives
146 var prod
: nullable Production
148 # The main visitor, used to access the grammar of the symbol table
149 var v1
: CollectNameVisitor
150 init(v1
: CollectNameVisitor) do self.v1
= v1
152 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
156 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
157 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
161 # The associated token
162 var token
: nullable Token
164 # The associated name
165 var name
: nullable String
167 # The required expression (Nexpr that are used inside)
168 var precs
= new ArraySet[Nexpr]
170 redef fun accept_collect_prod
(v
) do
171 var id
= children
.first
.as(Nid)
173 if v
.names
.has_key
(name
) then
174 print
"{id.position} Error {name} already defined."
180 redef fun accept_check_name_visitor
(v
) do
186 # Flag to track circular regular expression
187 var is_building
= false
189 # The associated NFA (cached, see `build_nfa`)
190 private var nfa
: nullable Automaton
192 # Build the NFA, possibily building the NFA of required expressions
193 # Print an error if there is a circular dependency
194 # The result is cached
195 fun build_nfa
: Automaton do
197 if res
!= null then return res
199 print
"Error: circular regular expression {name.to_s}."
208 var nre
= self.children
[2]
216 # The named expression
217 var nexpr
: nullable Nexpr
219 redef fun accept_check_name_visitor
(v
) do
220 var id
= children
.first
.as(Nid)
222 if not v
.v1
.names
.has_key
(name
) then
223 print
"{id.position} Error: unknown name {name}"
227 var node
= v
.v1
.names
[name
]
228 if node
isa Nprod then
229 print
"{id.position} Error: cannot use production {name} in a regular expression"
232 else if not node
isa Nexpr then
236 v
.nexpr
.precs
.add
(node
)
247 var elem
: nullable Element
249 redef fun accept_check_name_visitor
(v
) do
250 var id
= children
[1].as(Nid)
252 if not v
.v1
.names
.has_key
(name
) then
253 print
"{id.position} Error: unknown name {name}"
257 var node
= v
.v1
.names
[name
]
258 var elem
: nullable Element
259 if node
isa Nprod then
260 print
"{id.position} Error: cannot ignore a production"
263 else if node
isa Nexpr then
266 elem
= new Token("Ignored")
268 v
.v1
.gram
.tokens
.add
(elem
)
279 redef fun accept_check_name_visitor
(v
) do
280 # Add elements to the rejected list
281 v
.elems
= v
.rejecteds
284 if e
isa Production then
285 print
"Error: cannot reject {e}, it is a production"
288 else if e
isa Token then
289 # The token was build and registered during the visit
298 # The associated production
299 var prod
: nullable Production
301 redef fun accept_collect_prod
(v
) do
302 var id
= children
.first
.as(Nid)
304 if v
.names
.has_key
(name
) then
305 print
"{id.position} Error {name} already defined."
310 prod
= new Production(name
)
311 v
.gram
.prods
.add
(prod
.as(not null))
313 redef fun accept_check_name_visitor
(v
) do
321 redef fun accept_check_name_visitor
(v
) do
322 var id
= children
[2].as(Nid)
325 var node
= v
.v1
.names
[name
]
326 if node
isa Nprod then
327 v
.prod
.spe
= node
.prod
.as(not null)
328 if v
.prod
.spe
.spe
!= null then
329 print
"Cannot transform into {name}, {name} is already transformed."
333 else if node
isa Nexpr then
334 print
"Cannot transform into {name}, {name} is a token."
342 redef fun accept_check_name_visitor
(v
) do
343 var id
= children
[2].as(Nid)
350 redef class Alternative
351 var short_name
: nullable String
355 # The associated alternative
356 var alt
: nullable Alternative
358 redef fun accept_check_name_visitor
(v
)
362 v
.elems
= new Array[Element]
363 v
.elems_names
= new Array[nullable String]
365 var prod
= v
.prod
.as(not null)
366 var prodabs
= prod
.spe
367 if prodabs
== null then prodabs
= prod
371 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
372 else if prod
.spe
== null and prod
.alts
.is_empty
then
376 if prodabs
.altone
then
377 prodabs
.altone
= false
378 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
380 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
383 if prodabs
.altone
then
384 prodabs
.altone
= false
385 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
387 name
= prodabs
.name
+ "_" + name
389 var alt
= prod
.new_alt2
(name
, v
.elems
)
390 alt
.short_name
= v
.altname
391 alt
.elems_names
.add_all
(v
.elems_names
)
395 alt
.codes
= [new CodePop]
401 redef fun accept_check_name_visitor
(v
)
403 var id
= children
[1].as(Nid)
406 var prod
= v
.prod
.as(not null)
407 var prodabs
= prod
.spe
408 if prodabs
== null then prodabs
= prod
409 for x
in prodabs
.alts
do
410 if x
.short_name
== name
then
411 print
"{id.position} Error: already an alternative named {name}"
419 # The associated element
420 var elem
: nullable Element
422 # Set the element and check things
423 fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
425 assert self.elem
== null
427 if elem
isa Token and v
.rejecteds
.has
(elem
) then
429 print
"{pos} Error: {elem.name} is already a rejected token."
431 print
"Error: {elem.name} is already a rejected token."
440 # The associated expression (if any, ie defined in the lexer part)
441 var nexpr
: nullable Nexpr
442 # The associated text (if any, ie defined in the parser part)
443 var text
: nullable String
447 redef fun accept_check_name_visitor
(v
)
451 var elemname
= v
.elemname
452 if elemname
!= null and v
.elems_names
.has
(elemname
) then
454 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
455 elemname
= elemname
+i
.to_s
457 v
.elems_names
.add elemname
462 redef fun accept_check_name_visitor
(v
)
464 var id
= children
[1].as(Nid)
466 for n2
in v
.elems_names
do
468 print
"{id.position} Error: already an element named {name}."
477 redef fun accept_check_name_visitor
(v
) do
478 var id
= children
.first
.as(Nid)
480 if not v
.v1
.names
.has_key
(name
) then
481 print
"{id.position} Error: unknown name {name}"
485 var node
= v
.v1
.names
[name
]
486 var elem
: nullable Element
487 if node
isa Nprod then
489 else if node
isa Nexpr then
492 elem
= new Token(name
)
494 v
.v1
.gram
.tokens
.add
(elem
)
501 set_elem
(v
, id
.position
, elem
)
502 if v
.elemname
== null then v
.elemname
= name
506 redef class Nelem_str
507 redef fun accept_check_name_visitor
(v
) do
508 var str
= children
.first
.as(Nstr)
511 var elem
: nullable Element
512 if v
.v1
.names
.has_key
(name
) then
513 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
516 elem
= new Token(name
)
518 v
.v1
.gram
.tokens
.add
(elem
)
519 v
.v1
.names
[name
] = self
521 set_elem
(v
, str
.position
, elem
)
525 redef class Nelem_star
526 redef fun accept_check_name_visitor
(v
) do
528 var elem
= v
.elems
.pop
529 elem
= v
.plusize
(elem
)
530 elem
= v
.quesize
(elem
)
531 set_elem
(v
, null, elem
)
535 redef class Nelem_ques
536 redef fun accept_check_name_visitor
(v
) do
538 var elem
= v
.elems
.pop
539 elem
= v
.quesize
(elem
)
540 set_elem
(v
, null, elem
)
544 redef class Nelem_plus
545 redef fun accept_check_name_visitor
(v
) do
547 var elem
= v
.elems
.pop
548 elem
= v
.plusize
(elem
)
549 set_elem
(v
, null, elem
)