1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
18 # TODO: split in submodules
25 # Main visitor for the semantic analysis
27 # TODO clean-up the visitors
28 class CollectNameVisitor
31 var nprods
= new Array[Nprod]
33 # Symbol table to associate things (prods and exprs) with their name
34 var names
= new HashMap[String, Node]
36 # The associated grammar object
37 # Read the result of the visit here
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa
= new Automaton.empty
44 # Run the semantic analysis of the grammar
47 # First visit to collect names
50 # Second visit to use collectec names and build rhings
51 var v2
= new CheckNameVisitor(self)
55 gram
.inline
(v2
.quesizes
.values
)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p
in gram
.prods
do
58 if not p
.name
.has_suffix
("_inline") then continue
63 # Build the NFA automaton
64 for t
in gram
.tokens
do
68 nfa2
= new Automaton.epsilon
69 for c
in t
.text
.as(not null) do
70 nfa2
.concat
(new Automaton.atom
(c
.ascii
))
73 nfa2
= nexpr
.build_nfa
75 nfa
.states
.add_all nfa2
.states
76 nfa
.start
.add_trans
(nfa2
.start
, null)
77 for s
in nfa2
.accept
do nfa
.add_tag
(s
, t
)
78 nfa
.accept
.add_all nfa2
.accept
82 redef fun visit
(n
) do n
.accept_collect_prod
(self)
85 private class CheckNameVisitor
88 # The collected altname, for the alternative
89 var altname
: nullable String = null
91 # The collected elements, for the alternative
92 var elems
= new Array[Element]
94 # The collected element names, for the alternative
95 var elems_names
= new Array[nullable String]
97 # The collected elementname, for the nelem
98 var elemname
: nullable String = null
100 # Is the alternative transformed, for the alternative
103 # Known rejected tokens
104 var rejecteds
= new Array[Token]
106 # Pool of elements that are modified with + (reuse them!)
107 private var plusizes
= new HashMap[Element, Production]
109 # Create a + version of an element
110 fun plusize
(e
: Element): Production
112 if plusizes
.has_key
(e
) then return plusizes
[e
]
114 var prod
= new Production(name
)
115 prod
.acname
= "Nodes[{e.acname}]"
116 v1
.gram
.prods
.add
(prod
)
117 prod
.new_alt
("{name}_one", e
)
118 prod
.new_alt
("{name}_more", prod
, e
)
123 # Pool of elements that are modified with ? (reuse them!)
124 private var quesizes
= new HashMap[Element, Production]
126 # Create a ? version of an element
127 fun quesize
(e
: Element): Production
129 if quesizes
.has_key
(e
) then return quesizes
[e
]
131 var prod
= new Production(name
)
132 prod
.acname
= "nullable {e.acname}"
133 v1
.gram
.prods
.add
(prod
)
134 var a1
= prod
.new_alt
("{name}_one", e
)
135 a1
.codes
= [new CodePop]
136 var a0
= prod
.new_alt0
("{name}_none")
137 a0
.codes
= [new CodeNull]
142 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
143 var nexpr
: nullable Nexpr
145 # The current production, used to initialize alternatives
146 var prod
: nullable Production
148 # The main visitor, used to access the grammar of the symbol table
149 var v1
: CollectNameVisitor
150 init(v1
: CollectNameVisitor) do self.v1
= v1
152 redef fun visit
(n
) do n
.accept_check_name_visitor
(self)
156 private fun accept_collect_prod
(v
: CollectNameVisitor) do visit_children
(v
)
157 private fun accept_check_name_visitor
(v
: CheckNameVisitor) do visit_children
(v
)
161 # The associated token
162 var token
: nullable Token
164 # The associated name
165 var name
: nullable String
167 # The required expression (Nexpr that are used inside)
168 var precs
= new ArraySet[Nexpr]
170 redef fun accept_collect_prod
(v
) do
171 var id
= children
.first
.as(Nid)
176 redef fun accept_check_name_visitor
(v
) do
182 # Flag to track circular regular expression
183 var is_building
= false
185 # The associated NFA (cached, see `build_nfa`)
186 private var nfa
: nullable Automaton
188 # Build the NFA, possibily building the NFA of required expressions
189 # Print an error if there is a circular dependency
190 # The result is cached
191 fun build_nfa
: Automaton do
193 if res
!= null then return res
195 print
"Error: circular regular expression {name.to_s}."
204 var nre
= self.children
[2]
212 # The named expression
213 var nexpr
: nullable Nexpr
215 redef fun accept_check_name_visitor
(v
) do
216 var id
= children
.first
.as(Nid)
218 if not v
.v1
.names
.has_key
(name
) then
219 print
"{id.position} Error: unknown name {name}"
223 var node
= v
.v1
.names
[name
]
224 if node
isa Nprod then
225 print
"{id.position} Error: cannot use production {name} in a regular expression"
228 else if not node
isa Nexpr then
232 v
.nexpr
.precs
.add
(node
)
243 var elem
: nullable Element
245 redef fun accept_check_name_visitor
(v
) do
246 var id
= children
[1].as(Nid)
248 if not v
.v1
.names
.has_key
(name
) then
249 print
"{id.position} Error: unknown name {name}"
253 var node
= v
.v1
.names
[name
]
254 var elem
: nullable Element
255 if node
isa Nprod then
256 print
"{id.position} Error: cannot ignore a production"
259 else if node
isa Nexpr then
262 elem
= new Token("Ignored")
264 v
.v1
.gram
.tokens
.add
(elem
)
275 redef fun accept_check_name_visitor
(v
) do
276 v
.elems
= new Array[Element]
279 if e
isa Production then
280 print
"Error: cannot reject {e}, it is a production"
283 else if e
isa Token then
284 # The token was build and registered during the visit
285 # Just add it to the rejected list
295 # The associated production
296 var prod
: nullable Production
298 redef fun accept_collect_prod
(v
) do
299 var id
= children
.first
.as(Nid)
303 prod
= new Production(name
)
304 v
.gram
.prods
.add
(prod
.as(not null))
306 redef fun accept_check_name_visitor
(v
) do
314 redef fun accept_check_name_visitor
(v
) do
315 var id
= children
[2].as(Nid)
318 var node
= v
.v1
.names
[name
]
319 if node
isa Nprod then
320 v
.prod
.spe
= node
.prod
.as(not null)
321 if v
.prod
.spe
.spe
!= null then
322 print
"Cannot transform into {name}, {name} is already transformed."
326 else if node
isa Nexpr then
327 print
"Cannot transform into {name}, {name} is a token."
335 redef fun accept_check_name_visitor
(v
) do
336 var id
= children
[2].as(Nid)
344 # The associated alternative
345 var alt
: nullable Alternative
347 redef fun accept_check_name_visitor
(v
)
351 v
.elems
= new Array[Element]
352 v
.elems_names
= new Array[nullable String]
354 var prod
= v
.prod
.as(not null)
355 var prodabs
= prod
.spe
356 if prodabs
== null then prodabs
= prod
360 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
361 else if prod
.spe
== null and prod
.alts
.is_empty
then
365 if prodabs
.altone
then
366 prodabs
.altone
= false
367 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
369 name
= prod
.name
+ "_" + prod
.alts
.length
.to_s
372 if prodabs
.altone
then
373 prodabs
.altone
= false
374 prodabs
.alts
.first
.name
= prodabs
.name
+ "_0"
376 name
= prodabs
.name
+ "_" + name
378 var alt
= prod
.new_alt2
(name
, v
.elems
)
379 alt
.elems_names
.add_all
(v
.elems_names
)
383 alt
.codes
= [new CodePop]
389 redef fun accept_check_name_visitor
(v
)
391 var id
= children
[1].as(Nid)
398 # The associated element
399 var elem
: nullable Element
401 # Set the element and check things
402 fun set_elem
(v
: CheckNameVisitor, pos
: nullable Position, elem
: Element)
404 assert self.elem
== null
407 if elem
isa Token and v
.rejecteds
.has
(elem
) then
409 print
"{pos} Error: {elem.name} is already a rejected token."
411 print
"Error: {elem.name} is already a rejected token."
421 # The associated expression (if any, ie defined in the lexer part)
422 var nexpr
: nullable Nexpr
423 # The associated text (if any, ie defined in the parser part)
424 var text
: nullable String
428 redef fun accept_check_name_visitor
(v
)
432 var elemname
= v
.elemname
433 if elemname
!= null and v
.elems_names
.has
(elemname
) then
435 while v
.elems_names
.has
(elemname
+i
.to_s
) do i
+= 1
436 elemname
= elemname
+i
.to_s
438 v
.elems_names
.add elemname
443 redef fun accept_check_name_visitor
(v
)
445 var id
= children
[1].as(Nid)
452 redef fun accept_check_name_visitor
(v
) do
453 var id
= children
.first
.as(Nid)
455 if not v
.v1
.names
.has_key
(name
) then
456 print
"{id.position} Error: unknown name {name}"
460 var node
= v
.v1
.names
[name
]
461 var elem
: nullable Element
462 if node
isa Nprod then
464 else if node
isa Nexpr then
467 elem
= new Token(name
)
469 v
.v1
.gram
.tokens
.add
(elem
)
476 set_elem
(v
, id
.position
, elem
)
477 if v
.elemname
== null then v
.elemname
= name
481 redef class Nelem_str
482 redef fun accept_check_name_visitor
(v
) do
483 var str
= children
.first
.as(Nstr)
486 var elem
: nullable Element
487 if v
.v1
.names
.has_key
(name
) then
488 elem
= v
.v1
.names
[name
].as(Nelem_str).elem
491 elem
= new Token(name
)
493 v
.v1
.gram
.tokens
.add
(elem
)
494 v
.v1
.names
[name
] = self
496 set_elem
(v
, str
.position
, elem
)
500 redef class Nelem_star
501 redef fun accept_check_name_visitor
(v
) do
503 var elem
= v
.elems
.pop
504 elem
= v
.plusize
(elem
)
505 elem
= v
.quesize
(elem
)
506 set_elem
(v
, null, elem
)
510 redef class Nelem_ques
511 redef fun accept_check_name_visitor
(v
) do
513 var elem
= v
.elems
.pop
514 elem
= v
.quesize
(elem
)
515 set_elem
(v
, null, elem
)
519 redef class Nelem_plus
520 redef fun accept_check_name_visitor
(v
) do
522 var elem
= v
.elems
.pop
523 elem
= v
.plusize
(elem
)
524 set_elem
(v
, null, elem
)