nitcc: introduce nitcc
[nit.git] / contrib / nitcc / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 var nprods = new Array[Nprod]
32
33 # Symbol table to associate things (prods and exprs) with their name
34 var names = new HashMap[String, Node]
35
36 # The associated grammar object
37 # Read the result of the visit here
38 var gram = new Gram
39
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa = new Automaton.empty
43
44 # Run the semantic analysis of the grammar
45 fun start(n: Node)
46 do
47 # First visit to collect names
48 enter_visit(n)
49
50 # Second visit to use collectec names and build rhings
51 var v2 = new CheckNameVisitor(self)
52 v2.enter_visit(n)
53
54 # Inline all the ?
55 gram.inline(v2.quesizes.values)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p in gram.prods do
58 if not p.name.has_suffix("_inline") then continue
59 print "inline {p}"
60 gram.inline([p])
61 end
62
63 # Build the NFA automaton
64 for t in gram.tokens do
65 var nexpr = t.nexpr
66 var nfa2
67 if nexpr == null then
68 nfa2 = new Automaton.epsilon
69 for c in t.text.as(not null) do
70 nfa2.concat(new Automaton.atom(c.ascii))
71 end
72 else
73 nfa2 = nexpr.build_nfa
74 end
75 nfa.states.add_all nfa2.states
76 nfa.start.add_trans(nfa2.start, null)
77 for s in nfa2.accept do nfa.add_tag(s, t)
78 nfa.accept.add_all nfa2.accept
79 end
80 end
81
82 redef fun visit(n) do n.accept_collect_prod(self)
83 end
84
85 private class CheckNameVisitor
86 super Visitor
87
88 # The collected altname, for the alternative
89 var altname: nullable String = null
90
91 # The collected elements, for the alternative
92 var elems = new Array[Element]
93
94 # The collected element names, for the alternative
95 var elems_names = new Array[nullable String]
96
97 # The collected elementname, for the nelem
98 var elemname: nullable String = null
99
100 # Is the alternative transformed, for the alternative
101 var trans = false
102
103 # Pool of elements that are modified with + (reuse them!)
104 private var plusizes = new HashMap[Element, Production]
105
106 # Create a + version of an element
107 fun plusize(e: Element): Production
108 do
109 if plusizes.has_key(e) then return plusizes[e]
110 var name = "{e}+"
111 var prod = new Production(name)
112 prod.acname = "Nodes[{e.acname}]"
113 v1.gram.prods.add(prod)
114 prod.new_alt("{name}_one", e)
115 prod.new_alt("{name}_more", prod, e)
116 plusizes[e] = prod
117 return prod
118 end
119
120 # Pool of elements that are modified with ? (reuse them!)
121 private var quesizes = new HashMap[Element, Production]
122
123 # Create a ? version of an element
124 fun quesize(e: Element): Production
125 do
126 if quesizes.has_key(e) then return quesizes[e]
127 var name = "{e}?"
128 var prod = new Production(name)
129 prod.acname = "nullable {e.acname}"
130 v1.gram.prods.add(prod)
131 var a1 = prod.new_alt("{name}_one", e)
132 a1.codes = [new CodePop]
133 var a0 = prod.new_alt0("{name}_none")
134 a0.codes = [new CodeNull]
135 quesizes[e] = prod
136 return prod
137 end
138
139 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
140 var nexpr: nullable Nexpr
141
142 # The current production, used to initialize alternatives
143 var prod: nullable Production
144
145 # The main visitor, used to access the grammar of the symbol table
146 var v1: CollectNameVisitor
147 init(v1: CollectNameVisitor) do self.v1 = v1
148
149 redef fun visit(n) do n.accept_check_name_visitor(self)
150 end
151
152 redef class Node
153 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
154 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
155 end
156
157 redef class Nexpr
158 # The associated token
159 var token: nullable Token
160
161 # The associated name
162 var name: nullable String
163
164 # The required expression (Nexpr that are used inside)
165 var precs = new ArraySet[Nexpr]
166
167 redef fun accept_collect_prod(v) do
168 var id = children.first.as(Nid)
169 var name = id.text
170 v.names[name] = self
171 self.name = name
172 end
173 redef fun accept_check_name_visitor(v) do
174 v.nexpr = self
175 super
176 v.nexpr = null
177 end
178
179 # Flag to track circular regular expression
180 var is_building = false
181
182 # The associated NFA (cached, see `build_nfa`)
183 private var nfa: nullable Automaton
184
185 # Build the NFA, possibily building the NFA of required expressions
186 # Print an error if there is a circular dependency
187 # The result is cached
188 fun build_nfa: Automaton do
189 var res = nfa
190 if res != null then return res
191 if is_building then
192 print "Error: circular regular expression {name.to_s}."
193 exit(1)
194 abort
195 end
196 is_building = true
197 for p in precs do
198 p.build_nfa
199 end
200 is_building = false
201 var nre = self.children[2]
202 res = nre.make_rfa
203 nfa = res
204 return res
205 end
206 end
207
208 redef class Nre_id
209 # The named expression
210 var nexpr: nullable Nexpr
211
212 redef fun accept_check_name_visitor(v) do
213 var id = children.first.as(Nid)
214 var name = id.text
215 if not v.v1.names.has_key(name) then
216 print "Error: unknown name {name}"
217 exit(1)
218 abort
219 end
220 var node = v.v1.names[name]
221 if node isa Nprod then
222 print "Error: cannot use production {name} in a regular expression"
223 exit(1)
224 abort
225 else if not node isa Nexpr then
226 abort
227 end
228 nexpr = node
229 v.nexpr.precs.add(node)
230 end
231
232 redef fun make_rfa
233 do
234 return nexpr.nfa.dup
235 end
236 end
237
238 redef class Nign
239 # The named element
240 var elem: nullable Element
241
242 redef fun accept_check_name_visitor(v) do
243 var id = children[1].as(Nid)
244 var name = id.text
245 if not v.v1.names.has_key(name) then
246 print "Error: unknown name {name}"
247 exit(1)
248 abort
249 end
250 var node = v.v1.names[name]
251 var elem: nullable Element
252 if node isa Nprod then
253 print "Error cannot ignore a production"
254 exit(1)
255 abort
256 else if node isa Nexpr then
257 elem = node.token
258 if elem == null then
259 elem = new Token("Ignored")
260 elem.nexpr = node
261 v.v1.gram.tokens.add(elem)
262 node.token = elem
263 end
264 else
265 abort
266 end
267 self.elem = elem
268 end
269 end
270
271 redef class Nprod
272 # The associated production
273 var prod: nullable Production
274
275 redef fun accept_collect_prod(v) do
276 var id = children.first.as(Nid)
277 var name = id.text
278 v.names[name] = self
279 v.nprods.add(self)
280 prod = new Production(name)
281 v.gram.prods.add(prod.as(not null))
282 end
283 redef fun accept_check_name_visitor(v) do
284 v.prod = prod
285 super
286 v.prod = null
287 end
288 end
289
290 redef class Nptrans
291 redef fun accept_check_name_visitor(v) do
292 var id = children[2].as(Nid)
293 var name = id.text
294
295 var node = v.v1.names[name]
296 if node isa Nprod then
297 v.prod.spe = node.prod.as(not null)
298 if v.prod.spe.spe != null then
299 print "Cannot transform into {name}, {name} is already transformed."
300 exit(1)
301 abort
302 end
303 else if node isa Nexpr then
304 print "Cannot transform into {name}, {name} is a token."
305 else
306 abort
307 end
308 end
309 end
310
311 redef class Natrans
312 redef fun accept_check_name_visitor(v) do
313 var id = children[2].as(Nid)
314 var name = id.text
315
316 v.trans = true
317 end
318 end
319
320 redef class Nalt
321 # The associated alternative
322 var alt: nullable Alternative
323
324 redef fun accept_check_name_visitor(v)
325 do
326 v.altname = null
327 v.trans = false
328 v.elems = new Array[Element]
329 v.elems_names = new Array[nullable String]
330 super
331 var prod = v.prod.as(not null)
332 var prodabs = prod.spe
333 if prodabs == null then prodabs = prod
334 var name = v.altname
335 if name == null then
336 if v.trans then
337 name = prod.name + "_" + prod.alts.length.to_s
338 else if prod.spe == null and prod.alts.is_empty then
339 name = prod.name
340 prod.altone = true
341 else
342 if prodabs.altone then
343 prodabs.altone = false
344 prodabs.alts.first.name = prodabs.name + "_0"
345 end
346 name = prod.name + "_" + prod.alts.length.to_s
347 end
348 else
349 if prodabs.altone then
350 prodabs.altone = false
351 prodabs.alts.first.name = prodabs.name + "_0"
352 end
353 name = prodabs.name + "_" + name
354 end
355 var alt = prod.new_alt2(name, v.elems)
356 alt.elems_names.add_all(v.elems_names)
357 self.alt = alt
358 if v.trans then
359 alt.trans = true
360 alt.codes = [new CodePop]
361 end
362 end
363 end
364
365 redef class Naltid
366 redef fun accept_check_name_visitor(v)
367 do
368 var id = children[1].as(Nid)
369 var name = id.text
370 v.altname = name
371 end
372 end
373
374 redef class Nelem
375 # The associated element
376 var elem: nullable Element
377 end
378
379 redef class Token
380 # The associated expression (if any, ie defined in the lexer part)
381 var nexpr: nullable Nexpr
382 # The associated text (if any, ie defined in the parser part)
383 var text: nullable String
384 end
385
386 redef class Nnelem
387 redef fun accept_check_name_visitor(v)
388 do
389 v.elemname = null
390 super
391 var elemname = v.elemname
392 if elemname != null and v.elems_names.has(elemname) then
393 var i = 2
394 while v.elems_names.has(elemname+i.to_s) do i += 1
395 elemname = elemname+i.to_s
396 end
397 v.elems_names.add elemname
398 end
399 end
400
401 redef class Nelemid
402 redef fun accept_check_name_visitor(v)
403 do
404 var id = children[1].as(Nid)
405 var name = id.text
406 v.elemname = name
407 end
408 end
409
410 redef class Nelem_id
411 redef fun accept_check_name_visitor(v) do
412 var id = children.first.as(Nid)
413 var name = id.text
414 if not v.v1.names.has_key(name) then
415 print "Error: unknown name {name}"
416 exit(1)
417 abort
418 end
419 var node = v.v1.names[name]
420 var elem: nullable Element
421 if node isa Nprod then
422 elem = node.prod
423 else if node isa Nexpr then
424 elem = node.token
425 if elem == null then
426 elem = new Token(name)
427 elem.nexpr = node
428 v.v1.gram.tokens.add(elem)
429 node.token = elem
430 end
431 else
432 abort
433 end
434 assert elem != null
435 self.elem = elem
436 v.elems.add(elem)
437 if v.elemname == null then v.elemname = name
438 end
439 end
440
441 redef class Nelem_str
442 redef fun accept_check_name_visitor(v) do
443 var str = children.first.as(Nstr)
444 var text = str.value
445 var name = str.text
446 var elem: nullable Element
447 if v.v1.names.has_key(name) then
448 elem = v.v1.names[name].as(Nelem_str).elem
449 assert elem != null
450 else
451 elem = new Token(name)
452 elem.text = text
453 v.v1.gram.tokens.add(elem)
454 v.v1.names[name] = self
455 end
456 self.elem = elem
457 v.elems.add(elem)
458 end
459 end
460
461 redef class Nelem_star
462 redef fun accept_check_name_visitor(v) do
463 super
464 var elem = v.elems.pop
465 elem = v.plusize(elem)
466 elem = v.quesize(elem)
467 v.elems.add elem
468 end
469 end
470
471 redef class Nelem_ques
472 redef fun accept_check_name_visitor(v) do
473 super
474 var elem = v.elems.pop
475 elem = v.quesize(elem)
476 v.elems.add elem
477 end
478 end
479
480 redef class Nelem_plus
481 redef fun accept_check_name_visitor(v) do
482 super
483 var elem = v.elems.pop
484 elem = v.plusize(elem)
485 v.elems.add elem
486 end
487 end