nitcc: accept but ignore the Tree part
[nit.git] / contrib / nitcc / src / nitcc_semantic.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Semantic analysis of a sablecc grammar file
16 # User to validate the file and generate the grammar and the nfa
17 #
18 # TODO: split in submodules
19 module nitcc_semantic
20
21 import nitcc_parser
22 import grammar
23 import re2nfa
24
25 # Main visitor for the semantic analysis
26 #
27 # TODO clean-up the visitors
28 class CollectNameVisitor
29 super Visitor
30
31 var nprods = new Array[Nprod]
32
33 # Symbol table to associate things (prods and exprs) with their name
34 var names = new HashMap[String, Node]
35
36 # The associated grammar object
37 # Read the result of the visit here
38 var gram = new Gram
39
40 # The associated NFA object
41 # Read the result of the visit here
42 var nfa = new Automaton.empty
43
44 # Run the semantic analysis of the grammar
45 fun start(n: Node)
46 do
47 # First visit to collect names
48 enter_visit(n)
49
50 # Second visit to use collectec names and build rhings
51 var v2 = new CheckNameVisitor(self)
52 v2.enter_visit(n)
53
54 # Inline all the ?
55 gram.inline(v2.quesizes.values)
56 # Inlile all the prods sufixed by '_imline' #TODO use a real keyword
57 for p in gram.prods do
58 if not p.name.has_suffix("_inline") then continue
59 print "inline {p}"
60 gram.inline([p])
61 end
62
63 # Build the NFA automaton
64 for t in gram.tokens do
65 var nfa2 = t.build_nfa
66 nfa2.tag_accept(t)
67 nfa.absorb(nfa2)
68 end
69
70 if not v2.ignoreds.is_empty then
71 var ign = new Token("Ignored")
72 var nfa2 = new Automaton.empty
73 for t in v2.ignoreds do
74 assert t isa Token
75 var nfa3 = t.build_nfa
76 nfa2.alternate(nfa3)
77 end
78 nfa2.tag_accept(ign)
79 nfa.absorb(nfa2)
80 end
81
82 end
83
84 redef fun visit(n) do n.accept_collect_prod(self)
85 end
86
87 private class CheckNameVisitor
88 super Visitor
89
90 # The collected altname, for the alternative
91 var altname: nullable String = null
92
93 # The collected elements, for the alternative
94 var elems = new Array[Element]
95
96 # The collected element names, for the alternative
97 var elems_names = new Array[nullable String]
98
99 # The collected elementname, for the nelem
100 var elemname: nullable String = null
101
102 # Is the alternative transformed, for the alternative
103 var trans = false
104
105 # Known ignored tokens
106 var ignoreds = new Array[Element]
107
108 # Known rejected tokens
109 var rejecteds = new Array[Element]
110
111 # Pool of elements that are modified with + (reuse them!)
112 private var plusizes = new HashMap[Element, Production]
113
114 # Create a + version of an element
115 fun plusize(e: Element): Production
116 do
117 if plusizes.has_key(e) then return plusizes[e]
118 var name = "{e}+"
119 var prod = new Production(name)
120 prod.acname = "Nodes[{e.acname}]"
121 v1.gram.prods.add(prod)
122 prod.new_alt("{name}_one", e)
123 prod.new_alt("{name}_more", prod, e)
124 plusizes[e] = prod
125 return prod
126 end
127
128 # Pool of elements that are modified with ? (reuse them!)
129 private var quesizes = new HashMap[Element, Production]
130
131 # Create a ? version of an element
132 fun quesize(e: Element): Production
133 do
134 if quesizes.has_key(e) then return quesizes[e]
135 var name = "{e}?"
136 var prod = new Production(name)
137 prod.acname = "nullable {e.acname}"
138 v1.gram.prods.add(prod)
139 var a1 = prod.new_alt("{name}_one", e)
140 a1.codes = [new CodePop]
141 var a0 = prod.new_alt0("{name}_none")
142 a0.codes = [new CodeNull]
143 quesizes[e] = prod
144 return prod
145 end
146
147 # The current nexpr, used to track dependency on named expressions (see `Nexpr::precs`)
148 var nexpr: nullable Nexpr
149
150 # The current production, used to initialize alternatives
151 var prod: nullable Production
152
153 # The main visitor, used to access the grammar of the symbol table
154 var v1: CollectNameVisitor
155 init(v1: CollectNameVisitor) do self.v1 = v1
156
157 redef fun visit(n) do n.accept_check_name_visitor(self)
158 end
159
160 redef class Node
161 private fun accept_collect_prod(v: CollectNameVisitor) do visit_children(v)
162 private fun accept_check_name_visitor(v: CheckNameVisitor) do visit_children(v)
163 end
164
165 redef class Nexpr
166 # The associated token
167 var token: nullable Token
168
169 # The associated name
170 var name: nullable String
171
172 # The required expression (Nexpr that are used inside)
173 var precs = new ArraySet[Nexpr]
174
175 redef fun accept_collect_prod(v) do
176 var id = children.first.as(Nid)
177 var name = id.text
178 if v.names.has_key(name) then
179 print "{id.position.to_s} Error {name} already defined."
180 exit(1)
181 end
182 v.names[name] = self
183 self.name = name
184 end
185 redef fun accept_check_name_visitor(v) do
186 v.nexpr = self
187 super
188 v.nexpr = null
189 end
190
191 # Flag to track circular regular expression
192 var is_building = false
193
194 # The associated NFA (cached, see `build_nfa`)
195 private var nfa: nullable Automaton
196
197 # Build the NFA, possibily building the NFA of required expressions
198 # Print an error if there is a circular dependency
199 # The result is cached
200 fun build_nfa: Automaton do
201 var res = nfa
202 if res != null then return res
203 if is_building then
204 print "Error: circular regular expression {name.to_s}."
205 exit(1)
206 abort
207 end
208 is_building = true
209 for p in precs do
210 p.build_nfa
211 end
212 is_building = false
213 var nre = self.children[2]
214 res = nre.make_rfa
215 nfa = res
216 return res
217 end
218 end
219
220 redef class Nre_id
221 # The named expression
222 var nexpr: nullable Nexpr
223
224 redef fun accept_check_name_visitor(v) do
225 var id = children.first.as(Nid)
226 var name = id.text
227 if not v.v1.names.has_key(name) then
228 print "{id.position.to_s} Error: unknown name {name}"
229 exit(1)
230 abort
231 end
232 var node = v.v1.names[name]
233 if node isa Nprod then
234 print "{id.position.to_s} Error: cannot use production {name} in a regular expression"
235 exit(1)
236 abort
237 else if not node isa Nexpr then
238 abort
239 end
240 nexpr = node
241 v.nexpr.precs.add(node)
242 end
243
244 redef fun make_rfa
245 do
246 return nexpr.nfa.dup
247 end
248 end
249
250 redef class Nign
251 redef fun accept_check_name_visitor(v) do
252 # Add elements to the ignored list
253 v.elems = v.ignoreds
254 super
255 for e in v.elems do
256 if e isa Production then
257 print "Error: cannot ignore {e}, it is a production"
258 exit(1)
259 abort
260 else if e isa Token then
261 # The token was build and registered during the visit
262 # So, unregister then, the bit Ignred token will be build later
263 v.v1.gram.tokens.remove(e)
264 else
265 abort
266 end
267 end
268 end
269 end
270
271 redef class Nrej
272 redef fun accept_check_name_visitor(v) do
273 # Add elements to the rejected list
274 v.elems = v.rejecteds
275 super
276 for e in v.elems do
277 if e isa Production then
278 print "Error: cannot reject {e}, it is a production"
279 exit(1)
280 abort
281 else if e isa Token then
282 # The token was build and registered during the visit
283 else
284 abort
285 end
286 end
287 end
288 end
289
290 redef class Nprod
291 # The associated production
292 var prod: nullable Production
293
294 redef fun accept_collect_prod(v) do
295 var id = children.first.as(Nid)
296 var name = id.text
297 if v.names.has_key(name) then
298 print "{id.position.to_s} Error {name} already defined."
299 exit(1)
300 end
301 v.names[name] = self
302 v.nprods.add(self)
303 prod = new Production(name)
304 v.gram.prods.add(prod.as(not null))
305 end
306 redef fun accept_check_name_visitor(v) do
307 v.prod = prod
308 super
309 v.prod = null
310 end
311 end
312
313 redef class Nptrans
314 redef fun accept_check_name_visitor(v) do
315 var id = children[2].as(Nid)
316 var name = id.text
317
318 var node = v.v1.names[name]
319 if node isa Nprod then
320 v.prod.spe = node.prod.as(not null)
321 if v.prod.spe.spe != null then
322 print "Cannot transform into {name}, {name} is already transformed."
323 exit(1)
324 abort
325 end
326 else if node isa Nexpr then
327 print "Cannot transform into {name}, {name} is a token."
328 else
329 abort
330 end
331 end
332 end
333
334 redef class Natrans
335 redef fun accept_check_name_visitor(v) do
336 v.trans = true
337 end
338 end
339
340 redef class Alternative
341 var short_name: nullable String
342 end
343
344 redef class Nalt
345 # The associated alternative
346 var alt: nullable Alternative
347
348 redef fun accept_check_name_visitor(v)
349 do
350 v.altname = null
351 v.trans = false
352 v.elems = new Array[Element]
353 v.elems_names = new Array[nullable String]
354 super
355 var prod = v.prod.as(not null)
356 var prodabs = prod.spe
357 if prodabs == null then prodabs = prod
358 var name = v.altname
359 if name == null then
360 if v.trans then
361 name = prod.name + "_" + prod.alts.length.to_s
362 else if prod.spe == null and prod.alts.is_empty then
363 name = prod.name
364 prod.altone = true
365 else
366 if prodabs.altone then
367 prodabs.altone = false
368 prodabs.alts.first.name = prodabs.name + "_0"
369 end
370 name = prod.name + "_" + prod.alts.length.to_s
371 end
372 else
373 if prodabs.altone then
374 prodabs.altone = false
375 prodabs.alts.first.name = prodabs.name + "_0"
376 end
377 name = prodabs.name + "_" + name
378 end
379 var alt = prod.new_alt2(name, v.elems)
380 alt.short_name = v.altname
381 alt.elems_names.add_all(v.elems_names)
382 self.alt = alt
383 if v.trans then
384 alt.trans = true
385 alt.codes = [new CodePop]
386 end
387 end
388 end
389
390 redef class Naltid
391 redef fun accept_check_name_visitor(v)
392 do
393 var id = children[1].as(Nid)
394 var name = id.text
395 v.altname = name
396 var prod = v.prod.as(not null)
397 var prodabs = prod.spe
398 if prodabs == null then prodabs = prod
399 for x in prodabs.alts do
400 if x.short_name == name then
401 print "{id.position.to_s} Error: already an alternative named {name}"
402 exit(1)
403 end
404 end
405 end
406 end
407
408 redef class Nelem
409 # The associated element
410 var elem: nullable Element
411
412 # Set the element and check things
413 private fun set_elem(v: CheckNameVisitor, pos: nullable Position, elem: Element)
414 do
415 assert self.elem == null
416 self.elem = elem
417 if elem isa Token then
418 if v.ignoreds.has(elem) then
419 if pos != null then
420 print "{pos} Error: {elem.name} is already an ignored token."
421 else
422 print "Error: {elem.name} is already an ignored token."
423 end
424 exit(1)
425 end
426 if v.rejecteds.has(elem) then
427 if pos != null then
428 print "{pos} Error: {elem.name} is already a rejected token."
429 else
430 print "Error: {elem.name} is already a rejected token."
431 end
432 exit(1)
433 end
434 end
435 v.elems.push(elem)
436 end
437 end
438
439 redef class Token
440 # The associated expression (if any, ie defined in the lexer part)
441 var nexpr: nullable Nexpr
442 # The associated text (if any, ie defined in the parser part)
443 var text: nullable String
444
445 # Build a NFA according to nexpr or text
446 # Does not tag it!
447 fun build_nfa: Automaton
448 do
449 var nexpr = self.nexpr
450 if nexpr != null then
451 assert self.text == null
452 return nexpr.build_nfa
453 end
454 var text = self.text
455 if text != null then
456 var nfa = new Automaton.epsilon
457 for c in text.chars do
458 nfa.concat(new Automaton.atom(c.ascii))
459 end
460 return nfa
461 end
462 abort
463 end
464 end
465
466 redef class Nnelem
467 redef fun accept_check_name_visitor(v)
468 do
469 v.elemname = null
470 super
471 var elemname = v.elemname
472 if elemname != null and v.elems_names.has(elemname) then
473 var i = 2
474 while v.elems_names.has(elemname+i.to_s) do i += 1
475 elemname = elemname+i.to_s
476 end
477 v.elems_names.add elemname
478 end
479 end
480
481 redef class Nelemid
482 redef fun accept_check_name_visitor(v)
483 do
484 var id = children[1].as(Nid)
485 var name = id.text
486 for n2 in v.elems_names do
487 if name == n2 then
488 print "{id.position.to_s} Error: already an element named {name}."
489 exit(1)
490 end
491 end
492 v.elemname = name
493 end
494 end
495
496 redef class Nelem_id
497 redef fun accept_check_name_visitor(v) do
498 var id = children.first.as(Nid)
499 var name = id.text
500 if not v.v1.names.has_key(name) then
501 print "{id.position.to_s} Error: unknown name {name}"
502 exit(1)
503 abort
504 end
505 var node = v.v1.names[name]
506 var elem: nullable Element
507 if node isa Nprod then
508 elem = node.prod
509 else if node isa Nexpr then
510 elem = node.token
511 if elem == null then
512 elem = new Token(name)
513 elem.nexpr = node
514 v.v1.gram.tokens.add(elem)
515 node.token = elem
516 end
517 else
518 abort
519 end
520 assert elem != null
521 set_elem(v, id.position, elem)
522 if v.elemname == null then v.elemname = name
523 end
524 end
525
526 redef class Nelem_str
527 redef fun accept_check_name_visitor(v) do
528 var str = children.first.children.first.as(NToken)
529 var text = str.value
530 var name = str.text
531 var elem: nullable Element
532 if v.v1.names.has_key(name) then
533 elem = v.v1.names[name].as(Nelem_str).elem
534 assert elem != null
535 else
536 elem = new Token(name)
537 elem.text = text
538 v.v1.gram.tokens.add(elem)
539 v.v1.names[name] = self
540 end
541 set_elem(v, str.position, elem)
542 end
543 end
544
545 redef class Nelem_star
546 redef fun accept_check_name_visitor(v) do
547 super
548 var elem = v.elems.pop
549 elem = v.plusize(elem)
550 elem = v.quesize(elem)
551 set_elem(v, null, elem)
552 end
553 end
554
555 redef class Nelem_ques
556 redef fun accept_check_name_visitor(v) do
557 super
558 var elem = v.elems.pop
559 elem = v.quesize(elem)
560 set_elem(v, null, elem)
561 end
562 end
563
564 redef class Nelem_plus
565 redef fun accept_check_name_visitor(v) do
566 super
567 var elem = v.elems.pop
568 elem = v.plusize(elem)
569 set_elem(v, null, elem)
570 end
571 end
572
573 redef class Ntree_part
574 redef fun accept_collect_prod(v) do
575 print "NOT YET IMPLEMENTED: `Tree` part; it is ignored"
576 # SKIP by doing nothing
577 end
578
579 redef fun accept_check_name_visitor(v) do
580 # SKIP by doing nothing
581 end
582 end