nitgs: opt_direct_call_monomorph call `before_send` to deal with special cases
[nit.git] / contrib / nitcc / src / grammar.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A gramar describing a language
16 class Gram
17 # The productions (non-terminal) of the conctete grammar
18 var prods = new Array[Production]
19
20 # The additionnal abstract productions of the grammar
21 # TODO clean AST
22 var ast_prods = new Array[Production]
23
24 # The tokens (terminals) of the grammar
25 var tokens = new Array[Token]
26
27 # Dump of the concrete grammar and the transformations
28 fun pretty: String
29 do
30 var res = new Buffer
31 for p in prods do
32 if p.spe != null then
33 res.append("{p.name} \{-> {p.spe.name}\}=\n")
34 else
35 res.append("{p.name} =\n")
36 end
37 var last = p.alts.last
38 for a in p.alts do
39 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
40 if a.codes == null then a.make_codes
41 res.append("\n\t\t\{-> {a.codes.join(", ")}\}")
42 if a == last then res.append(" ;\n") else res.append(" |\n")
43 end
44 if p.is_nullable then res.append "\t// is nullable\n"
45 if not p.firsts.is_empty then
46 res.append "\t// firsts:\n"
47 for x in p.firsts do res.append "\t// {x}\n"
48 end
49 if not p.afters.is_empty then
50 res.append "\t// afters:\n"
51 for x in p.afters do res.append "\t// {x}\n"
52 end
53 end
54 return res.to_s
55 end
56
57 # Inline (ie. remove from the conctete grammar) some production
58 # REQUIRE: no circular production in `prods`
59 fun inline(prods: Collection[Production])
60 do
61 for p in self.prods do
62 for a in p.alts.to_a do
63 var to_inline = false
64 for e in a.elems do
65 if e isa Production and prods.has(e) then to_inline = true
66 end
67 if not to_inline then continue
68
69 if a.codes == null then a.make_codes
70
71 var a0 = new Alternative(p, a.name, new Array[Element])
72 a0.trans = true
73 a0.codes = new Array[Code]
74 var pool = [a0]
75 var pool2 = new Array[Alternative]
76 for e in a.elems do
77 if not e isa Production or not prods.has(e) then
78 for x in pool do
79 x.elems.add(e)
80 x.codes.add(new CodePop)
81 end
82 continue
83 end
84 if p == e then
85 print "Circular inlining on {p} :: {a}"
86 abort
87 end
88 pool2.clear
89 for a2 in e.alts do
90 if a2.codes == null then a2.make_codes
91 for x in pool do
92 var name = a.name + "_" + pool2.length.to_s
93 var na = new Alternative(p, name, new Array[Element])
94 na.trans = true
95 pool2.add(na)
96 na.elems.add_all(x.elems)
97 na.elems.add_all(a2.elems)
98 na.codes = new Array[Code]
99 na.codes.add_all(x.codes.as(not null))
100 na.codes.add_all(a2.codes.as(not null))
101 end
102 end
103 var tmp = pool
104 pool = pool2
105 pool2 = tmp
106 end
107 for x in pool do
108 x.codes.add(a.codes.last)
109 end
110 p.ast_alts.add(a)
111 p.alts.remove(a)
112 p.alts.add_all(pool)
113 end
114 end
115 for p in prods do
116 self.prods.remove(p)
117 self.ast_prods.add(p)
118 end
119 end
120
121 # Compute a LR automaton
122 fun lr0: LRAutomaton
123 do
124 var start = new Production("_start")
125 start.accept = true
126 var eof = new Token("Eof")
127 tokens.add(eof)
128 start.new_alt("Start", self.prods.first, eof)
129 prods.add(start)
130
131 analyse
132
133 var first = new LRState("Start")
134 first.number = 0
135 for i in start.start_state do first.add(i)
136
137 var automaton = new LRAutomaton(self)
138
139 var todo = new List[LRState]
140 todo.add first
141 var seen = new HashSet[LRState]
142 seen.add first
143
144 while not todo.is_empty do
145 var state = todo.shift
146
147 #print state
148 automaton.states.add(state)
149 state.analysis
150
151 var nexts = new HashMap[Element, LRState]
152 for i in state.items do
153 var e = i.next
154 if e == null then continue
155 if nexts.has_key(e) then
156 nexts[e].add(i.avance)
157 else
158 var name
159 if state == automaton.states.first then
160 name = e.to_s
161 else
162 name = "{state.name} {e}"
163 end
164 var next = new LRState(name)
165 nexts[e] = next
166 next.add(i.avance)
167 end
168 end
169
170 for e, next in nexts do
171
172 #print "#states: {seen.length}"
173
174 # Look for an existing LR0 state in the automaton
175 var new_state = true
176 for n in seen do
177 if n == next then
178 next = n
179 new_state = false
180 break
181 end
182 end
183
184 # If not, add it to the pool and the todo-list
185 if new_state then
186 next.number = seen.length
187 assert not seen.has(next)
188 seen.add(next)
189 todo.add(next)
190 end
191
192 # Add the transition
193 var t = new LRTransition(state, next, e)
194 state.outs.add t
195 next.ins.add t
196 end
197 end
198 return automaton
199 end
200
201 # Compute `nullables`, `firsts` and `afters` of productions
202 fun analyse
203 do
204 loop
205 var changed = false
206 for p in prods do
207 if p.is_nullable then continue
208 for a in p.alts do
209 var nullabl = true
210 for e in a.elems do
211 if e isa Token then
212 nullabl = false
213 break
214 else if e isa Production then
215 if not e.is_nullable then nullabl = false
216 break
217 else
218 abort
219 end
220 end
221 if nullabl then
222 p.is_nullable = true
223 changed = true
224 end
225 end
226 end
227 if not changed then break
228 end
229
230 loop
231 var changed = false
232 for p in prods do
233 var fs = p.firsts
234 for a in p.alts do
235 var i = a.first_item
236 loop
237 var e = i.next
238 if e == null then break
239 if e isa Token then
240 if try_add(fs, i) then changed = true
241 break
242 else if e isa Production then
243 for t in e.firsts do
244 if try_add(fs, t) then changed = true
245 end
246 if not e.is_nullable then break
247 else
248 abort
249 end
250 i = i.avance
251 end
252 end
253 end
254 if not changed then break
255 end
256
257 loop
258 var changed = false
259 for p1 in prods do
260 for a in p1.alts do
261 var p0: nullable Production = null
262 var i = a.first_item
263 loop
264 var e = i.next
265 if e == null then break
266 var p = p0
267 if e isa Production then p0 = e else p0 = null
268 if p == null then
269 i = i.avance
270 continue
271 end
272
273 var afs = p.afters
274
275 if e isa Token then
276 if try_add(afs, i) then changed = true
277 else if e isa Production then
278 for t in e.firsts do
279 if try_add(afs, t) then changed = true
280 end
281 if e.is_nullable then
282 for t in e.afters do
283 if try_add(afs, t) then changed = true
284 end
285 end
286 else
287 abort
288 end
289 i = i.avance
290 end
291 if p0 != null then
292 var afs = p0.afters
293 for t in p1.afters do
294 if try_add(afs, t) then changed = true
295 end
296 end
297 end
298 end
299 if not changed then break
300 end
301 end
302
303 # used by `analyse`
304 private fun try_add(set: Set[Item], t: Item): Bool
305 do
306 var res = not set.has(t)
307 if res then set.add(t)
308 return res
309 end
310
311 end
312
313 # A production of a grammar
314 class Production
315 super Element
316 # The alternative of the production
317 var alts = new Array[Alternative]
318
319 # Additionnal alternatives in the AST
320 var ast_alts = new Array[Alternative]
321
322 # Is self the accept production
323 var accept = false
324
325 # Is self transformed to a other production for the AST
326 # FIXME: cleaup AST
327 var spe: nullable Production writable = null
328
329 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
330 # FIXME cleanup AST
331 var altone writable = false
332
333 # The cname of the class in the AST
334 # FIXME: cleanup AST
335 redef fun acname do
336 if spe != null then return spe.acname
337 return super
338 end
339
340 # Is the production nullable
341 var is_nullable = false
342 # The first tokens of the production
343 var firsts = new HashSet[Item]
344 # The tokens that may follows the production (as in SLR)
345 var afters = new HashSet[Item]
346
347
348 # Crate a new empty alternative
349 fun new_alt0(name: String): Alternative
350 do
351 var a = new Alternative(self, name, new Array[Element])
352 alts.add a
353 return a
354 end
355
356 # Crate a new alternative with some elements
357 fun new_alt(name: String, elems: Element...): Alternative
358 do
359 var a = new Alternative(self, name, elems)
360 alts.add a
361 return a
362 end
363
364 # Crate a new alternative with some elements
365 fun new_alt2(name: String, elems: Array[Element]): Alternative
366 do
367 var a = new Alternative(self, name, elems)
368 alts.add a
369 return a
370 end
371
372 # Return a set of items for the production
373 fun start_state: Array[Item]
374 do
375 var res = new Array[Item]
376 for a in alts do
377 res.add a.first_item
378 end
379 return res
380 end
381
382 # States in the LR automaton that has a outgoing transition on self
383 var gotos = new ArraySet[LRState]
384 end
385
386 # An alternative of a production
387 class Alternative
388 # The production
389 var prod: Production
390
391 # The name of the alternative
392 var name: String writable
393
394 # The elements of the alternative
395 var elems: Array[Element]
396
397 # The first item of the alternative
398 var first_item = new Item(self, 0)
399
400 # The name of the elements
401 var elems_names = new Array[nullable String]
402
403 # Return a name for a given element (or a number if unamed)
404 fun elemname(i: Int): String
405 do
406 if i >= elems_names.length then return "{i}"
407 var n = elems_names[i]
408 if n == null then return "{i}"
409 return n
410 end
411
412 redef fun to_s do return "{prod.name}::{name}={elems.join(" ")}"
413
414 # A mangled name
415 fun cname: String do
416 return "N{name.to_cmangle}"
417 end
418
419 # The code for the reduction
420 var codes: nullable Array[Code] writable = null
421
422 # Is the alternative transformed (ie not in the AST)
423 var trans writable = false
424
425 # Imitialize codes with the elements
426 fun make_codes
427 do
428 if codes != null then return
429 var codes = new Array[Code]
430 self.codes = codes
431 for e in elems do
432 codes.add(new CodePop)
433 end
434 codes.add(new CodeNew(self))
435 end
436 end
437
438 # A step in the construction of the AST. used to modelize transformations
439 interface Code
440 end
441 # Get a element from the stack
442 class CodePop
443 super Code
444 redef fun to_s do return "pop"
445 end
446 # Allocate a new AST node for an alternative using the correct number of poped elements
447 class CodeNew
448 super Code
449 var alt: Alternative
450 redef fun to_s do return "New {alt.name}/{alt.elems.length}"
451 end
452 # Get null
453 class CodeNull
454 super Code
455 redef fun to_s do return "null"
456 end
457
458 # Elements of an alternative.
459 # Either a `Production` or a `Token`
460 abstract class Element
461 # The name of the element
462 var name: String
463 redef fun to_s do return name
464
465 private var acname_cache: nullable String = null
466
467 # The mangled name of the element
468 fun cname: String do return "N{name.to_cmangle}"
469
470 # the name of the class in the AST
471 fun acname: String do
472 var res = acname_cache
473 if res == null then
474 res = "N{to_s.to_cmangle}"
475 acname_cache = res
476 end
477 return res
478 end
479 fun acname=(s: String) do acname_cache = s
480 end
481
482 # A terminal element
483 class Token
484 super Element
485 # States of the LR automatio that shift on self
486 var shifts = new ArraySet[LRState]
487 # States of the LR automatio that reduce on self in the lookahead(1)
488 var reduces = new ArraySet[LRState]
489 end
490
491 #
492
493 # A LR automaton
494 class LRAutomaton
495 # The grammar of the automaton
496 var grammar: Gram
497
498 # The set of states
499 var states = new Array[LRState]
500
501 # Dump of the automaton
502 fun pretty: String
503 do
504 var res = new Array[String]
505 res.add "* LRAutomaton: {states.length} states\n"
506 for s in states do
507 res.add "s{s.number} {s.name}\n"
508 res.add "\tCORE\n"
509 for i in s.core do
510 res.add "\t\t{i}\n"
511 if i.next != null then continue
512 for i2 in s.lookahead(i) do
513 res.add "\t\t\t{i2}\n"
514 end
515 end
516 res.add "\tOTHER ITEMS\n"
517 for i in s.items do
518 if s.core.has(i) then continue
519 res.add "\t\t{i}\n"
520 if i.next != null then continue
521 for i2 in s.lookahead(i) do
522 res.add "\t\t\t{i2}\n"
523 end
524 end
525 res.add "\tTRANSITIONS {s.outs.length}\n"
526 for t in s.outs do
527 res.add "\t\t{t.elem} |-> s{t.to.number}\n"
528 end
529 res.add "\tACTIONS\n"
530 if s.is_lr0 then
531 res.add "\t\tSTATE LR0\n"
532 else
533 res.add "\t\tSTATE SLR\n"
534 for t, a in s.guarded_reduce do
535 if a.length > 1 then
536 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
537 break
538 else if s.shifts.has(t) then
539 res.add "\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
540 break
541 end
542 end
543 end
544 if not s.shifts.is_empty then
545 res.add "\t\tSHIFT {s.shifts.join(" ")}\n"
546 end
547 for r in s.reduces do
548 res.add "\t\tREDUCE {r}\n"
549 end
550 end
551 return res.to_s
552 end
553
554 # Generate a graphviz file of the automaton
555 fun to_dot(path: String)
556 do
557 var f = new OFStream.open(path)
558 f.write("digraph g \{\n")
559 f.write("rankdir=LR;\n")
560 f.write("node[shape=Mrecord,height=0];\n")
561
562 for s in states do
563 f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
564 for i in s.core do
565 f.write "{i.to_s.escape_to_dot}\\l"
566 end
567 f.write("|")
568 for i in s.items do
569 if s.core.has(i) then continue
570 f.write "{i.to_s.escape_to_dot}\\l"
571 end
572 f.write "\""
573 if not s.is_lr0 then
574 var conflict = false
575 for t, a in s.guarded_reduce do
576 if a.length > 1 then
577 f.write ",color=red"
578 conflict = true
579 break
580 else if s.shifts.has(t) then
581 f.write ",color=orange"
582 conflict = true
583 break
584 end
585 end
586 if not conflict then
587 f.write ",color=blue"
588 end
589 end
590 f.write "];\n"
591 for t in s.outs do
592 f.write "s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\"];\n"
593 end
594 end
595 f.write("\}\n")
596 f.close
597 end
598
599 # Generate the parser of the automaton
600 fun gen_to_nit(filepath: String, name: String)
601 do
602 var gen = new Generator
603 gen.gen_to_nit(self, name)
604 var f = new OFStream.open(filepath)
605 for s in gen.out do
606 f.write(s)
607 f.write("\n")
608 end
609 f.close
610 end
611 end
612
613 redef class String
614 # escape string used in labels for graphviz
615 fun escape_to_dot: String
616 do
617 return escape_more_to_c("|\{\}")
618 end
619 end
620
621 private class Generator
622 var out = new Array[String]
623 fun add(s: String) do out.add(s)
624 fun gen_to_nit(autom: LRAutomaton, name: String)
625 do
626 var states = autom.states
627 var gram = autom.grammar
628
629 add "# Parser generated by nitcc for the grammar {name}"
630 add "import nitcc_runtime"
631
632 add "class Parser_{name}"
633 add "\tsuper Parser"
634 add "\tredef fun start_state do return state_{states.first.cname}"
635 add "end"
636
637 add "redef class Object"
638 for s in states do
639 add "\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
640 end
641 for p in gram.prods do
642 add "\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
643 end
644 add "end"
645
646 add "redef class NToken"
647 for s in states do
648 if not s.need_guard then continue
649 add "\t# guarded action for state {s.name}"
650 add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
651 add "\tprivate fun action_s{s.cname}(parser: Parser) do"
652 if s.reduces.length != 1 then
653 add "\t\tparser.parse_error"
654 else
655 gen_reduce_to_nit(s.reduces.first)
656 end
657 add "\tend"
658 end
659 add "end"
660
661 for t in gram.tokens do
662 if t.name == "Eof" then
663 add "redef class {t.cname}"
664 else
665 add "class {t.cname}"
666 end
667 add "\tsuper NToken"
668 for s in t.shifts do
669 if not s.need_guard then continue
670 add "\tredef fun action_s{s.cname}(parser) do"
671 gen_shift_to_nit(s, t)
672 add "\tend"
673 end
674 for s in t.reduces do
675 if not s.need_guard then continue
676 if s.reduces.length <= 1 then continue
677 add "\tredef fun action_s{s.cname}(parser) do"
678 gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
679 add "\tend"
680 end
681 add "\tredef fun node_name do return \"{t.name.escape_to_nit}\""
682 add "end"
683 end
684
685 add "redef class LRGoto"
686 for s in states do
687 if s.gotos.length <= 1 then continue
688 add "\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
689 end
690 add "end"
691
692 for p in gram.prods do
693 add "class Goto_{p.cname}"
694 add "\tsuper LRGoto"
695 for s in p.gotos do
696 if s.gotos.length <= 1 then continue
697 add "\tredef fun goto_s{s.cname}(parser) do"
698 gen_goto_to_nit(s, p)
699 add "\tend"
700 end
701 add "end"
702 end
703
704 var ps = gram.prods.to_a
705 ps.add_all(gram.ast_prods)
706 for p in ps do
707 if p.spe == null and not p.altone then
708 if p.name.has_suffix("?") or p.name.has_suffix("+") then continue
709 add "class {p.acname}"
710 add "\tsuper NProd"
711 add "\tredef fun node_name do return \"{p.name.escape_to_nit}\""
712 add "end"
713 end
714
715 var als = p.alts.to_a
716 als.add_all(p.ast_alts)
717 for a in als do
718 if a.trans then continue
719 add "class {a.cname}"
720 if p.altone then
721 add "\tsuper NProd"
722 else
723 add "\tsuper {p.acname}"
724 end
725 add "\tredef fun node_name do return \"{a.name.escape_to_nit}\""
726 var initarg = new Array[String]
727 for i in [0..a.elems.length[ do
728 add "\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
729 initarg.add("n_{a.elemname(i)}: {a.elems[i].acname}")
730 end
731 if initarg.is_empty then
732 add "\tinit do end"
733 else
734 add "\tinit({initarg.join(", ")}) do"
735 for i in [0..a.elems.length[ do
736 add "\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
737 end
738 add "\tend"
739 end
740 add "\tredef fun number_of_children do return {a.elems.length}"
741 add "\tredef fun child(i) do"
742 for i in [0..a.elems.length[ do
743 add "\t\tif i == {i} then return n_{a.elemname(i)}"
744 end
745 add "\t\tabort"
746 add "\tend"
747 add "end"
748 end
749 end
750
751 for s in states do
752 add "# State {s.name}"
753 add "private class LRState{s.cname}"
754 add "\tsuper LRState"
755
756 add "\tredef fun to_s do return \"{s.name.escape_to_nit}\""
757
758 var err = new Array[String]
759 for t in s.outs do
760 var e = t.elem
761 if e isa Production then err.add e.name
762 end
763 if err.is_empty then for t in s.outs do
764 var e = t.elem
765 if e isa Token then err.add e.name
766 end
767
768 add "\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\""
769
770 add "\tredef fun action(parser) do"
771 if s.need_guard then
772 add "\t\tparser.peek_token.action_s{s.cname}(parser)"
773 else if s.reduces.length == 1 then
774 gen_reduce_to_nit(s.reduces.first)
775 else
776 abort
777 end
778 add "\tend"
779
780 if not s.gotos.is_empty then
781 add "\tredef fun goto(parser, goto) do"
782 if s.gotos.length > 1 then
783 add "\t\tgoto.goto_s{s.cname}(parser)"
784 else
785 gen_goto_to_nit(s, s.gotos.first)
786 end
787 add "\tend"
788 end
789
790 add "end"
791 end
792
793
794 end
795
796 fun gen_shift_to_nit(s: LRState, t: Token)
797 do
798 var dest = s.trans(t)
799 add "\t\tparser.shift(state_{dest.cname})"
800
801 end
802
803 fun gen_goto_to_nit(s: LRState, p: Production)
804 do
805 var dest = s.trans(p)
806 add "\t\tparser.push(state_{dest.cname})"
807 end
808
809 fun gen_reduce_to_nit(alt: Alternative)
810 do
811 add "\t\t# REDUCE {alt}"
812 var i = alt.elems.length - 1
813 for e in alt.elems.to_a.reversed do
814 add "\t\tvar n{i} = parser.pop.as({e.acname})"
815 i -= 1
816 end
817
818 if alt.name.has_suffix("+_more") then
819 add "\t\tvar prod = n0"
820 add "\t\tn0.children.add(n1)"
821 else if alt.name.has_suffix("+_one") then
822 add "\t\tvar prod = new {alt.prod.acname}"
823 add "\t\tprod.children.add(n0)"
824 else
825 alt.make_codes
826 var cpt = 0
827 i = 0
828 var st = new Array[String]
829 for c in alt.codes.as(not null) do
830 if c isa CodePop then
831 st.add "n{i}"
832 i += 1
833 else if c isa CodeNull then
834 st.add "null"
835 else if c isa CodeNew then
836 var calt = c.alt
837 cpt += 1
838 var from = st.length - calt.elems.length
839 var args = new List[String]
840 for j in [from..st.length[ do
841 args.unshift(st.pop)
842 end
843
844 if args.is_empty then
845 add "\t\tvar p{cpt} = new {calt.cname}"
846 else
847 add "\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
848 end
849 #var x = 0
850 #for j in [from..st.length[ do
851 #if st[j] == "null" then continue
852 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
853 #x += 1
854 #end
855 st.add("p{cpt}")
856 end
857 end
858 assert st.length == 1
859 add "\t\tvar prod = {st.first}"
860 end
861
862 add "\t\tparser.node_stack.push prod"
863 if alt.prod.accept then
864 add "\t\tparser.stop = true"
865 else
866 add "\t\tparser.goto(goto_{alt.prod.cname})"
867 end
868 end
869 end
870
871 # A state in a LR automaton
872 class LRState
873 # name of the automaton (short part from the start)
874 var name: String
875
876 # malglen name
877 fun cname: String do return name.to_cmangle
878
879 # number
880 var number: Int = -1
881
882 # Set of all items
883 var items = new HashSet[Item]
884
885 # Set of items only in the core
886 var core = new HashSet[Item]
887
888 # Outgoing transitions
889 var ins = new Array[LRTransition]
890
891 # Ingoing transitions
892 var outs = new Array[LRTransition]
893
894 # trans function
895 fun trans(e: Element): nullable LRState
896 do
897 for t in outs do if t.elem == e then return t.to
898 return null
899 end
900
901 redef fun ==(o) do return o isa LRState and core == o.core
902 redef fun hash do return items.length
903
904 redef fun to_s do return items.join(" ; ")
905
906 # Add and item in the core
907 fun add(i: Item): Bool
908 do
909 if items.has(i) then return false
910
911 items.add(i)
912 if i.pos > 0 or i.alt.prod.accept then core.add(i)
913 return true
914 end
915
916 # Recusively extends item outside the core
917 fun extends(i: Item)
918 do
919 var e = i.next
920 if e == null then return
921 if not e isa Production then return
922 for i2 in e.start_state do
923 if add(i2) then extends(i2)
924 end
925 end
926
927 # SLR lookahead
928 fun lookahead(i: Item): Set[Item]
929 do
930 return i.alt.prod.afters
931 end
932
933 # Set of all reductions
934 var reduces = new ArraySet[Alternative]
935 # Set of all shifts
936 var shifts = new ArraySet[Token]
937 # Set of all goto
938 var gotos = new ArraySet[Production]
939 # Reduction guarded by tokens
940 var guarded_reduce = new HashMap[Token, Set[Item]]
941 # Shitfs guarded by tokens
942 var guarded_shift = new HashMap[Token, Set[Item]]
943
944 # Does the state need a guard to perform an action?
945 fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
946
947 # Is the state LR0?
948 fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
949
950 # compute guards and conflicts
951 fun analysis
952 do
953 # Extends the core
954 for i in items.to_a do
955 extends(i)
956 end
957
958 # Collect action and conflicts
959 for i in items do
960 var n = i.next
961 if n == null then
962 reduces.add(i.alt)
963 for i2 in lookahead(i) do
964 var t = i2.next
965 assert t isa Token
966 t.reduces.add(self)
967 if not guarded_reduce.has_key(t) then
968 guarded_reduce[t] = new ArraySet[Item]
969 end
970 guarded_reduce[t].add(i)
971 end
972 else if n isa Token then
973 shifts.add(n)
974 n.shifts.add(self)
975 if not guarded_shift.has_key(n) then
976 guarded_shift[n] = new ArraySet[Item]
977 end
978 guarded_shift[n].add(i)
979 else if n isa Production then
980 gotos.add(n)
981 n.gotos.add(self)
982 else
983 abort
984 end
985 end
986 for t, a in guarded_reduce do
987 if a.length > 1 then
988 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
989 for i in a do print "\treduce: {i}"
990 else if guarded_shift.has_key(t) then
991 var ri = a.first
992 var confs = new Array[Item]
993 var ress = new Array[String]
994 var g = guarded_shift[t]
995 for si in lookahead(ri) do
996 if si.next != t then continue
997 if not g.has(si) then
998 confs.add(si)
999 continue
1000 end
1001 var p = ri.alt.prod
1002 var csi: nullable Item = null
1003 for bsi in back_expand(si) do
1004 if bsi.alt.prod == p then
1005 csi = bsi
1006 break
1007 end
1008 end
1009 if csi == null then
1010 confs.add(si)
1011 continue
1012 end
1013 ress.add "\tshift: {si}"
1014 if si != csi then
1015 ress.add "\tcore shift: {csi}"
1016 end
1017 end
1018 if confs.is_empty then
1019 print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
1020 print "\treduce: {ri}"
1021 for r in ress do print r
1022 guarded_reduce.keys.remove(t)
1023 else
1024 print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1025 print "\treduce: {ri}"
1026 for i in guarded_shift[t] do print "\tshift: {i}"
1027 end
1028 end
1029 end
1030 end
1031
1032 # Return `i` and all other items of the state that expands, directly or undirectly, to `i`
1033 fun back_expand(i: Item): Set[Item]
1034 do
1035 var res = new ArraySet[Item]
1036 var todo = [i]
1037 res.add(i)
1038 while not todo.is_empty do
1039 var x = todo.pop
1040 if x.pos > 0 then continue
1041 var p = x.alt.prod
1042 for y in items do
1043 if y.next != p then continue
1044 if res.has(y) then continue
1045 res.add(y)
1046 todo.add(y)
1047 end
1048 end
1049 return res
1050 end
1051 end
1052
1053 # A transition in a LR automaton
1054 class LRTransition
1055 # The origin state
1056 var from: LRState
1057 # The testination state
1058 var to: LRState
1059 # The element labeling the transition
1060 var elem: Element
1061 end
1062
1063 # A alternative with a cursor (dot) before an element
1064 class Item
1065 # The alternative
1066 var alt: Alternative
1067 # The dot index (0 means before the first element)
1068 var pos: Int
1069
1070 redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
1071 redef fun hash do return alt.hash + pos
1072
1073 redef fun to_s
1074 do
1075 var b = new Buffer
1076 b.append("{alt.prod.name}::{alt.name}=")
1077 for i in [0..alt.elems.length[
1078 do
1079 if pos == i then b.append(".") else b.append(" ")
1080 b.append(alt.elems[i].to_s)
1081 end
1082 if pos == alt.elems.length then b.append(".")
1083 return b.to_s
1084 end
1085
1086 # The element thatr follow the dot, null if the fdot is at the end
1087 fun next: nullable Element
1088 do
1089 if pos >= alt.elems.length then return null
1090 return alt.elems[pos]
1091 end
1092
1093 # SLR loohahead
1094 fun lookahead: Set[Token]
1095 do
1096 var res = new HashSet[Token]
1097 var p = pos + 1
1098 while p < alt.elems.length do
1099 var e = alt.elems[p]
1100 if e isa Token then
1101 res.add(e)
1102 break
1103 else if e isa Production then
1104 #res.add_all(e.firsts)
1105 if not e.is_nullable then break
1106 end
1107 p += 1
1108 end
1109 return res
1110 end
1111
1112 # The item that advanced once
1113 fun avance: Item
1114 do
1115 var res = new Item(alt, pos+1)
1116 return res
1117 end
1118 end