*: remove newly superfluous static types on attributes
[nit.git] / contrib / nitcc / src / grammar.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A grammar describing a language
16 class Gram
17 # The productions (non-terminal) of the concrete grammar
18 var prods = new Array[Production]
19
20 # The additional abstract productions of the grammar
21 # TODO clean AST
22 var ast_prods = new Array[Production]
23
24 # The tokens (terminals) of the grammar
25 var tokens = new Array[Token]
26
27 # Dump of the concrete grammar and the transformations
28 fun pretty: String
29 do
30 var res = new FlatBuffer
31 for p in prods do
32 if p.spe != null then
33 res.append("{p.name} \{-> {p.spe.name}\}=\n")
34 else
35 res.append("{p.name} =\n")
36 end
37 var last = null
38 if not p.alts.is_empty then last = p.alts.last
39 for a in p.alts do
40 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a.codes == null then a.make_codes
42 res.append("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a == last then res.append(" ;\n") else res.append(" |\n")
44 end
45 if p.is_nullable then res.append "\t// is nullable\n"
46 if not p.firsts.is_empty then
47 res.append "\t// firsts:\n"
48 for x in p.firsts do res.append "\t// {x}\n"
49 end
50 if not p.afters.is_empty then
51 res.append "\t// afters:\n"
52 for x in p.afters do res.append "\t// {x}\n"
53 end
54 end
55 return res.to_s
56 end
57
58 # Inline (ie. remove from the concrete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline(prods: Collection[Production])
61 do
62 for p in self.prods do
63 for a in p.alts.to_a do
64 if a.phony then continue
65 var to_inline = false
66 for e in a.elems do
67 if e isa Production and prods.has(e) then to_inline = true
68 end
69 if not to_inline then continue
70
71 if a.codes == null then a.make_codes
72
73 var a0 = new Alternative(p, a.name, new Array[Element])
74 a0.trans = true
75 a0.codes = new Array[Code]
76 var pool = [a0]
77 var pool2 = new Array[Alternative]
78 for e in a.elems do
79 if not e isa Production or not prods.has(e) then
80 for x in pool do
81 x.elems.add(e)
82 x.codes.add(new CodePop)
83 end
84 continue
85 end
86 if p == e then
87 print "Circular inlining on {p} :: {a}"
88 abort
89 end
90 pool2.clear
91 for a2 in e.alts do
92 if a.phony then continue
93 if a2.codes == null then a2.make_codes
94 for x in pool do
95 var name = a.name + "_" + pool2.length.to_s
96 var na = new Alternative(p, name, new Array[Element])
97 na.trans = true
98 pool2.add(na)
99 na.elems.add_all(x.elems)
100 na.elems.add_all(a2.elems)
101 na.codes = new Array[Code]
102 na.codes.add_all(x.codes.as(not null))
103 na.codes.add_all(a2.codes.as(not null))
104 end
105 end
106 var tmp = pool
107 pool = pool2
108 pool2 = tmp
109 end
110 for x in pool do
111 x.codes.add(a.codes.last)
112 end
113 p.ast_alts.add(a)
114 p.alts.remove(a)
115 p.alts.add_all(pool)
116 end
117 end
118 for p in prods do
119 self.prods.remove(p)
120 self.ast_prods.add(p)
121 end
122 end
123
124 # Compute a LR automaton
125 fun lr0: LRAutomaton
126 do
127 var start = new Production("_start")
128 start.accept = true
129 var eof = new Token("Eof")
130 tokens.add(eof)
131 start.new_alt("Start", self.prods.first, eof)
132 prods.add(start)
133
134 analyse
135
136 var first = new LRState("Start")
137 first.number = 0
138 for i in start.start_state do first.add(i)
139
140 var automaton = new LRAutomaton(self)
141
142 var todo = new List[LRState]
143 todo.add first
144 var seen = new HashSet[LRState]
145 seen.add first
146
147 while not todo.is_empty do
148 var state = todo.shift
149
150 #print state
151 automaton.states.add(state)
152 state.analysis
153
154 var nexts = new HashMap[Element, LRState]
155 for i in state.items do
156 var e = i.next
157 if e == null then continue
158 if nexts.has_key(e) then
159 nexts[e].add(i.avance)
160 else
161 var name
162 if state == automaton.states.first then
163 name = e.to_s
164 else
165 name = "{state.name} {e}"
166 end
167 var next = new LRState(name)
168 nexts[e] = next
169 next.add(i.avance)
170 end
171 end
172
173 for e, next in nexts do
174
175 #print "#states: {seen.length}"
176
177 # Look for an existing LR0 state in the automaton
178 var new_state = true
179 for n in seen do
180 if n == next then
181 next = n
182 new_state = false
183 break
184 end
185 end
186
187 # If not, add it to the pool and the todo-list
188 if new_state then
189 next.number = seen.length
190 assert not seen.has(next)
191 seen.add(next)
192 todo.add(next)
193 end
194
195 # Add the transition
196 var t = new LRTransition(state, next, e)
197 state.outs.add t
198 next.ins.add t
199 end
200 end
201 return automaton
202 end
203
204 # Compute `nullables`, `firsts` and `afters` of productions
205 fun analyse
206 do
207 loop
208 var changed = false
209 for p in prods do
210 if p.is_nullable then continue
211 for a in p.alts do
212 if a.phony then continue
213 var nullabl = true
214 for e in a.elems do
215 if e isa Token then
216 nullabl = false
217 break
218 else if e isa Production then
219 if not e.is_nullable then nullabl = false
220 break
221 else
222 abort
223 end
224 end
225 if nullabl then
226 p.is_nullable = true
227 changed = true
228 end
229 end
230 end
231 if not changed then break
232 end
233
234 loop
235 var changed = false
236 for p in prods do
237 var fs = p.firsts
238 for a in p.alts do
239 if a.phony then continue
240 var i = a.first_item
241 loop
242 var e = i.next
243 if e == null then break
244 if e isa Token then
245 if try_add(fs, i) then changed = true
246 break
247 else if e isa Production then
248 for t in e.firsts do
249 if try_add(fs, t) then changed = true
250 end
251 if not e.is_nullable then break
252 else
253 abort
254 end
255 i = i.avance
256 end
257 end
258 end
259 if not changed then break
260 end
261
262 loop
263 var changed = false
264 for p1 in prods do
265 for a in p1.alts do
266 if a.phony then continue
267 var p0: nullable Production = null
268 var i = a.first_item
269 loop
270 var e = i.next
271 if e == null then break
272 var p = p0
273 if e isa Production then p0 = e else p0 = null
274 if p == null then
275 i = i.avance
276 continue
277 end
278
279 var afs = p.afters
280
281 if e isa Token then
282 if try_add(afs, i) then changed = true
283 else if e isa Production then
284 for t in e.firsts do
285 if try_add(afs, t) then changed = true
286 end
287 if e.is_nullable then
288 for t in e.afters do
289 if try_add(afs, t) then changed = true
290 end
291 end
292 else
293 abort
294 end
295 i = i.avance
296 end
297 if p0 != null then
298 var afs = p0.afters
299 for t in p1.afters do
300 if try_add(afs, t) then changed = true
301 end
302 end
303 end
304 end
305 if not changed then break
306 end
307 end
308
309 # used by `analyse`
310 private fun try_add(set: Set[Item], t: Item): Bool
311 do
312 var res = not set.has(t)
313 if res then set.add(t)
314 return res
315 end
316
317 end
318
319 # A production of a grammar
320 class Production
321 super Element
322 # The alternative of the production
323 var alts = new Array[Alternative]
324
325 # Additional alternatives in the AST
326 var ast_alts = new Array[Alternative]
327
328 # Is self the accept production
329 var accept = false
330
331 # Is self transformed to a other production for the AST
332 # FIXME: cleanup AST
333 var spe: nullable Production = null is writable
334
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
336 # FIXME cleanup AST
337 var altone = false is writable
338
339 # The cname of the class in the AST
340 # FIXME: cleanup AST
341 redef fun acname do
342 if spe != null then return spe.acname
343 return super
344 end
345
346 # Is the production nullable
347 var is_nullable = false
348
349 # The first tokens of the production
350 var firsts = new HashSet[Item]
351
352 # The tokens that may follows the production (as in SLR)
353 var afters = new HashSet[Item]
354
355
356 # Crate a new empty alternative
357 fun new_alt0(name: String): Alternative
358 do
359 var a = new Alternative(self, name, new Array[Element])
360 alts.add a
361 return a
362 end
363
364 # Crate a new alternative with some elements
365 fun new_alt(name: String, elems: Element...): Alternative
366 do
367 var a = new Alternative(self, name, elems)
368 alts.add a
369 return a
370 end
371
372 # Crate a new alternative with some elements
373 fun new_alt2(name: String, elems: Array[Element]): Alternative
374 do
375 var a = new Alternative(self, name, elems)
376 alts.add a
377 return a
378 end
379
380 # Return a set of items for the production
381 fun start_state: Array[Item]
382 do
383 var res = new Array[Item]
384 for a in alts do
385 if a.phony then continue
386 res.add a.first_item
387 end
388 return res
389 end
390
391 # States in the LR automaton that has a outgoing transition on self
392 var gotos = new ArraySet[LRState]
393 end
394
395 # An alternative of a production
396 class Alternative
397 # The production
398 var prod: Production
399
400 # The name of the alternative
401 var name: String is writable
402
403 # The elements of the alternative
404 var elems: Array[Element]
405
406 # The first item of the alternative
407 var first_item = new Item(self, 0)
408
409 # The name of the elements
410 var elems_names = new Array[nullable String]
411
412 # Return a name for a given element (or a number if unamed)
413 fun elemname(i: Int): String
414 do
415 if i >= elems_names.length then return "{i}"
416 var n = elems_names[i]
417 if n == null then return "{i}"
418 return n
419 end
420
421 redef fun to_s do return "{prod.name}::{name}={elems.join(" ")}"
422
423 # A mangled name
424 fun cname: String do
425 return "N{name.to_cmangle}"
426 end
427
428 # The code for the reduction
429 var codes: nullable Array[Code] = null is writable
430
431 # Is the alternative transformed (ie not in the AST)
432 var trans = false is writable
433
434 # Is the alternative unparsable? (ie not in the automaton)
435 var phony = false is writable
436
437 # Initialize codes with the elements
438 fun make_codes
439 do
440 if codes != null then return
441 var codes = new Array[Code]
442 self.codes = codes
443 for e in elems do
444 codes.add(new CodePop)
445 end
446 codes.add(new CodeNew(self))
447 end
448 end
449
450 # A step in the construction of the AST.
451 # Used to model transformations
452 interface Code
453 end
454
455 # Get a element from the stack
456 class CodePop
457 super Code
458 redef fun to_s do return "pop"
459 end
460
461 # Allocate a new AST node for an alternative using the correct number of popped elements
462 class CodeNew
463 super Code
464
465 # The associated alternative
466 var alt: Alternative
467
468 redef fun to_s do return "New {alt.name}/{alt.elems.length}"
469 end
470
471 # Get null
472 class CodeNull
473 super Code
474 redef fun to_s do return "null"
475 end
476
477 # Elements of an alternative.
478 # Either a `Production` or a `Token`
479 abstract class Element
480 # The name of the element
481 var name: String
482 redef fun to_s do return name
483
484 private var acname_cache: nullable String = null
485
486 # The mangled name of the element
487 fun cname: String do return "N{name.to_cmangle}"
488
489 # The name of the class in the AST
490 fun acname: String do
491 var res = acname_cache
492 if res == null then
493 res = "N{to_s.to_cmangle}"
494 acname_cache = res
495 end
496 return res
497 end
498
499 # The name of the class in the AST
500 fun acname=(s: String) do acname_cache = s
501 end
502
503 # A terminal element
504 class Token
505 super Element
506 # States of the LR automaton that shift on self
507 var shifts = new ArraySet[LRState]
508 # States of the LR automaton that reduce on self in the lookahead(1)
509 var reduces = new ArraySet[LRState]
510 end
511
512 #
513
514 # A LR automaton
515 class LRAutomaton
516 # The grammar of the automaton
517 var grammar: Gram
518
519 # The set of states
520 var states = new Array[LRState]
521
522 # Dump of the automaton
523 fun pretty: String
524 do
525 var res = new Array[String]
526 res.add "* LRAutomaton: {states.length} states\n"
527 for s in states do
528 res.add "s{s.number} {s.name}\n"
529 res.add "\tCORE\n"
530 for i in s.core do
531 res.add "\t\t{i}\n"
532 if i.next != null then continue
533 for i2 in s.lookahead(i) do
534 res.add "\t\t\t{i2}\n"
535 end
536 end
537 res.add "\tOTHER ITEMS\n"
538 for i in s.items do
539 if s.core.has(i) then continue
540 res.add "\t\t{i}\n"
541 if i.next != null then continue
542 for i2 in s.lookahead(i) do
543 res.add "\t\t\t{i2}\n"
544 end
545 end
546 res.add "\tTRANSITIONS {s.outs.length}\n"
547 for t in s.outs do
548 res.add "\t\t{t.elem} |-> s{t.to.number}\n"
549 end
550 res.add "\tACTIONS\n"
551 if s.is_lr0 then
552 res.add "\t\tSTATE LR0\n"
553 else
554 res.add "\t\tSTATE SLR\n"
555 for t, a in s.guarded_reduce do
556 if a.length > 1 then
557 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
558 break
559 else if s.shifts.has(t) then
560 res.add "\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
561 break
562 end
563 end
564 end
565 if not s.shifts.is_empty then
566 res.add "\t\tSHIFT {s.shifts.join(" ")}\n"
567 end
568 for r in s.reduces do
569 res.add "\t\tREDUCE {r}\n"
570 end
571 end
572 return res.join
573 end
574
575 # Generate a graphviz file of the automaton
576 fun to_dot(path: String)
577 do
578 var f = new FileWriter.open(path)
579 f.write("digraph g \{\n")
580 f.write("rankdir=LR;\n")
581 f.write("node[shape=Mrecord,height=0];\n")
582
583 for s in states do
584 f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
585 for i in s.core do
586 f.write "{i.to_s.escape_to_dot}\\l"
587 end
588 f.write("|")
589 for i in s.items do
590 if s.core.has(i) then continue
591 f.write "{i.to_s.escape_to_dot}\\l"
592 end
593 f.write "\""
594 if not s.is_lr0 then
595 var conflict = false
596 for t, a in s.guarded_reduce do
597 if a.length > 1 then
598 f.write ",color=red"
599 conflict = true
600 break
601 else if s.shifts.has(t) then
602 f.write ",color=orange"
603 conflict = true
604 break
605 end
606 end
607 if not conflict then
608 f.write ",color=blue"
609 end
610 end
611 f.write "];\n"
612 for t in s.outs do
613 f.write "s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\"];\n"
614 end
615 end
616 f.write("\}\n")
617 f.close
618 end
619
620 # Generate the parser of the automaton
621 fun gen_to_nit(filepath: String, name: String)
622 do
623 var gen = new Generator
624 gen.gen_to_nit(self, name)
625 var f = new FileWriter.open(filepath)
626 for s in gen.out do
627 f.write(s)
628 f.write("\n")
629 end
630 f.close
631 end
632 end
633
634 private class Generator
635 var out = new Array[String]
636 fun add(s: String) do out.add(s)
637 fun gen_to_nit(autom: LRAutomaton, name: String)
638 do
639 var states = autom.states
640 var gram = autom.grammar
641
642 add "# Parser generated by nitcc for the grammar {name}"
643 add "module {name}_parser is generated, no_warning(\"missing-doc\",\"old-init\")"
644 add "import nitcc_runtime"
645
646 add "class Parser_{name}"
647 add "\tsuper Parser"
648 add "\tredef fun start_state do return state_{states.first.cname}"
649 add "end"
650
651 for s in states do
652 add "private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
653 end
654 for p in gram.prods do
655 add "private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
656 for a in p.alts do
657 add "private fun reduce_{a.cname}(parser: Parser) do"
658 gen_reduce_to_nit(a)
659 add "end"
660 end
661 end
662
663 add "redef class NToken"
664 for s in states do
665 if not s.need_guard then continue
666 add "\t# guarded action for state {s.name}"
667 add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
668 add "\tprivate fun action_s{s.cname}(parser: Parser) do"
669 if s.reduces.length != 1 then
670 add "\t\tparser.parse_error"
671 else
672 add "\t\treduce_{s.reduces.first.cname}(parser)"
673 #gen_reduce_to_nit(s.reduces.first)
674 end
675 add "\tend"
676 end
677 add "end"
678
679 for t in gram.tokens do
680 if t.name == "Eof" then
681 add "redef class {t.cname}"
682 else
683 add "class {t.cname}"
684 end
685 add "\tsuper NToken"
686 for s in t.shifts do
687 if not s.need_guard then continue
688 add "\tredef fun action_s{s.cname}(parser) do"
689 gen_shift_to_nit(s, t)
690 add "\tend"
691 end
692 for s in t.reduces do
693 if not s.need_guard then continue
694 if s.reduces.length <= 1 then continue
695 add "\tredef fun action_s{s.cname}(parser) do"
696 add "\t\treduce_{s.guarded_reduce[t].first.alt.cname}(parser)"
697 #gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
698 add "\tend"
699 end
700 add "\tredef fun node_name do return \"{t.name.escape_to_nit}\""
701 add "end"
702 end
703
704 add "redef class LRGoto"
705 for s in states do
706 if s.gotos.length <= 1 then continue
707 add "\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
708 end
709 add "end"
710
711 for p in gram.prods do
712 add "class Goto_{p.cname}"
713 add "\tsuper LRGoto"
714 for s in p.gotos do
715 if s.gotos.length <= 1 then continue
716 add "\tredef fun goto_s{s.cname}(parser) do"
717 gen_goto_to_nit(s, p)
718 add "\tend"
719 end
720 add "end"
721 end
722
723 var ps = gram.prods.to_a
724 ps.add_all(gram.ast_prods)
725 for p in ps do
726 if p.spe == null and not p.altone then
727 if p.name.has_suffix("?") or p.name.has_suffix("+") then continue
728 add "class {p.acname}"
729 add "\tsuper NProd"
730 add "\tredef fun node_name do return \"{p.name.escape_to_nit}\""
731 add "end"
732 end
733
734 var als = p.alts.to_a
735 als.add_all(p.ast_alts)
736 for a in als do
737 if a.trans then continue
738 add "class {a.cname}"
739 if p.altone then
740 add "\tsuper NProd"
741 else
742 add "\tsuper {p.acname}"
743 end
744 add "\tredef fun node_name do return \"{a.name.escape_to_nit}\""
745 var initarg = new Array[String]
746 for i in [0..a.elems.length[ do
747 add "\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
748 initarg.add("n_{a.elemname(i)}: {a.elems[i].acname}")
749 end
750 if initarg.is_empty then
751 add "\tinit do end"
752 else
753 add "\tinit({initarg.join(", ")}) do"
754 for i in [0..a.elems.length[ do
755 add "\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
756 end
757 add "\tend"
758 end
759 add "\tredef fun number_of_children do return {a.elems.length}"
760 add "\tredef fun child(i) do"
761 for i in [0..a.elems.length[ do
762 add "\t\tif i == {i} then return n_{a.elemname(i)}"
763 end
764 add "\t\tabort"
765 add "\tend"
766 add "end"
767 end
768 end
769
770 for s in states do
771 add "# State {s.name}"
772 add "private class LRState{s.cname}"
773 add "\tsuper LRState"
774
775 add "\tredef fun to_s do return \"{s.name.escape_to_nit}\""
776
777 var err = new Array[String]
778 for t in s.outs do
779 var e = t.elem
780 if e isa Production then err.add e.name
781 end
782 if err.is_empty then for t in s.outs do
783 var e = t.elem
784 if e isa Token then err.add e.name
785 end
786
787 add "\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\""
788
789 add "\tredef fun action(parser) do"
790 if s.need_guard then
791 add "\t\tparser.peek_token.action_s{s.cname}(parser)"
792 else if s.reduces.length == 1 then
793 add "\t\treduce_{s.reduces.first.cname}(parser)"
794 #gen_reduce_to_nit(s.reduces.first)
795 else
796 abort
797 end
798 add "\tend"
799
800 if not s.gotos.is_empty then
801 add "\tredef fun goto(parser, goto) do"
802 if s.gotos.length > 1 then
803 add "\t\tgoto.goto_s{s.cname}(parser)"
804 else
805 gen_goto_to_nit(s, s.gotos.first)
806 end
807 add "\tend"
808 end
809
810 add "end"
811 end
812
813
814 end
815
816 fun gen_shift_to_nit(s: LRState, t: Token)
817 do
818 var dest = s.trans(t)
819 add "\t\tparser.shift(state_{dest.cname})"
820
821 end
822
823 fun gen_goto_to_nit(s: LRState, p: Production)
824 do
825 var dest = s.trans(p)
826 add "\t\tparser.push(state_{dest.cname})"
827 end
828
829 fun gen_reduce_to_nit(alt: Alternative)
830 do
831 add "\t\t# REDUCE {alt}"
832 var i = alt.elems.length - 1
833 for e in alt.elems.to_a.reversed do
834 add "\t\tvar n{i} = parser.pop.as({e.acname})"
835 i -= 1
836 end
837
838 if alt.name.has_suffix("+_more") then
839 add "\t\tvar prod = n0"
840 add "\t\tn0.children.add(n1)"
841 else if alt.name.has_suffix("+_one") then
842 add "\t\tvar prod = new {alt.prod.acname}"
843 add "\t\tprod.children.add(n0)"
844 else
845 alt.make_codes
846 var cpt = 0
847 i = 0
848 var st = new Array[String]
849 for c in alt.codes.as(not null) do
850 if c isa CodePop then
851 st.add "n{i}"
852 i += 1
853 else if c isa CodeNull then
854 st.add "null"
855 else if c isa CodeNew then
856 var calt = c.alt
857 cpt += 1
858 var from = st.length - calt.elems.length
859 var args = new List[String]
860 for j in [from..st.length[ do
861 args.unshift(st.pop)
862 end
863
864 if args.is_empty then
865 add "\t\tvar p{cpt} = new {calt.cname}"
866 else
867 add "\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
868 end
869 #var x = 0
870 #for j in [from..st.length[ do
871 #if st[j] == "null" then continue
872 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
873 #x += 1
874 #end
875 st.add("p{cpt}")
876 end
877 end
878 assert st.length == 1
879 add "\t\tvar prod = {st.first}"
880 end
881
882 add "\t\tparser.node_stack.push prod"
883 if alt.prod.accept then
884 add "\t\tparser.stop = true"
885 else
886 add "\t\tparser.goto(goto_{alt.prod.cname})"
887 end
888 end
889 end
890
891 # A state in a LR automaton
892 class LRState
893 # Name of the automaton (short part from the start)
894 var name: String
895
896 # Mangled name
897 var cname: String is lazy do return name.to_cmangle
898
899 # Number
900 var number = -1
901
902 # Set of all items
903 var items = new HashSet[Item]
904
905 # Set of items only in the core
906 var core = new HashSet[Item]
907
908 # Outgoing transitions
909 var ins = new Array[LRTransition]
910
911 # Ingoing transitions
912 var outs = new Array[LRTransition]
913
914 # Trans function
915 fun trans(e: Element): nullable LRState
916 do
917 for t in outs do if t.elem == e then return t.to
918 return null
919 end
920
921 redef fun ==(o) do return o isa LRState and core == o.core
922 redef fun hash do return items.length
923
924 redef fun to_s do return items.join(" ; ")
925
926 # Add and item in the core
927 fun add(i: Item): Bool
928 do
929 if items.has(i) then return false
930
931 items.add(i)
932 if i.pos > 0 or i.alt.prod.accept then core.add(i)
933 return true
934 end
935
936 # Recursively extends item outside the core
937 fun extends(i: Item)
938 do
939 var e = i.next
940 if e == null then return
941 if not e isa Production then return
942 for i2 in e.start_state do
943 if add(i2) then extends(i2)
944 end
945 end
946
947 # SLR lookahead
948 fun lookahead(i: Item): Set[Item]
949 do
950 return i.alt.prod.afters
951 end
952
953 # Set of all reductions
954 var reduces = new ArraySet[Alternative]
955 # Set of all shifts
956 var shifts = new ArraySet[Token]
957 # Set of all goto
958 var gotos = new ArraySet[Production]
959 # Reduction guarded by tokens
960 var guarded_reduce = new HashMap[Token, Set[Item]]
961 # Shifts guarded by tokens
962 var guarded_shift = new HashMap[Token, Set[Item]]
963
964 # Does the state need a guard to perform an action?
965 fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
966
967 # Is the state LR0?
968 fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
969
970 # Compute guards and conflicts
971 fun analysis
972 do
973 # Extends the core
974 for i in items.to_a do
975 extends(i)
976 end
977
978 # Collect action and conflicts
979 for i in items do
980 var n = i.next
981 if n == null then
982 reduces.add(i.alt)
983 for i2 in lookahead(i) do
984 var t = i2.next
985 assert t isa Token
986 t.reduces.add(self)
987 if not guarded_reduce.has_key(t) then
988 guarded_reduce[t] = new ArraySet[Item]
989 end
990 guarded_reduce[t].add(i)
991 end
992 else if n isa Token then
993 shifts.add(n)
994 n.shifts.add(self)
995 if not guarded_shift.has_key(n) then
996 guarded_shift[n] = new ArraySet[Item]
997 end
998 guarded_shift[n].add(i)
999 else if n isa Production then
1000 gotos.add(n)
1001 n.gotos.add(self)
1002 else
1003 abort
1004 end
1005 end
1006 # Token to remove as reduction guard to solve S/R conflicts
1007 var removed_reduces = new Array[Token]
1008 for t, a in guarded_reduce do
1009 if a.length > 1 then
1010 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1011 for i in a do print "\treduce: {i}"
1012 conflicting_items.add_all a
1013 end
1014 if guarded_shift.has_key(t) then
1015 var ri = a.first
1016 var confs = new Array[Item]
1017 var ress = new Array[String]
1018 var g = guarded_shift[t]
1019 for si in lookahead(ri) do
1020 if si.next != t then continue
1021 if not g.has(si) then
1022 confs.add(si)
1023 continue
1024 end
1025 var p = ri.alt.prod
1026 var csi: nullable Item = null
1027 for bsi in back_expand(si) do
1028 if bsi.alt.prod == p then
1029 csi = bsi
1030 break
1031 end
1032 end
1033 if csi == null then
1034 confs.add(si)
1035 continue
1036 end
1037 ress.add "\tshift: {si}"
1038 if si != csi then
1039 ress.add "\tcore shift: {csi}"
1040 end
1041 end
1042 if confs.is_empty then
1043 print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
1044 print "\treduce: {ri}"
1045 for r in ress do print r
1046 removed_reduces.add t
1047 else
1048 print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1049 print "\treduce: {ri}"
1050 for i in guarded_shift[t] do print "\tshift: {i}"
1051 removed_reduces.add t
1052 conflicting_items.add_all a
1053 conflicting_items.add_all guarded_shift[t]
1054 end
1055 end
1056 end
1057 for t in removed_reduces do
1058 guarded_reduce.keys.remove(t)
1059 t.reduces.remove(self)
1060 end
1061 end
1062
1063 # Items within a reduce/reduce or a shift/reduce conflict.
1064 #
1065 # Is computed by `analysis`
1066 var conflicting_items = new ArraySet[Item]
1067
1068 # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
1069 fun back_expand(i: Item): Set[Item]
1070 do
1071 var res = new ArraySet[Item]
1072 var todo = [i]
1073 res.add(i)
1074 while not todo.is_empty do
1075 var x = todo.pop
1076 if x.pos > 0 then continue
1077 var p = x.alt.prod
1078 for y in items do
1079 if y.next != p then continue
1080 if res.has(y) then continue
1081 res.add(y)
1082 todo.add(y)
1083 end
1084 end
1085 return res
1086 end
1087 end
1088
1089 # A transition in a LR automaton
1090 class LRTransition
1091 # The origin state
1092 var from: LRState
1093 # The destination state
1094 var to: LRState
1095 # The element labeling the transition
1096 var elem: Element
1097 end
1098
1099 # A alternative with a cursor (dot) before an element
1100 class Item
1101 # The alternative
1102 var alt: Alternative
1103 # The dot index (0 means before the first element)
1104 var pos: Int
1105
1106 redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
1107 redef fun hash do return alt.hash + pos
1108
1109 redef fun to_s
1110 do
1111 var b = new FlatBuffer
1112 b.append("{alt.prod.name}::{alt.name}=")
1113 for i in [0..alt.elems.length[
1114 do
1115 if pos == i then b.append(".") else b.append(" ")
1116 b.append(alt.elems[i].to_s)
1117 end
1118 if pos == alt.elems.length then b.append(".")
1119 return b.to_s
1120 end
1121
1122 # The element that follows the dot, null if the dot is at the end
1123 fun next: nullable Element
1124 do
1125 if pos >= alt.elems.length then return null
1126 return alt.elems[pos]
1127 end
1128
1129 # SLR lookahead
1130 fun lookahead: Set[Token]
1131 do
1132 var res = new HashSet[Token]
1133 var p = pos + 1
1134 while p < alt.elems.length do
1135 var e = alt.elems[p]
1136 if e isa Token then
1137 res.add(e)
1138 break
1139 else if e isa Production then
1140 #res.add_all(e.firsts)
1141 if not e.is_nullable then break
1142 end
1143 p += 1
1144 end
1145 return res
1146 end
1147
1148 # The item that advanced once
1149 fun avance: Item
1150 do
1151 var res = new Item(alt, pos+1)
1152 return res
1153 end
1154 end