0a10bebbfb8cd300b51a927e1ef06f13cf95c47a
[nit.git] / contrib / nitcc / src / grammar.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A grammar describing a language
16 class Gram
17 # The productions (non-terminal) of the concrete grammar
18 var prods = new Array[Production]
19
20 # The additional abstract productions of the grammar
21 # TODO clean AST
22 var ast_prods = new Array[Production]
23
24 # The tokens (terminals) of the grammar
25 var tokens = new Array[Token]
26
27 # Dump of the concrete grammar and the transformations
28 fun pretty: String
29 do
30 var res = new FlatBuffer
31 for p in prods do
32 if p.spe != null then
33 res.append("{p.name} \{-> {p.spe.name}\}=\n")
34 else
35 res.append("{p.name} =\n")
36 end
37 var last = null
38 if not p.alts.is_empty then last = p.alts.last
39 for a in p.alts do
40 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a.codes == null then a.make_codes
42 res.append("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a == last then res.append(" ;\n") else res.append(" |\n")
44 end
45 if p.is_nullable then res.append "\t// is nullable\n"
46 if not p.firsts.is_empty then
47 res.append "\t// firsts:\n"
48 for x in p.firsts do res.append "\t// {x}\n"
49 end
50 if not p.afters.is_empty then
51 res.append "\t// afters:\n"
52 for x in p.afters do res.append "\t// {x}\n"
53 end
54 end
55 return res.to_s
56 end
57
58 # Inline (ie. remove from the concrete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline(prods: Collection[Production])
61 do
62 for p in self.prods do
63 for a in p.alts.to_a do
64 if a.phony then continue
65 var to_inline = false
66 for e in a.elems do
67 if e isa Production and prods.has(e) then to_inline = true
68 end
69 if not to_inline then continue
70
71 if a.codes == null then a.make_codes
72
73 var a0 = new Alternative(p, a.name, new Array[Element])
74 a0.trans = true
75 a0.codes = new Array[Code]
76 var pool = [a0]
77 var pool2 = new Array[Alternative]
78 for e in a.elems do
79 if not e isa Production or not prods.has(e) then
80 for x in pool do
81 x.elems.add(e)
82 x.codes.add(new CodePop)
83 end
84 continue
85 end
86 if p == e then
87 print "Circular inlining on {p} :: {a}"
88 abort
89 end
90 pool2.clear
91 for a2 in e.alts do
92 if a.phony then continue
93 if a2.codes == null then a2.make_codes
94 for x in pool do
95 var name = a.name + "_" + pool2.length.to_s
96 var na = new Alternative(p, name, new Array[Element])
97 na.trans = true
98 pool2.add(na)
99 na.elems.add_all(x.elems)
100 na.elems.add_all(a2.elems)
101 na.codes = new Array[Code]
102 na.codes.add_all(x.codes.as(not null))
103 na.codes.add_all(a2.codes.as(not null))
104 end
105 end
106 var tmp = pool
107 pool = pool2
108 pool2 = tmp
109 end
110 for x in pool do
111 x.codes.add(a.codes.last)
112 end
113 p.ast_alts.add(a)
114 p.alts.remove(a)
115 p.alts.add_all(pool)
116 end
117 end
118 for p in prods do
119 self.prods.remove(p)
120 self.ast_prods.add(p)
121 end
122 end
123
124 # Compute a LR automaton
125 fun lr0: LRAutomaton
126 do
127 var start = new Production("_start")
128 start.accept = true
129 var eof = new Token("Eof")
130 tokens.add(eof)
131 start.new_alt("Start", self.prods.first, eof)
132 prods.add(start)
133
134 analyse
135
136 var first = new LRState("Start")
137 first.number = 0
138 for i in start.start_state do first.add(i)
139
140 var automaton = new LRAutomaton(self)
141
142 var todo = new List[LRState]
143 todo.add first
144 var seen = new HashSet[LRState]
145 seen.add first
146
147 while not todo.is_empty do
148 var state = todo.shift
149
150 #print state
151 automaton.states.add(state)
152 state.analysis
153
154 var nexts = new HashMap[Element, LRState]
155 for i in state.items do
156 var e = i.next
157 if e == null then continue
158 if nexts.has_key(e) then
159 nexts[e].add(i.avance)
160 else
161 var name
162 if state == automaton.states.first then
163 name = e.to_s
164 else
165 name = "{state.name} {e}"
166 end
167 var next = new LRState(name)
168 nexts[e] = next
169 next.add(i.avance)
170 end
171 end
172
173 for e, next in nexts do
174
175 #print "#states: {seen.length}"
176
177 # Look for an existing LR0 state in the automaton
178 var new_state = true
179 for n in seen do
180 if n == next then
181 next = n
182 new_state = false
183 break
184 end
185 end
186
187 # If not, add it to the pool and the todo-list
188 if new_state then
189 next.number = seen.length
190 assert not seen.has(next)
191 seen.add(next)
192 todo.add(next)
193 end
194
195 # Add the transition
196 var t = new LRTransition(state, next, e)
197 state.outs.add t
198 next.ins.add t
199 end
200 end
201 return automaton
202 end
203
204 # Compute `nullables`, `firsts` and `afters` of productions
205 fun analyse
206 do
207 loop
208 var changed = false
209 for p in prods do
210 if p.is_nullable then continue
211 for a in p.alts do
212 if a.phony then continue
213 var nullabl = true
214 for e in a.elems do
215 if e isa Token then
216 nullabl = false
217 break
218 else if e isa Production then
219 if not e.is_nullable then nullabl = false
220 break
221 else
222 abort
223 end
224 end
225 if nullabl then
226 p.is_nullable = true
227 changed = true
228 end
229 end
230 end
231 if not changed then break
232 end
233
234 loop
235 var changed = false
236 for p in prods do
237 var fs = p.firsts
238 for a in p.alts do
239 if a.phony then continue
240 var i = a.first_item
241 loop
242 var e = i.next
243 if e == null then break
244 if e isa Token then
245 if try_add(fs, i) then changed = true
246 break
247 else if e isa Production then
248 for t in e.firsts do
249 if try_add(fs, t) then changed = true
250 end
251 if not e.is_nullable then break
252 else
253 abort
254 end
255 i = i.avance
256 end
257 end
258 end
259 if not changed then break
260 end
261
262 loop
263 var changed = false
264 for p1 in prods do
265 for a in p1.alts do
266 if a.phony then continue
267 var p0: nullable Production = null
268 var i = a.first_item
269 loop
270 var e = i.next
271 if e == null then break
272 var p = p0
273 if e isa Production then p0 = e else p0 = null
274 if p == null then
275 i = i.avance
276 continue
277 end
278
279 var afs = p.afters
280
281 if e isa Token then
282 if try_add(afs, i) then changed = true
283 else if e isa Production then
284 for t in e.firsts do
285 if try_add(afs, t) then changed = true
286 end
287 if e.is_nullable then
288 for t in e.afters do
289 if try_add(afs, t) then changed = true
290 end
291 end
292 else
293 abort
294 end
295 i = i.avance
296 end
297 if p0 != null then
298 var afs = p0.afters
299 for t in p1.afters do
300 if try_add(afs, t) then changed = true
301 end
302 end
303 end
304 end
305 if not changed then break
306 end
307 end
308
309 # used by `analyse`
310 private fun try_add(set: Set[Item], t: Item): Bool
311 do
312 var res = not set.has(t)
313 if res then set.add(t)
314 return res
315 end
316
317 end
318
319 # A production of a grammar
320 class Production
321 super Element
322 # The alternative of the production
323 var alts = new Array[Alternative]
324
325 # Additional alternatives in the AST
326 var ast_alts = new Array[Alternative]
327
328 # Is self the accept production
329 var accept = false
330
331 # Is self transformed to a other production for the AST
332 # FIXME: cleanup AST
333 var spe: nullable Production = null is writable
334
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
336 # FIXME cleanup AST
337 var altone = false is writable
338
339 # The cname of the class in the AST
340 # FIXME: cleanup AST
341 redef fun acname do
342 if spe != null then return spe.acname
343 return super
344 end
345
346 # Is the production nullable
347 var is_nullable = false
348
349 # The first tokens of the production
350 var firsts = new HashSet[Item]
351
352 # The tokens that may follows the production (as in SLR)
353 var afters = new HashSet[Item]
354
355
356 # Crate a new empty alternative
357 fun new_alt0(name: String): Alternative
358 do
359 var a = new Alternative(self, name, new Array[Element])
360 alts.add a
361 return a
362 end
363
364 # Crate a new alternative with some elements
365 fun new_alt(name: String, elems: Element...): Alternative
366 do
367 var a = new Alternative(self, name, elems)
368 alts.add a
369 return a
370 end
371
372 # Crate a new alternative with some elements
373 fun new_alt2(name: String, elems: Array[Element]): Alternative
374 do
375 var a = new Alternative(self, name, elems)
376 alts.add a
377 return a
378 end
379
380 # Return a set of items for the production
381 fun start_state: Array[Item]
382 do
383 var res = new Array[Item]
384 for a in alts do
385 if a.phony then continue
386 res.add a.first_item
387 end
388 return res
389 end
390
391 # States in the LR automaton that has a outgoing transition on self
392 var gotos = new ArraySet[LRState]
393 end
394
395 # An alternative of a production
396 class Alternative
397 # The production
398 var prod: Production
399
400 # The name of the alternative
401 var name: String is writable
402
403 # The elements of the alternative
404 var elems: Array[Element]
405
406 # The first item of the alternative
407 var first_item = new Item(self, 0)
408
409 # The name of the elements
410 var elems_names = new Array[nullable String]
411
412 # Return a name for a given element (or a number if unamed)
413 fun elemname(i: Int): String
414 do
415 if i >= elems_names.length then return "{i}"
416 var n = elems_names[i]
417 if n == null then return "{i}"
418 return n
419 end
420
421 redef fun to_s do return "{prod.name}::{name}={elems.join(" ")}"
422
423 # A mangled name
424 fun cname: String do
425 return "N{name.to_cmangle}"
426 end
427
428 # The code for the reduction
429 var codes: nullable Array[Code] = null is writable
430
431 # Is the alternative transformed (ie not in the AST)
432 var trans = false is writable
433
434 # Is the alternative unparsable? (ie not in the automaton)
435 var phony = false is writable
436
437 # Initialize codes with the elements
438 fun make_codes
439 do
440 if codes != null then return
441 var codes = new Array[Code]
442 self.codes = codes
443 for e in elems do
444 codes.add(new CodePop)
445 end
446 codes.add(new CodeNew(self))
447 end
448 end
449
450 # A step in the construction of the AST.
451 # Used to model transformations
452 interface Code
453 end
454
455 # Get a element from the stack
456 class CodePop
457 super Code
458 redef fun to_s do return "pop"
459 end
460
461 # Allocate a new AST node for an alternative using the correct number of popped elements
462 class CodeNew
463 super Code
464
465 # The associated alternative
466 var alt: Alternative
467
468 redef fun to_s do return "New {alt.name}/{alt.elems.length}"
469 end
470
471 # Get null
472 class CodeNull
473 super Code
474 redef fun to_s do return "null"
475 end
476
477 # Elements of an alternative.
478 # Either a `Production` or a `Token`
479 abstract class Element
480 # The name of the element
481 var name: String
482 redef fun to_s do return name
483
484 private var acname_cache: nullable String = null
485
486 # The mangled name of the element
487 fun cname: String do return "N{name.to_cmangle}"
488
489 # The name of the class in the AST
490 fun acname: String do
491 var res = acname_cache
492 if res == null then
493 res = "N{to_s.to_cmangle}"
494 acname_cache = res
495 end
496 return res
497 end
498
499 # The name of the class in the AST
500 fun acname=(s: String) do acname_cache = s
501 end
502
503 # A terminal element
504 class Token
505 super Element
506 # States of the LR automaton that shift on self
507 var shifts = new ArraySet[LRState]
508 # States of the LR automaton that reduce on self in the lookahead(1)
509 var reduces = new ArraySet[LRState]
510 end
511
512 #
513
514 # A LR automaton
515 class LRAutomaton
516 # The grammar of the automaton
517 var grammar: Gram
518
519 # The set of states
520 var states = new Array[LRState]
521
522 # Dump of the automaton
523 fun pretty: String
524 do
525 var res = new Array[String]
526 res.add "* LRAutomaton: {states.length} states\n"
527 for s in states do
528 res.add "s{s.number} {s.name}\n"
529 res.add "\tCORE\n"
530 for i in s.core do
531 res.add "\t\t{i}\n"
532 if i.next != null then continue
533 for i2 in s.lookahead(i) do
534 res.add "\t\t\t{i2}\n"
535 end
536 end
537 res.add "\tOTHER ITEMS\n"
538 for i in s.items do
539 if s.core.has(i) then continue
540 res.add "\t\t{i}\n"
541 if i.next != null then continue
542 for i2 in s.lookahead(i) do
543 res.add "\t\t\t{i2}\n"
544 end
545 end
546 res.add "\tTRANSITIONS {s.outs.length}\n"
547 for t in s.outs do
548 res.add "\t\t{t.elem} |-> s{t.to.number}\n"
549 end
550 res.add "\tACTIONS\n"
551 if s.is_lr0 then
552 res.add "\t\tSTATE LR0\n"
553 else
554 res.add "\t\tSTATE SLR\n"
555 for t, a in s.guarded_reduce do
556 if a.length > 1 then
557 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
558 break
559 else if s.shifts.has(t) then
560 res.add "\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
561 break
562 end
563 end
564 end
565 if not s.shifts.is_empty then
566 res.add "\t\tSHIFT {s.shifts.join(" ")}\n"
567 end
568 for r in s.reduces do
569 res.add "\t\tREDUCE {r}\n"
570 end
571 end
572 return res.to_s
573 end
574
575 # Generate a graphviz file of the automaton
576 fun to_dot(path: String)
577 do
578 var f = new FileWriter.open(path)
579 f.write("digraph g \{\n")
580 f.write("rankdir=LR;\n")
581 f.write("node[shape=Mrecord,height=0];\n")
582
583 for s in states do
584 f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
585 for i in s.core do
586 f.write "{i.to_s.escape_to_dot}\\l"
587 end
588 f.write("|")
589 for i in s.items do
590 if s.core.has(i) then continue
591 f.write "{i.to_s.escape_to_dot}\\l"
592 end
593 f.write "\""
594 if not s.is_lr0 then
595 var conflict = false
596 for t, a in s.guarded_reduce do
597 if a.length > 1 then
598 f.write ",color=red"
599 conflict = true
600 break
601 else if s.shifts.has(t) then
602 f.write ",color=orange"
603 conflict = true
604 break
605 end
606 end
607 if not conflict then
608 f.write ",color=blue"
609 end
610 end
611 f.write "];\n"
612 for t in s.outs do
613 f.write "s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\"];\n"
614 end
615 end
616 f.write("\}\n")
617 f.close
618 end
619
620 # Generate the parser of the automaton
621 fun gen_to_nit(filepath: String, name: String)
622 do
623 var gen = new Generator
624 gen.gen_to_nit(self, name)
625 var f = new FileWriter.open(filepath)
626 for s in gen.out do
627 f.write(s)
628 f.write("\n")
629 end
630 f.close
631 end
632 end
633
634 private class Generator
635 var out = new Array[String]
636 fun add(s: String) do out.add(s)
637 fun gen_to_nit(autom: LRAutomaton, name: String)
638 do
639 var states = autom.states
640 var gram = autom.grammar
641
642 add "# Parser generated by nitcc for the grammar {name}"
643 add "module {name}_parser is no_warning(\"missing-doc\",\"old-init\")"
644 add "import nitcc_runtime"
645
646 add "class Parser_{name}"
647 add "\tsuper Parser"
648 add "\tredef fun start_state do return state_{states.first.cname}"
649 add "end"
650
651 for s in states do
652 add "private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
653 end
654 for p in gram.prods do
655 add "private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
656 end
657
658 add "redef class NToken"
659 for s in states do
660 if not s.need_guard then continue
661 add "\t# guarded action for state {s.name}"
662 add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
663 add "\tprivate fun action_s{s.cname}(parser: Parser) do"
664 if s.reduces.length != 1 then
665 add "\t\tparser.parse_error"
666 else
667 gen_reduce_to_nit(s.reduces.first)
668 end
669 add "\tend"
670 end
671 add "end"
672
673 for t in gram.tokens do
674 if t.name == "Eof" then
675 add "redef class {t.cname}"
676 else
677 add "class {t.cname}"
678 end
679 add "\tsuper NToken"
680 for s in t.shifts do
681 if not s.need_guard then continue
682 add "\tredef fun action_s{s.cname}(parser) do"
683 gen_shift_to_nit(s, t)
684 add "\tend"
685 end
686 for s in t.reduces do
687 if not s.need_guard then continue
688 if s.reduces.length <= 1 then continue
689 add "\tredef fun action_s{s.cname}(parser) do"
690 gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
691 add "\tend"
692 end
693 add "\tredef fun node_name do return \"{t.name.escape_to_nit}\""
694 add "end"
695 end
696
697 add "redef class LRGoto"
698 for s in states do
699 if s.gotos.length <= 1 then continue
700 add "\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
701 end
702 add "end"
703
704 for p in gram.prods do
705 add "class Goto_{p.cname}"
706 add "\tsuper LRGoto"
707 for s in p.gotos do
708 if s.gotos.length <= 1 then continue
709 add "\tredef fun goto_s{s.cname}(parser) do"
710 gen_goto_to_nit(s, p)
711 add "\tend"
712 end
713 add "end"
714 end
715
716 var ps = gram.prods.to_a
717 ps.add_all(gram.ast_prods)
718 for p in ps do
719 if p.spe == null and not p.altone then
720 if p.name.has_suffix("?") or p.name.has_suffix("+") then continue
721 add "class {p.acname}"
722 add "\tsuper NProd"
723 add "\tredef fun node_name do return \"{p.name.escape_to_nit}\""
724 add "end"
725 end
726
727 var als = p.alts.to_a
728 als.add_all(p.ast_alts)
729 for a in als do
730 if a.trans then continue
731 add "class {a.cname}"
732 if p.altone then
733 add "\tsuper NProd"
734 else
735 add "\tsuper {p.acname}"
736 end
737 add "\tredef fun node_name do return \"{a.name.escape_to_nit}\""
738 var initarg = new Array[String]
739 for i in [0..a.elems.length[ do
740 add "\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
741 initarg.add("n_{a.elemname(i)}: {a.elems[i].acname}")
742 end
743 if initarg.is_empty then
744 add "\tinit do end"
745 else
746 add "\tinit({initarg.join(", ")}) do"
747 for i in [0..a.elems.length[ do
748 add "\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
749 end
750 add "\tend"
751 end
752 add "\tredef fun number_of_children do return {a.elems.length}"
753 add "\tredef fun child(i) do"
754 for i in [0..a.elems.length[ do
755 add "\t\tif i == {i} then return n_{a.elemname(i)}"
756 end
757 add "\t\tabort"
758 add "\tend"
759 add "end"
760 end
761 end
762
763 for s in states do
764 add "# State {s.name}"
765 add "private class LRState{s.cname}"
766 add "\tsuper LRState"
767
768 add "\tredef fun to_s do return \"{s.name.escape_to_nit}\""
769
770 var err = new Array[String]
771 for t in s.outs do
772 var e = t.elem
773 if e isa Production then err.add e.name
774 end
775 if err.is_empty then for t in s.outs do
776 var e = t.elem
777 if e isa Token then err.add e.name
778 end
779
780 add "\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\""
781
782 add "\tredef fun action(parser) do"
783 if s.need_guard then
784 add "\t\tparser.peek_token.action_s{s.cname}(parser)"
785 else if s.reduces.length == 1 then
786 gen_reduce_to_nit(s.reduces.first)
787 else
788 abort
789 end
790 add "\tend"
791
792 if not s.gotos.is_empty then
793 add "\tredef fun goto(parser, goto) do"
794 if s.gotos.length > 1 then
795 add "\t\tgoto.goto_s{s.cname}(parser)"
796 else
797 gen_goto_to_nit(s, s.gotos.first)
798 end
799 add "\tend"
800 end
801
802 add "end"
803 end
804
805
806 end
807
808 fun gen_shift_to_nit(s: LRState, t: Token)
809 do
810 var dest = s.trans(t)
811 add "\t\tparser.shift(state_{dest.cname})"
812
813 end
814
815 fun gen_goto_to_nit(s: LRState, p: Production)
816 do
817 var dest = s.trans(p)
818 add "\t\tparser.push(state_{dest.cname})"
819 end
820
821 fun gen_reduce_to_nit(alt: Alternative)
822 do
823 add "\t\t# REDUCE {alt}"
824 var i = alt.elems.length - 1
825 for e in alt.elems.to_a.reversed do
826 add "\t\tvar n{i} = parser.pop.as({e.acname})"
827 i -= 1
828 end
829
830 if alt.name.has_suffix("+_more") then
831 add "\t\tvar prod = n0"
832 add "\t\tn0.children.add(n1)"
833 else if alt.name.has_suffix("+_one") then
834 add "\t\tvar prod = new {alt.prod.acname}"
835 add "\t\tprod.children.add(n0)"
836 else
837 alt.make_codes
838 var cpt = 0
839 i = 0
840 var st = new Array[String]
841 for c in alt.codes.as(not null) do
842 if c isa CodePop then
843 st.add "n{i}"
844 i += 1
845 else if c isa CodeNull then
846 st.add "null"
847 else if c isa CodeNew then
848 var calt = c.alt
849 cpt += 1
850 var from = st.length - calt.elems.length
851 var args = new List[String]
852 for j in [from..st.length[ do
853 args.unshift(st.pop)
854 end
855
856 if args.is_empty then
857 add "\t\tvar p{cpt} = new {calt.cname}"
858 else
859 add "\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
860 end
861 #var x = 0
862 #for j in [from..st.length[ do
863 #if st[j] == "null" then continue
864 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
865 #x += 1
866 #end
867 st.add("p{cpt}")
868 end
869 end
870 assert st.length == 1
871 add "\t\tvar prod = {st.first}"
872 end
873
874 add "\t\tparser.node_stack.push prod"
875 if alt.prod.accept then
876 add "\t\tparser.stop = true"
877 else
878 add "\t\tparser.goto(goto_{alt.prod.cname})"
879 end
880 end
881 end
882
883 # A state in a LR automaton
884 class LRState
885 # Name of the automaton (short part from the start)
886 var name: String
887
888 # Mangled name
889 fun cname: String do return name.to_cmangle
890
891 # Number
892 var number: Int = -1
893
894 # Set of all items
895 var items = new HashSet[Item]
896
897 # Set of items only in the core
898 var core = new HashSet[Item]
899
900 # Outgoing transitions
901 var ins = new Array[LRTransition]
902
903 # Ingoing transitions
904 var outs = new Array[LRTransition]
905
906 # Trans function
907 fun trans(e: Element): nullable LRState
908 do
909 for t in outs do if t.elem == e then return t.to
910 return null
911 end
912
913 redef fun ==(o) do return o isa LRState and core == o.core
914 redef fun hash do return items.length
915
916 redef fun to_s do return items.join(" ; ")
917
918 # Add and item in the core
919 fun add(i: Item): Bool
920 do
921 if items.has(i) then return false
922
923 items.add(i)
924 if i.pos > 0 or i.alt.prod.accept then core.add(i)
925 return true
926 end
927
928 # Recursively extends item outside the core
929 fun extends(i: Item)
930 do
931 var e = i.next
932 if e == null then return
933 if not e isa Production then return
934 for i2 in e.start_state do
935 if add(i2) then extends(i2)
936 end
937 end
938
939 # SLR lookahead
940 fun lookahead(i: Item): Set[Item]
941 do
942 return i.alt.prod.afters
943 end
944
945 # Set of all reductions
946 var reduces = new ArraySet[Alternative]
947 # Set of all shifts
948 var shifts = new ArraySet[Token]
949 # Set of all goto
950 var gotos = new ArraySet[Production]
951 # Reduction guarded by tokens
952 var guarded_reduce = new HashMap[Token, Set[Item]]
953 # Shifts guarded by tokens
954 var guarded_shift = new HashMap[Token, Set[Item]]
955
956 # Does the state need a guard to perform an action?
957 fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
958
959 # Is the state LR0?
960 fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
961
962 # Compute guards and conflicts
963 fun analysis
964 do
965 # Extends the core
966 for i in items.to_a do
967 extends(i)
968 end
969
970 # Collect action and conflicts
971 for i in items do
972 var n = i.next
973 if n == null then
974 reduces.add(i.alt)
975 for i2 in lookahead(i) do
976 var t = i2.next
977 assert t isa Token
978 t.reduces.add(self)
979 if not guarded_reduce.has_key(t) then
980 guarded_reduce[t] = new ArraySet[Item]
981 end
982 guarded_reduce[t].add(i)
983 end
984 else if n isa Token then
985 shifts.add(n)
986 n.shifts.add(self)
987 if not guarded_shift.has_key(n) then
988 guarded_shift[n] = new ArraySet[Item]
989 end
990 guarded_shift[n].add(i)
991 else if n isa Production then
992 gotos.add(n)
993 n.gotos.add(self)
994 else
995 abort
996 end
997 end
998 # Token to remove as reduction guard to solve S/R conflicts
999 var removed_reduces = new Array[Token]
1000 for t, a in guarded_reduce do
1001 if a.length > 1 then
1002 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1003 for i in a do print "\treduce: {i}"
1004 else if guarded_shift.has_key(t) then
1005 var ri = a.first
1006 var confs = new Array[Item]
1007 var ress = new Array[String]
1008 var g = guarded_shift[t]
1009 for si in lookahead(ri) do
1010 if si.next != t then continue
1011 if not g.has(si) then
1012 confs.add(si)
1013 continue
1014 end
1015 var p = ri.alt.prod
1016 var csi: nullable Item = null
1017 for bsi in back_expand(si) do
1018 if bsi.alt.prod == p then
1019 csi = bsi
1020 break
1021 end
1022 end
1023 if csi == null then
1024 confs.add(si)
1025 continue
1026 end
1027 ress.add "\tshift: {si}"
1028 if si != csi then
1029 ress.add "\tcore shift: {csi}"
1030 end
1031 end
1032 if confs.is_empty then
1033 print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
1034 print "\treduce: {ri}"
1035 for r in ress do print r
1036 removed_reduces.add t
1037 else
1038 print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1039 print "\treduce: {ri}"
1040 for i in guarded_shift[t] do print "\tshift: {i}"
1041 removed_reduces.add t
1042 end
1043 end
1044 end
1045 for t in removed_reduces do
1046 guarded_reduce.keys.remove(t)
1047 end
1048 end
1049
1050 # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
1051 fun back_expand(i: Item): Set[Item]
1052 do
1053 var res = new ArraySet[Item]
1054 var todo = [i]
1055 res.add(i)
1056 while not todo.is_empty do
1057 var x = todo.pop
1058 if x.pos > 0 then continue
1059 var p = x.alt.prod
1060 for y in items do
1061 if y.next != p then continue
1062 if res.has(y) then continue
1063 res.add(y)
1064 todo.add(y)
1065 end
1066 end
1067 return res
1068 end
1069 end
1070
1071 # A transition in a LR automaton
1072 class LRTransition
1073 # The origin state
1074 var from: LRState
1075 # The destination state
1076 var to: LRState
1077 # The element labeling the transition
1078 var elem: Element
1079 end
1080
1081 # A alternative with a cursor (dot) before an element
1082 class Item
1083 # The alternative
1084 var alt: Alternative
1085 # The dot index (0 means before the first element)
1086 var pos: Int
1087
1088 redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
1089 redef fun hash do return alt.hash + pos
1090
1091 redef fun to_s
1092 do
1093 var b = new FlatBuffer
1094 b.append("{alt.prod.name}::{alt.name}=")
1095 for i in [0..alt.elems.length[
1096 do
1097 if pos == i then b.append(".") else b.append(" ")
1098 b.append(alt.elems[i].to_s)
1099 end
1100 if pos == alt.elems.length then b.append(".")
1101 return b.to_s
1102 end
1103
1104 # The element that follows the dot, null if the dot is at the end
1105 fun next: nullable Element
1106 do
1107 if pos >= alt.elems.length then return null
1108 return alt.elems[pos]
1109 end
1110
1111 # SLR lookahead
1112 fun lookahead: Set[Token]
1113 do
1114 var res = new HashSet[Token]
1115 var p = pos + 1
1116 while p < alt.elems.length do
1117 var e = alt.elems[p]
1118 if e isa Token then
1119 res.add(e)
1120 break
1121 else if e isa Production then
1122 #res.add_all(e.firsts)
1123 if not e.is_nullable then break
1124 end
1125 p += 1
1126 end
1127 return res
1128 end
1129
1130 # The item that advanced once
1131 fun avance: Item
1132 do
1133 var res = new Item(alt, pos+1)
1134 return res
1135 end
1136 end