48c61955a9a82ae343eb93448c0821fe5f58f6cc
[nit.git] / contrib / nitcc / src / grammar.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A gramar describing a language
16 class Gram
17 # The productions (non-terminal) of the conctete grammar
18 var prods = new Array[Production]
19
20 # The additionnal abstract productions of the grammar
21 # TODO clean AST
22 var ast_prods = new Array[Production]
23
24 # The tokens (terminals) of the grammar
25 var tokens = new Array[Token]
26
27 # Dump of the concrete grammar and the transformations
28 fun pretty: String
29 do
30 var res = new FlatBuffer
31 for p in prods do
32 if p.spe != null then
33 res.append("{p.name} \{-> {p.spe.name}\}=\n")
34 else
35 res.append("{p.name} =\n")
36 end
37 var last = null
38 if not p.alts.is_empty then p.alts.last
39 for a in p.alts do
40 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a.codes == null then a.make_codes
42 res.append("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a == last then res.append(" ;\n") else res.append(" |\n")
44 end
45 if p.is_nullable then res.append "\t// is nullable\n"
46 if not p.firsts.is_empty then
47 res.append "\t// firsts:\n"
48 for x in p.firsts do res.append "\t// {x}\n"
49 end
50 if not p.afters.is_empty then
51 res.append "\t// afters:\n"
52 for x in p.afters do res.append "\t// {x}\n"
53 end
54 end
55 return res.to_s
56 end
57
58 # Inline (ie. remove from the conctete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline(prods: Collection[Production])
61 do
62 for p in self.prods do
63 for a in p.alts.to_a do
64 if a.phony then continue
65 var to_inline = false
66 for e in a.elems do
67 if e isa Production and prods.has(e) then to_inline = true
68 end
69 if not to_inline then continue
70
71 if a.codes == null then a.make_codes
72
73 var a0 = new Alternative(p, a.name, new Array[Element])
74 a0.trans = true
75 a0.codes = new Array[Code]
76 var pool = [a0]
77 var pool2 = new Array[Alternative]
78 for e in a.elems do
79 if not e isa Production or not prods.has(e) then
80 for x in pool do
81 x.elems.add(e)
82 x.codes.add(new CodePop)
83 end
84 continue
85 end
86 if p == e then
87 print "Circular inlining on {p} :: {a}"
88 abort
89 end
90 pool2.clear
91 for a2 in e.alts do
92 if a.phony then continue
93 if a2.codes == null then a2.make_codes
94 for x in pool do
95 var name = a.name + "_" + pool2.length.to_s
96 var na = new Alternative(p, name, new Array[Element])
97 na.trans = true
98 pool2.add(na)
99 na.elems.add_all(x.elems)
100 na.elems.add_all(a2.elems)
101 na.codes = new Array[Code]
102 na.codes.add_all(x.codes.as(not null))
103 na.codes.add_all(a2.codes.as(not null))
104 end
105 end
106 var tmp = pool
107 pool = pool2
108 pool2 = tmp
109 end
110 for x in pool do
111 x.codes.add(a.codes.last)
112 end
113 p.ast_alts.add(a)
114 p.alts.remove(a)
115 p.alts.add_all(pool)
116 end
117 end
118 for p in prods do
119 self.prods.remove(p)
120 self.ast_prods.add(p)
121 end
122 end
123
124 # Compute a LR automaton
125 fun lr0: LRAutomaton
126 do
127 var start = new Production("_start")
128 start.accept = true
129 var eof = new Token("Eof")
130 tokens.add(eof)
131 start.new_alt("Start", self.prods.first, eof)
132 prods.add(start)
133
134 analyse
135
136 var first = new LRState("Start")
137 first.number = 0
138 for i in start.start_state do first.add(i)
139
140 var automaton = new LRAutomaton(self)
141
142 var todo = new List[LRState]
143 todo.add first
144 var seen = new HashSet[LRState]
145 seen.add first
146
147 while not todo.is_empty do
148 var state = todo.shift
149
150 #print state
151 automaton.states.add(state)
152 state.analysis
153
154 var nexts = new HashMap[Element, LRState]
155 for i in state.items do
156 var e = i.next
157 if e == null then continue
158 if nexts.has_key(e) then
159 nexts[e].add(i.avance)
160 else
161 var name
162 if state == automaton.states.first then
163 name = e.to_s
164 else
165 name = "{state.name} {e}"
166 end
167 var next = new LRState(name)
168 nexts[e] = next
169 next.add(i.avance)
170 end
171 end
172
173 for e, next in nexts do
174
175 #print "#states: {seen.length}"
176
177 # Look for an existing LR0 state in the automaton
178 var new_state = true
179 for n in seen do
180 if n == next then
181 next = n
182 new_state = false
183 break
184 end
185 end
186
187 # If not, add it to the pool and the todo-list
188 if new_state then
189 next.number = seen.length
190 assert not seen.has(next)
191 seen.add(next)
192 todo.add(next)
193 end
194
195 # Add the transition
196 var t = new LRTransition(state, next, e)
197 state.outs.add t
198 next.ins.add t
199 end
200 end
201 return automaton
202 end
203
204 # Compute `nullables`, `firsts` and `afters` of productions
205 fun analyse
206 do
207 loop
208 var changed = false
209 for p in prods do
210 if p.is_nullable then continue
211 for a in p.alts do
212 if a.phony then continue
213 var nullabl = true
214 for e in a.elems do
215 if e isa Token then
216 nullabl = false
217 break
218 else if e isa Production then
219 if not e.is_nullable then nullabl = false
220 break
221 else
222 abort
223 end
224 end
225 if nullabl then
226 p.is_nullable = true
227 changed = true
228 end
229 end
230 end
231 if not changed then break
232 end
233
234 loop
235 var changed = false
236 for p in prods do
237 var fs = p.firsts
238 for a in p.alts do
239 if a.phony then continue
240 var i = a.first_item
241 loop
242 var e = i.next
243 if e == null then break
244 if e isa Token then
245 if try_add(fs, i) then changed = true
246 break
247 else if e isa Production then
248 for t in e.firsts do
249 if try_add(fs, t) then changed = true
250 end
251 if not e.is_nullable then break
252 else
253 abort
254 end
255 i = i.avance
256 end
257 end
258 end
259 if not changed then break
260 end
261
262 loop
263 var changed = false
264 for p1 in prods do
265 for a in p1.alts do
266 if a.phony then continue
267 var p0: nullable Production = null
268 var i = a.first_item
269 loop
270 var e = i.next
271 if e == null then break
272 var p = p0
273 if e isa Production then p0 = e else p0 = null
274 if p == null then
275 i = i.avance
276 continue
277 end
278
279 var afs = p.afters
280
281 if e isa Token then
282 if try_add(afs, i) then changed = true
283 else if e isa Production then
284 for t in e.firsts do
285 if try_add(afs, t) then changed = true
286 end
287 if e.is_nullable then
288 for t in e.afters do
289 if try_add(afs, t) then changed = true
290 end
291 end
292 else
293 abort
294 end
295 i = i.avance
296 end
297 if p0 != null then
298 var afs = p0.afters
299 for t in p1.afters do
300 if try_add(afs, t) then changed = true
301 end
302 end
303 end
304 end
305 if not changed then break
306 end
307 end
308
309 # used by `analyse`
310 private fun try_add(set: Set[Item], t: Item): Bool
311 do
312 var res = not set.has(t)
313 if res then set.add(t)
314 return res
315 end
316
317 end
318
319 # A production of a grammar
320 class Production
321 super Element
322 # The alternative of the production
323 var alts = new Array[Alternative]
324
325 # Additionnal alternatives in the AST
326 var ast_alts = new Array[Alternative]
327
328 # Is self the accept production
329 var accept = false
330
331 # Is self transformed to a other production for the AST
332 # FIXME: cleaup AST
333 var spe: nullable Production writable = null
334
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
336 # FIXME cleanup AST
337 var altone writable = false
338
339 # The cname of the class in the AST
340 # FIXME: cleanup AST
341 redef fun acname do
342 if spe != null then return spe.acname
343 return super
344 end
345
346 # Is the production nullable
347 var is_nullable = false
348 # The first tokens of the production
349 var firsts = new HashSet[Item]
350 # The tokens that may follows the production (as in SLR)
351 var afters = new HashSet[Item]
352
353
354 # Crate a new empty alternative
355 fun new_alt0(name: String): Alternative
356 do
357 var a = new Alternative(self, name, new Array[Element])
358 alts.add a
359 return a
360 end
361
362 # Crate a new alternative with some elements
363 fun new_alt(name: String, elems: Element...): Alternative
364 do
365 var a = new Alternative(self, name, elems)
366 alts.add a
367 return a
368 end
369
370 # Crate a new alternative with some elements
371 fun new_alt2(name: String, elems: Array[Element]): Alternative
372 do
373 var a = new Alternative(self, name, elems)
374 alts.add a
375 return a
376 end
377
378 # Return a set of items for the production
379 fun start_state: Array[Item]
380 do
381 var res = new Array[Item]
382 for a in alts do
383 if a.phony then continue
384 res.add a.first_item
385 end
386 return res
387 end
388
389 # States in the LR automaton that has a outgoing transition on self
390 var gotos = new ArraySet[LRState]
391 end
392
393 # An alternative of a production
394 class Alternative
395 # The production
396 var prod: Production
397
398 # The name of the alternative
399 var name: String writable
400
401 # The elements of the alternative
402 var elems: Array[Element]
403
404 # The first item of the alternative
405 var first_item = new Item(self, 0)
406
407 # The name of the elements
408 var elems_names = new Array[nullable String]
409
410 # Return a name for a given element (or a number if unamed)
411 fun elemname(i: Int): String
412 do
413 if i >= elems_names.length then return "{i}"
414 var n = elems_names[i]
415 if n == null then return "{i}"
416 return n
417 end
418
419 redef fun to_s do return "{prod.name}::{name}={elems.join(" ")}"
420
421 # A mangled name
422 fun cname: String do
423 return "N{name.to_cmangle}"
424 end
425
426 # The code for the reduction
427 var codes: nullable Array[Code] writable = null
428
429 # Is the alternative transformed (ie not in the AST)
430 var trans writable = false
431
432 # Is the alternative unparsable? (ie not in the automaton)
433 var phony writable = false
434
435 # Imitialize codes with the elements
436 fun make_codes
437 do
438 if codes != null then return
439 var codes = new Array[Code]
440 self.codes = codes
441 for e in elems do
442 codes.add(new CodePop)
443 end
444 codes.add(new CodeNew(self))
445 end
446 end
447
448 # A step in the construction of the AST. used to modelize transformations
449 interface Code
450 end
451 # Get a element from the stack
452 class CodePop
453 super Code
454 redef fun to_s do return "pop"
455 end
456 # Allocate a new AST node for an alternative using the correct number of poped elements
457 class CodeNew
458 super Code
459 var alt: Alternative
460 redef fun to_s do return "New {alt.name}/{alt.elems.length}"
461 end
462 # Get null
463 class CodeNull
464 super Code
465 redef fun to_s do return "null"
466 end
467
468 # Elements of an alternative.
469 # Either a `Production` or a `Token`
470 abstract class Element
471 # The name of the element
472 var name: String
473 redef fun to_s do return name
474
475 private var acname_cache: nullable String = null
476
477 # The mangled name of the element
478 fun cname: String do return "N{name.to_cmangle}"
479
480 # the name of the class in the AST
481 fun acname: String do
482 var res = acname_cache
483 if res == null then
484 res = "N{to_s.to_cmangle}"
485 acname_cache = res
486 end
487 return res
488 end
489 fun acname=(s: String) do acname_cache = s
490 end
491
492 # A terminal element
493 class Token
494 super Element
495 # States of the LR automatio that shift on self
496 var shifts = new ArraySet[LRState]
497 # States of the LR automatio that reduce on self in the lookahead(1)
498 var reduces = new ArraySet[LRState]
499 end
500
501 #
502
503 # A LR automaton
504 class LRAutomaton
505 # The grammar of the automaton
506 var grammar: Gram
507
508 # The set of states
509 var states = new Array[LRState]
510
511 # Dump of the automaton
512 fun pretty: String
513 do
514 var res = new Array[String]
515 res.add "* LRAutomaton: {states.length} states\n"
516 for s in states do
517 res.add "s{s.number} {s.name}\n"
518 res.add "\tCORE\n"
519 for i in s.core do
520 res.add "\t\t{i}\n"
521 if i.next != null then continue
522 for i2 in s.lookahead(i) do
523 res.add "\t\t\t{i2}\n"
524 end
525 end
526 res.add "\tOTHER ITEMS\n"
527 for i in s.items do
528 if s.core.has(i) then continue
529 res.add "\t\t{i}\n"
530 if i.next != null then continue
531 for i2 in s.lookahead(i) do
532 res.add "\t\t\t{i2}\n"
533 end
534 end
535 res.add "\tTRANSITIONS {s.outs.length}\n"
536 for t in s.outs do
537 res.add "\t\t{t.elem} |-> s{t.to.number}\n"
538 end
539 res.add "\tACTIONS\n"
540 if s.is_lr0 then
541 res.add "\t\tSTATE LR0\n"
542 else
543 res.add "\t\tSTATE SLR\n"
544 for t, a in s.guarded_reduce do
545 if a.length > 1 then
546 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
547 break
548 else if s.shifts.has(t) then
549 res.add "\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
550 break
551 end
552 end
553 end
554 if not s.shifts.is_empty then
555 res.add "\t\tSHIFT {s.shifts.join(" ")}\n"
556 end
557 for r in s.reduces do
558 res.add "\t\tREDUCE {r}\n"
559 end
560 end
561 return res.to_s
562 end
563
564 # Generate a graphviz file of the automaton
565 fun to_dot(path: String)
566 do
567 var f = new OFStream.open(path)
568 f.write("digraph g \{\n")
569 f.write("rankdir=LR;\n")
570 f.write("node[shape=Mrecord,height=0];\n")
571
572 for s in states do
573 f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
574 for i in s.core do
575 f.write "{i.to_s.escape_to_dot}\\l"
576 end
577 f.write("|")
578 for i in s.items do
579 if s.core.has(i) then continue
580 f.write "{i.to_s.escape_to_dot}\\l"
581 end
582 f.write "\""
583 if not s.is_lr0 then
584 var conflict = false
585 for t, a in s.guarded_reduce do
586 if a.length > 1 then
587 f.write ",color=red"
588 conflict = true
589 break
590 else if s.shifts.has(t) then
591 f.write ",color=orange"
592 conflict = true
593 break
594 end
595 end
596 if not conflict then
597 f.write ",color=blue"
598 end
599 end
600 f.write "];\n"
601 for t in s.outs do
602 f.write "s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\"];\n"
603 end
604 end
605 f.write("\}\n")
606 f.close
607 end
608
609 # Generate the parser of the automaton
610 fun gen_to_nit(filepath: String, name: String)
611 do
612 var gen = new Generator
613 gen.gen_to_nit(self, name)
614 var f = new OFStream.open(filepath)
615 for s in gen.out do
616 f.write(s)
617 f.write("\n")
618 end
619 f.close
620 end
621 end
622
623 redef class String
624 # escape string used in labels for graphviz
625 fun escape_to_dot: String
626 do
627 return escape_more_to_c("|\{\}<>")
628 end
629 end
630
631 private class Generator
632 var out = new Array[String]
633 fun add(s: String) do out.add(s)
634 fun gen_to_nit(autom: LRAutomaton, name: String)
635 do
636 var states = autom.states
637 var gram = autom.grammar
638
639 add "# Parser generated by nitcc for the grammar {name}"
640 add "import nitcc_runtime"
641
642 add "class Parser_{name}"
643 add "\tsuper Parser"
644 add "\tredef fun start_state do return state_{states.first.cname}"
645 add "end"
646
647 add "redef class Object"
648 for s in states do
649 add "\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
650 end
651 for p in gram.prods do
652 add "\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
653 end
654 add "end"
655
656 add "redef class NToken"
657 for s in states do
658 if not s.need_guard then continue
659 add "\t# guarded action for state {s.name}"
660 add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
661 add "\tprivate fun action_s{s.cname}(parser: Parser) do"
662 if s.reduces.length != 1 then
663 add "\t\tparser.parse_error"
664 else
665 gen_reduce_to_nit(s.reduces.first)
666 end
667 add "\tend"
668 end
669 add "end"
670
671 for t in gram.tokens do
672 if t.name == "Eof" then
673 add "redef class {t.cname}"
674 else
675 add "class {t.cname}"
676 end
677 add "\tsuper NToken"
678 for s in t.shifts do
679 if not s.need_guard then continue
680 add "\tredef fun action_s{s.cname}(parser) do"
681 gen_shift_to_nit(s, t)
682 add "\tend"
683 end
684 for s in t.reduces do
685 if not s.need_guard then continue
686 if s.reduces.length <= 1 then continue
687 add "\tredef fun action_s{s.cname}(parser) do"
688 gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
689 add "\tend"
690 end
691 add "\tredef fun node_name do return \"{t.name.escape_to_nit}\""
692 add "end"
693 end
694
695 add "redef class LRGoto"
696 for s in states do
697 if s.gotos.length <= 1 then continue
698 add "\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
699 end
700 add "end"
701
702 for p in gram.prods do
703 add "class Goto_{p.cname}"
704 add "\tsuper LRGoto"
705 for s in p.gotos do
706 if s.gotos.length <= 1 then continue
707 add "\tredef fun goto_s{s.cname}(parser) do"
708 gen_goto_to_nit(s, p)
709 add "\tend"
710 end
711 add "end"
712 end
713
714 var ps = gram.prods.to_a
715 ps.add_all(gram.ast_prods)
716 for p in ps do
717 if p.spe == null and not p.altone then
718 if p.name.has_suffix("?") or p.name.has_suffix("+") then continue
719 add "class {p.acname}"
720 add "\tsuper NProd"
721 add "\tredef fun node_name do return \"{p.name.escape_to_nit}\""
722 add "end"
723 end
724
725 var als = p.alts.to_a
726 als.add_all(p.ast_alts)
727 for a in als do
728 if a.trans then continue
729 add "class {a.cname}"
730 if p.altone then
731 add "\tsuper NProd"
732 else
733 add "\tsuper {p.acname}"
734 end
735 add "\tredef fun node_name do return \"{a.name.escape_to_nit}\""
736 var initarg = new Array[String]
737 for i in [0..a.elems.length[ do
738 add "\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
739 initarg.add("n_{a.elemname(i)}: {a.elems[i].acname}")
740 end
741 if initarg.is_empty then
742 add "\tinit do end"
743 else
744 add "\tinit({initarg.join(", ")}) do"
745 for i in [0..a.elems.length[ do
746 add "\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
747 end
748 add "\tend"
749 end
750 add "\tredef fun number_of_children do return {a.elems.length}"
751 add "\tredef fun child(i) do"
752 for i in [0..a.elems.length[ do
753 add "\t\tif i == {i} then return n_{a.elemname(i)}"
754 end
755 add "\t\tabort"
756 add "\tend"
757 add "end"
758 end
759 end
760
761 for s in states do
762 add "# State {s.name}"
763 add "private class LRState{s.cname}"
764 add "\tsuper LRState"
765
766 add "\tredef fun to_s do return \"{s.name.escape_to_nit}\""
767
768 var err = new Array[String]
769 for t in s.outs do
770 var e = t.elem
771 if e isa Production then err.add e.name
772 end
773 if err.is_empty then for t in s.outs do
774 var e = t.elem
775 if e isa Token then err.add e.name
776 end
777
778 add "\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\""
779
780 add "\tredef fun action(parser) do"
781 if s.need_guard then
782 add "\t\tparser.peek_token.action_s{s.cname}(parser)"
783 else if s.reduces.length == 1 then
784 gen_reduce_to_nit(s.reduces.first)
785 else
786 abort
787 end
788 add "\tend"
789
790 if not s.gotos.is_empty then
791 add "\tredef fun goto(parser, goto) do"
792 if s.gotos.length > 1 then
793 add "\t\tgoto.goto_s{s.cname}(parser)"
794 else
795 gen_goto_to_nit(s, s.gotos.first)
796 end
797 add "\tend"
798 end
799
800 add "end"
801 end
802
803
804 end
805
806 fun gen_shift_to_nit(s: LRState, t: Token)
807 do
808 var dest = s.trans(t)
809 add "\t\tparser.shift(state_{dest.cname})"
810
811 end
812
813 fun gen_goto_to_nit(s: LRState, p: Production)
814 do
815 var dest = s.trans(p)
816 add "\t\tparser.push(state_{dest.cname})"
817 end
818
819 fun gen_reduce_to_nit(alt: Alternative)
820 do
821 add "\t\t# REDUCE {alt}"
822 var i = alt.elems.length - 1
823 for e in alt.elems.to_a.reversed do
824 add "\t\tvar n{i} = parser.pop.as({e.acname})"
825 i -= 1
826 end
827
828 if alt.name.has_suffix("+_more") then
829 add "\t\tvar prod = n0"
830 add "\t\tn0.children.add(n1)"
831 else if alt.name.has_suffix("+_one") then
832 add "\t\tvar prod = new {alt.prod.acname}"
833 add "\t\tprod.children.add(n0)"
834 else
835 alt.make_codes
836 var cpt = 0
837 i = 0
838 var st = new Array[String]
839 for c in alt.codes.as(not null) do
840 if c isa CodePop then
841 st.add "n{i}"
842 i += 1
843 else if c isa CodeNull then
844 st.add "null"
845 else if c isa CodeNew then
846 var calt = c.alt
847 cpt += 1
848 var from = st.length - calt.elems.length
849 var args = new List[String]
850 for j in [from..st.length[ do
851 args.unshift(st.pop)
852 end
853
854 if args.is_empty then
855 add "\t\tvar p{cpt} = new {calt.cname}"
856 else
857 add "\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
858 end
859 #var x = 0
860 #for j in [from..st.length[ do
861 #if st[j] == "null" then continue
862 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
863 #x += 1
864 #end
865 st.add("p{cpt}")
866 end
867 end
868 assert st.length == 1
869 add "\t\tvar prod = {st.first}"
870 end
871
872 add "\t\tparser.node_stack.push prod"
873 if alt.prod.accept then
874 add "\t\tparser.stop = true"
875 else
876 add "\t\tparser.goto(goto_{alt.prod.cname})"
877 end
878 end
879 end
880
881 # A state in a LR automaton
882 class LRState
883 # name of the automaton (short part from the start)
884 var name: String
885
886 # malglen name
887 fun cname: String do return name.to_cmangle
888
889 # number
890 var number: Int = -1
891
892 # Set of all items
893 var items = new HashSet[Item]
894
895 # Set of items only in the core
896 var core = new HashSet[Item]
897
898 # Outgoing transitions
899 var ins = new Array[LRTransition]
900
901 # Ingoing transitions
902 var outs = new Array[LRTransition]
903
904 # trans function
905 fun trans(e: Element): nullable LRState
906 do
907 for t in outs do if t.elem == e then return t.to
908 return null
909 end
910
911 redef fun ==(o) do return o isa LRState and core == o.core
912 redef fun hash do return items.length
913
914 redef fun to_s do return items.join(" ; ")
915
916 # Add and item in the core
917 fun add(i: Item): Bool
918 do
919 if items.has(i) then return false
920
921 items.add(i)
922 if i.pos > 0 or i.alt.prod.accept then core.add(i)
923 return true
924 end
925
926 # Recusively extends item outside the core
927 fun extends(i: Item)
928 do
929 var e = i.next
930 if e == null then return
931 if not e isa Production then return
932 for i2 in e.start_state do
933 if add(i2) then extends(i2)
934 end
935 end
936
937 # SLR lookahead
938 fun lookahead(i: Item): Set[Item]
939 do
940 return i.alt.prod.afters
941 end
942
943 # Set of all reductions
944 var reduces = new ArraySet[Alternative]
945 # Set of all shifts
946 var shifts = new ArraySet[Token]
947 # Set of all goto
948 var gotos = new ArraySet[Production]
949 # Reduction guarded by tokens
950 var guarded_reduce = new HashMap[Token, Set[Item]]
951 # Shitfs guarded by tokens
952 var guarded_shift = new HashMap[Token, Set[Item]]
953
954 # Does the state need a guard to perform an action?
955 fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
956
957 # Is the state LR0?
958 fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
959
960 # compute guards and conflicts
961 fun analysis
962 do
963 # Extends the core
964 for i in items.to_a do
965 extends(i)
966 end
967
968 # Collect action and conflicts
969 for i in items do
970 var n = i.next
971 if n == null then
972 reduces.add(i.alt)
973 for i2 in lookahead(i) do
974 var t = i2.next
975 assert t isa Token
976 t.reduces.add(self)
977 if not guarded_reduce.has_key(t) then
978 guarded_reduce[t] = new ArraySet[Item]
979 end
980 guarded_reduce[t].add(i)
981 end
982 else if n isa Token then
983 shifts.add(n)
984 n.shifts.add(self)
985 if not guarded_shift.has_key(n) then
986 guarded_shift[n] = new ArraySet[Item]
987 end
988 guarded_shift[n].add(i)
989 else if n isa Production then
990 gotos.add(n)
991 n.gotos.add(self)
992 else
993 abort
994 end
995 end
996 for t, a in guarded_reduce do
997 if a.length > 1 then
998 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
999 for i in a do print "\treduce: {i}"
1000 else if guarded_shift.has_key(t) then
1001 var ri = a.first
1002 var confs = new Array[Item]
1003 var ress = new Array[String]
1004 var g = guarded_shift[t]
1005 for si in lookahead(ri) do
1006 if si.next != t then continue
1007 if not g.has(si) then
1008 confs.add(si)
1009 continue
1010 end
1011 var p = ri.alt.prod
1012 var csi: nullable Item = null
1013 for bsi in back_expand(si) do
1014 if bsi.alt.prod == p then
1015 csi = bsi
1016 break
1017 end
1018 end
1019 if csi == null then
1020 confs.add(si)
1021 continue
1022 end
1023 ress.add "\tshift: {si}"
1024 if si != csi then
1025 ress.add "\tcore shift: {csi}"
1026 end
1027 end
1028 if confs.is_empty then
1029 print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
1030 print "\treduce: {ri}"
1031 for r in ress do print r
1032 guarded_reduce.keys.remove(t)
1033 else
1034 print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1035 print "\treduce: {ri}"
1036 for i in guarded_shift[t] do print "\tshift: {i}"
1037 end
1038 end
1039 end
1040 end
1041
1042 # Return `i` and all other items of the state that expands, directly or undirectly, to `i`
1043 fun back_expand(i: Item): Set[Item]
1044 do
1045 var res = new ArraySet[Item]
1046 var todo = [i]
1047 res.add(i)
1048 while not todo.is_empty do
1049 var x = todo.pop
1050 if x.pos > 0 then continue
1051 var p = x.alt.prod
1052 for y in items do
1053 if y.next != p then continue
1054 if res.has(y) then continue
1055 res.add(y)
1056 todo.add(y)
1057 end
1058 end
1059 return res
1060 end
1061 end
1062
1063 # A transition in a LR automaton
1064 class LRTransition
1065 # The origin state
1066 var from: LRState
1067 # The testination state
1068 var to: LRState
1069 # The element labeling the transition
1070 var elem: Element
1071 end
1072
1073 # A alternative with a cursor (dot) before an element
1074 class Item
1075 # The alternative
1076 var alt: Alternative
1077 # The dot index (0 means before the first element)
1078 var pos: Int
1079
1080 redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
1081 redef fun hash do return alt.hash + pos
1082
1083 redef fun to_s
1084 do
1085 var b = new FlatBuffer
1086 b.append("{alt.prod.name}::{alt.name}=")
1087 for i in [0..alt.elems.length[
1088 do
1089 if pos == i then b.append(".") else b.append(" ")
1090 b.append(alt.elems[i].to_s)
1091 end
1092 if pos == alt.elems.length then b.append(".")
1093 return b.to_s
1094 end
1095
1096 # The element thatr follow the dot, null if the fdot is at the end
1097 fun next: nullable Element
1098 do
1099 if pos >= alt.elems.length then return null
1100 return alt.elems[pos]
1101 end
1102
1103 # SLR loohahead
1104 fun lookahead: Set[Token]
1105 do
1106 var res = new HashSet[Token]
1107 var p = pos + 1
1108 while p < alt.elems.length do
1109 var e = alt.elems[p]
1110 if e isa Token then
1111 res.add(e)
1112 break
1113 else if e isa Production then
1114 #res.add_all(e.firsts)
1115 if not e.is_nullable then break
1116 end
1117 p += 1
1118 end
1119 return res
1120 end
1121
1122 # The item that advanced once
1123 fun avance: Item
1124 do
1125 var res = new Item(alt, pos+1)
1126 return res
1127 end
1128 end