lib/standard/string: Moved escape_to_dot from nitcc to standard/string.nit
[nit.git] / contrib / nitcc / src / grammar.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A gramar describing a language
16 class Gram
17 # The productions (non-terminal) of the conctete grammar
18 var prods = new Array[Production]
19
20 # The additionnal abstract productions of the grammar
21 # TODO clean AST
22 var ast_prods = new Array[Production]
23
24 # The tokens (terminals) of the grammar
25 var tokens = new Array[Token]
26
27 # Dump of the concrete grammar and the transformations
28 fun pretty: String
29 do
30 var res = new FlatBuffer
31 for p in prods do
32 if p.spe != null then
33 res.append("{p.name} \{-> {p.spe.name}\}=\n")
34 else
35 res.append("{p.name} =\n")
36 end
37 var last = null
38 if not p.alts.is_empty then p.alts.last
39 for a in p.alts do
40 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a.codes == null then a.make_codes
42 res.append("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a == last then res.append(" ;\n") else res.append(" |\n")
44 end
45 if p.is_nullable then res.append "\t// is nullable\n"
46 if not p.firsts.is_empty then
47 res.append "\t// firsts:\n"
48 for x in p.firsts do res.append "\t// {x}\n"
49 end
50 if not p.afters.is_empty then
51 res.append "\t// afters:\n"
52 for x in p.afters do res.append "\t// {x}\n"
53 end
54 end
55 return res.to_s
56 end
57
58 # Inline (ie. remove from the conctete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline(prods: Collection[Production])
61 do
62 for p in self.prods do
63 for a in p.alts.to_a do
64 if a.phony then continue
65 var to_inline = false
66 for e in a.elems do
67 if e isa Production and prods.has(e) then to_inline = true
68 end
69 if not to_inline then continue
70
71 if a.codes == null then a.make_codes
72
73 var a0 = new Alternative(p, a.name, new Array[Element])
74 a0.trans = true
75 a0.codes = new Array[Code]
76 var pool = [a0]
77 var pool2 = new Array[Alternative]
78 for e in a.elems do
79 if not e isa Production or not prods.has(e) then
80 for x in pool do
81 x.elems.add(e)
82 x.codes.add(new CodePop)
83 end
84 continue
85 end
86 if p == e then
87 print "Circular inlining on {p} :: {a}"
88 abort
89 end
90 pool2.clear
91 for a2 in e.alts do
92 if a.phony then continue
93 if a2.codes == null then a2.make_codes
94 for x in pool do
95 var name = a.name + "_" + pool2.length.to_s
96 var na = new Alternative(p, name, new Array[Element])
97 na.trans = true
98 pool2.add(na)
99 na.elems.add_all(x.elems)
100 na.elems.add_all(a2.elems)
101 na.codes = new Array[Code]
102 na.codes.add_all(x.codes.as(not null))
103 na.codes.add_all(a2.codes.as(not null))
104 end
105 end
106 var tmp = pool
107 pool = pool2
108 pool2 = tmp
109 end
110 for x in pool do
111 x.codes.add(a.codes.last)
112 end
113 p.ast_alts.add(a)
114 p.alts.remove(a)
115 p.alts.add_all(pool)
116 end
117 end
118 for p in prods do
119 self.prods.remove(p)
120 self.ast_prods.add(p)
121 end
122 end
123
124 # Compute a LR automaton
125 fun lr0: LRAutomaton
126 do
127 var start = new Production("_start")
128 start.accept = true
129 var eof = new Token("Eof")
130 tokens.add(eof)
131 start.new_alt("Start", self.prods.first, eof)
132 prods.add(start)
133
134 analyse
135
136 var first = new LRState("Start")
137 first.number = 0
138 for i in start.start_state do first.add(i)
139
140 var automaton = new LRAutomaton(self)
141
142 var todo = new List[LRState]
143 todo.add first
144 var seen = new HashSet[LRState]
145 seen.add first
146
147 while not todo.is_empty do
148 var state = todo.shift
149
150 #print state
151 automaton.states.add(state)
152 state.analysis
153
154 var nexts = new HashMap[Element, LRState]
155 for i in state.items do
156 var e = i.next
157 if e == null then continue
158 if nexts.has_key(e) then
159 nexts[e].add(i.avance)
160 else
161 var name
162 if state == automaton.states.first then
163 name = e.to_s
164 else
165 name = "{state.name} {e}"
166 end
167 var next = new LRState(name)
168 nexts[e] = next
169 next.add(i.avance)
170 end
171 end
172
173 for e, next in nexts do
174
175 #print "#states: {seen.length}"
176
177 # Look for an existing LR0 state in the automaton
178 var new_state = true
179 for n in seen do
180 if n == next then
181 next = n
182 new_state = false
183 break
184 end
185 end
186
187 # If not, add it to the pool and the todo-list
188 if new_state then
189 next.number = seen.length
190 assert not seen.has(next)
191 seen.add(next)
192 todo.add(next)
193 end
194
195 # Add the transition
196 var t = new LRTransition(state, next, e)
197 state.outs.add t
198 next.ins.add t
199 end
200 end
201 return automaton
202 end
203
204 # Compute `nullables`, `firsts` and `afters` of productions
205 fun analyse
206 do
207 loop
208 var changed = false
209 for p in prods do
210 if p.is_nullable then continue
211 for a in p.alts do
212 if a.phony then continue
213 var nullabl = true
214 for e in a.elems do
215 if e isa Token then
216 nullabl = false
217 break
218 else if e isa Production then
219 if not e.is_nullable then nullabl = false
220 break
221 else
222 abort
223 end
224 end
225 if nullabl then
226 p.is_nullable = true
227 changed = true
228 end
229 end
230 end
231 if not changed then break
232 end
233
234 loop
235 var changed = false
236 for p in prods do
237 var fs = p.firsts
238 for a in p.alts do
239 if a.phony then continue
240 var i = a.first_item
241 loop
242 var e = i.next
243 if e == null then break
244 if e isa Token then
245 if try_add(fs, i) then changed = true
246 break
247 else if e isa Production then
248 for t in e.firsts do
249 if try_add(fs, t) then changed = true
250 end
251 if not e.is_nullable then break
252 else
253 abort
254 end
255 i = i.avance
256 end
257 end
258 end
259 if not changed then break
260 end
261
262 loop
263 var changed = false
264 for p1 in prods do
265 for a in p1.alts do
266 if a.phony then continue
267 var p0: nullable Production = null
268 var i = a.first_item
269 loop
270 var e = i.next
271 if e == null then break
272 var p = p0
273 if e isa Production then p0 = e else p0 = null
274 if p == null then
275 i = i.avance
276 continue
277 end
278
279 var afs = p.afters
280
281 if e isa Token then
282 if try_add(afs, i) then changed = true
283 else if e isa Production then
284 for t in e.firsts do
285 if try_add(afs, t) then changed = true
286 end
287 if e.is_nullable then
288 for t in e.afters do
289 if try_add(afs, t) then changed = true
290 end
291 end
292 else
293 abort
294 end
295 i = i.avance
296 end
297 if p0 != null then
298 var afs = p0.afters
299 for t in p1.afters do
300 if try_add(afs, t) then changed = true
301 end
302 end
303 end
304 end
305 if not changed then break
306 end
307 end
308
309 # used by `analyse`
310 private fun try_add(set: Set[Item], t: Item): Bool
311 do
312 var res = not set.has(t)
313 if res then set.add(t)
314 return res
315 end
316
317 end
318
319 # A production of a grammar
320 class Production
321 super Element
322 # The alternative of the production
323 var alts = new Array[Alternative]
324
325 # Additionnal alternatives in the AST
326 var ast_alts = new Array[Alternative]
327
328 # Is self the accept production
329 var accept = false
330
331 # Is self transformed to a other production for the AST
332 # FIXME: cleaup AST
333 var spe: nullable Production = null is writable
334
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
336 # FIXME cleanup AST
337 var altone = false is writable
338
339 # The cname of the class in the AST
340 # FIXME: cleanup AST
341 redef fun acname do
342 if spe != null then return spe.acname
343 return super
344 end
345
346 # Is the production nullable
347 var is_nullable = false
348 # The first tokens of the production
349 var firsts = new HashSet[Item]
350 # The tokens that may follows the production (as in SLR)
351 var afters = new HashSet[Item]
352
353
354 # Crate a new empty alternative
355 fun new_alt0(name: String): Alternative
356 do
357 var a = new Alternative(self, name, new Array[Element])
358 alts.add a
359 return a
360 end
361
362 # Crate a new alternative with some elements
363 fun new_alt(name: String, elems: Element...): Alternative
364 do
365 var a = new Alternative(self, name, elems)
366 alts.add a
367 return a
368 end
369
370 # Crate a new alternative with some elements
371 fun new_alt2(name: String, elems: Array[Element]): Alternative
372 do
373 var a = new Alternative(self, name, elems)
374 alts.add a
375 return a
376 end
377
378 # Return a set of items for the production
379 fun start_state: Array[Item]
380 do
381 var res = new Array[Item]
382 for a in alts do
383 if a.phony then continue
384 res.add a.first_item
385 end
386 return res
387 end
388
389 # States in the LR automaton that has a outgoing transition on self
390 var gotos = new ArraySet[LRState]
391 end
392
393 # An alternative of a production
394 class Alternative
395 # The production
396 var prod: Production
397
398 # The name of the alternative
399 var name: String is writable
400
401 # The elements of the alternative
402 var elems: Array[Element]
403
404 # The first item of the alternative
405 var first_item = new Item(self, 0)
406
407 # The name of the elements
408 var elems_names = new Array[nullable String]
409
410 # Return a name for a given element (or a number if unamed)
411 fun elemname(i: Int): String
412 do
413 if i >= elems_names.length then return "{i}"
414 var n = elems_names[i]
415 if n == null then return "{i}"
416 return n
417 end
418
419 redef fun to_s do return "{prod.name}::{name}={elems.join(" ")}"
420
421 # A mangled name
422 fun cname: String do
423 return "N{name.to_cmangle}"
424 end
425
426 # The code for the reduction
427 var codes: nullable Array[Code] = null is writable
428
429 # Is the alternative transformed (ie not in the AST)
430 var trans = false is writable
431
432 # Is the alternative unparsable? (ie not in the automaton)
433 var phony = false is writable
434
435 # Imitialize codes with the elements
436 fun make_codes
437 do
438 if codes != null then return
439 var codes = new Array[Code]
440 self.codes = codes
441 for e in elems do
442 codes.add(new CodePop)
443 end
444 codes.add(new CodeNew(self))
445 end
446 end
447
448 # A step in the construction of the AST. used to modelize transformations
449 interface Code
450 end
451 # Get a element from the stack
452 class CodePop
453 super Code
454 redef fun to_s do return "pop"
455 end
456 # Allocate a new AST node for an alternative using the correct number of poped elements
457 class CodeNew
458 super Code
459 var alt: Alternative
460 redef fun to_s do return "New {alt.name}/{alt.elems.length}"
461 end
462 # Get null
463 class CodeNull
464 super Code
465 redef fun to_s do return "null"
466 end
467
468 # Elements of an alternative.
469 # Either a `Production` or a `Token`
470 abstract class Element
471 # The name of the element
472 var name: String
473 redef fun to_s do return name
474
475 private var acname_cache: nullable String = null
476
477 # The mangled name of the element
478 fun cname: String do return "N{name.to_cmangle}"
479
480 # the name of the class in the AST
481 fun acname: String do
482 var res = acname_cache
483 if res == null then
484 res = "N{to_s.to_cmangle}"
485 acname_cache = res
486 end
487 return res
488 end
489 fun acname=(s: String) do acname_cache = s
490 end
491
492 # A terminal element
493 class Token
494 super Element
495 # States of the LR automatio that shift on self
496 var shifts = new ArraySet[LRState]
497 # States of the LR automatio that reduce on self in the lookahead(1)
498 var reduces = new ArraySet[LRState]
499 end
500
501 #
502
503 # A LR automaton
504 class LRAutomaton
505 # The grammar of the automaton
506 var grammar: Gram
507
508 # The set of states
509 var states = new Array[LRState]
510
511 # Dump of the automaton
512 fun pretty: String
513 do
514 var res = new Array[String]
515 res.add "* LRAutomaton: {states.length} states\n"
516 for s in states do
517 res.add "s{s.number} {s.name}\n"
518 res.add "\tCORE\n"
519 for i in s.core do
520 res.add "\t\t{i}\n"
521 if i.next != null then continue
522 for i2 in s.lookahead(i) do
523 res.add "\t\t\t{i2}\n"
524 end
525 end
526 res.add "\tOTHER ITEMS\n"
527 for i in s.items do
528 if s.core.has(i) then continue
529 res.add "\t\t{i}\n"
530 if i.next != null then continue
531 for i2 in s.lookahead(i) do
532 res.add "\t\t\t{i2}\n"
533 end
534 end
535 res.add "\tTRANSITIONS {s.outs.length}\n"
536 for t in s.outs do
537 res.add "\t\t{t.elem} |-> s{t.to.number}\n"
538 end
539 res.add "\tACTIONS\n"
540 if s.is_lr0 then
541 res.add "\t\tSTATE LR0\n"
542 else
543 res.add "\t\tSTATE SLR\n"
544 for t, a in s.guarded_reduce do
545 if a.length > 1 then
546 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
547 break
548 else if s.shifts.has(t) then
549 res.add "\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
550 break
551 end
552 end
553 end
554 if not s.shifts.is_empty then
555 res.add "\t\tSHIFT {s.shifts.join(" ")}\n"
556 end
557 for r in s.reduces do
558 res.add "\t\tREDUCE {r}\n"
559 end
560 end
561 return res.to_s
562 end
563
564 # Generate a graphviz file of the automaton
565 fun to_dot(path: String)
566 do
567 var f = new OFStream.open(path)
568 f.write("digraph g \{\n")
569 f.write("rankdir=LR;\n")
570 f.write("node[shape=Mrecord,height=0];\n")
571
572 for s in states do
573 f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
574 for i in s.core do
575 f.write "{i.to_s.escape_to_dot}\\l"
576 end
577 f.write("|")
578 for i in s.items do
579 if s.core.has(i) then continue
580 f.write "{i.to_s.escape_to_dot}\\l"
581 end
582 f.write "\""
583 if not s.is_lr0 then
584 var conflict = false
585 for t, a in s.guarded_reduce do
586 if a.length > 1 then
587 f.write ",color=red"
588 conflict = true
589 break
590 else if s.shifts.has(t) then
591 f.write ",color=orange"
592 conflict = true
593 break
594 end
595 end
596 if not conflict then
597 f.write ",color=blue"
598 end
599 end
600 f.write "];\n"
601 for t in s.outs do
602 f.write "s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\"];\n"
603 end
604 end
605 f.write("\}\n")
606 f.close
607 end
608
609 # Generate the parser of the automaton
610 fun gen_to_nit(filepath: String, name: String)
611 do
612 var gen = new Generator
613 gen.gen_to_nit(self, name)
614 var f = new OFStream.open(filepath)
615 for s in gen.out do
616 f.write(s)
617 f.write("\n")
618 end
619 f.close
620 end
621 end
622
623 private class Generator
624 var out = new Array[String]
625 fun add(s: String) do out.add(s)
626 fun gen_to_nit(autom: LRAutomaton, name: String)
627 do
628 var states = autom.states
629 var gram = autom.grammar
630
631 add "# Parser generated by nitcc for the grammar {name}"
632 add "import nitcc_runtime"
633
634 add "class Parser_{name}"
635 add "\tsuper Parser"
636 add "\tredef fun start_state do return state_{states.first.cname}"
637 add "end"
638
639 add "redef class Object"
640 for s in states do
641 add "\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
642 end
643 for p in gram.prods do
644 add "\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
645 end
646 add "end"
647
648 add "redef class NToken"
649 for s in states do
650 if not s.need_guard then continue
651 add "\t# guarded action for state {s.name}"
652 add "\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
653 add "\tprivate fun action_s{s.cname}(parser: Parser) do"
654 if s.reduces.length != 1 then
655 add "\t\tparser.parse_error"
656 else
657 gen_reduce_to_nit(s.reduces.first)
658 end
659 add "\tend"
660 end
661 add "end"
662
663 for t in gram.tokens do
664 if t.name == "Eof" then
665 add "redef class {t.cname}"
666 else
667 add "class {t.cname}"
668 end
669 add "\tsuper NToken"
670 for s in t.shifts do
671 if not s.need_guard then continue
672 add "\tredef fun action_s{s.cname}(parser) do"
673 gen_shift_to_nit(s, t)
674 add "\tend"
675 end
676 for s in t.reduces do
677 if not s.need_guard then continue
678 if s.reduces.length <= 1 then continue
679 add "\tredef fun action_s{s.cname}(parser) do"
680 gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
681 add "\tend"
682 end
683 add "\tredef fun node_name do return \"{t.name.escape_to_nit}\""
684 add "end"
685 end
686
687 add "redef class LRGoto"
688 for s in states do
689 if s.gotos.length <= 1 then continue
690 add "\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
691 end
692 add "end"
693
694 for p in gram.prods do
695 add "class Goto_{p.cname}"
696 add "\tsuper LRGoto"
697 for s in p.gotos do
698 if s.gotos.length <= 1 then continue
699 add "\tredef fun goto_s{s.cname}(parser) do"
700 gen_goto_to_nit(s, p)
701 add "\tend"
702 end
703 add "end"
704 end
705
706 var ps = gram.prods.to_a
707 ps.add_all(gram.ast_prods)
708 for p in ps do
709 if p.spe == null and not p.altone then
710 if p.name.has_suffix("?") or p.name.has_suffix("+") then continue
711 add "class {p.acname}"
712 add "\tsuper NProd"
713 add "\tredef fun node_name do return \"{p.name.escape_to_nit}\""
714 add "end"
715 end
716
717 var als = p.alts.to_a
718 als.add_all(p.ast_alts)
719 for a in als do
720 if a.trans then continue
721 add "class {a.cname}"
722 if p.altone then
723 add "\tsuper NProd"
724 else
725 add "\tsuper {p.acname}"
726 end
727 add "\tredef fun node_name do return \"{a.name.escape_to_nit}\""
728 var initarg = new Array[String]
729 for i in [0..a.elems.length[ do
730 add "\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
731 initarg.add("n_{a.elemname(i)}: {a.elems[i].acname}")
732 end
733 if initarg.is_empty then
734 add "\tinit do end"
735 else
736 add "\tinit({initarg.join(", ")}) do"
737 for i in [0..a.elems.length[ do
738 add "\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
739 end
740 add "\tend"
741 end
742 add "\tredef fun number_of_children do return {a.elems.length}"
743 add "\tredef fun child(i) do"
744 for i in [0..a.elems.length[ do
745 add "\t\tif i == {i} then return n_{a.elemname(i)}"
746 end
747 add "\t\tabort"
748 add "\tend"
749 add "end"
750 end
751 end
752
753 for s in states do
754 add "# State {s.name}"
755 add "private class LRState{s.cname}"
756 add "\tsuper LRState"
757
758 add "\tredef fun to_s do return \"{s.name.escape_to_nit}\""
759
760 var err = new Array[String]
761 for t in s.outs do
762 var e = t.elem
763 if e isa Production then err.add e.name
764 end
765 if err.is_empty then for t in s.outs do
766 var e = t.elem
767 if e isa Token then err.add e.name
768 end
769
770 add "\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\""
771
772 add "\tredef fun action(parser) do"
773 if s.need_guard then
774 add "\t\tparser.peek_token.action_s{s.cname}(parser)"
775 else if s.reduces.length == 1 then
776 gen_reduce_to_nit(s.reduces.first)
777 else
778 abort
779 end
780 add "\tend"
781
782 if not s.gotos.is_empty then
783 add "\tredef fun goto(parser, goto) do"
784 if s.gotos.length > 1 then
785 add "\t\tgoto.goto_s{s.cname}(parser)"
786 else
787 gen_goto_to_nit(s, s.gotos.first)
788 end
789 add "\tend"
790 end
791
792 add "end"
793 end
794
795
796 end
797
798 fun gen_shift_to_nit(s: LRState, t: Token)
799 do
800 var dest = s.trans(t)
801 add "\t\tparser.shift(state_{dest.cname})"
802
803 end
804
805 fun gen_goto_to_nit(s: LRState, p: Production)
806 do
807 var dest = s.trans(p)
808 add "\t\tparser.push(state_{dest.cname})"
809 end
810
811 fun gen_reduce_to_nit(alt: Alternative)
812 do
813 add "\t\t# REDUCE {alt}"
814 var i = alt.elems.length - 1
815 for e in alt.elems.to_a.reversed do
816 add "\t\tvar n{i} = parser.pop.as({e.acname})"
817 i -= 1
818 end
819
820 if alt.name.has_suffix("+_more") then
821 add "\t\tvar prod = n0"
822 add "\t\tn0.children.add(n1)"
823 else if alt.name.has_suffix("+_one") then
824 add "\t\tvar prod = new {alt.prod.acname}"
825 add "\t\tprod.children.add(n0)"
826 else
827 alt.make_codes
828 var cpt = 0
829 i = 0
830 var st = new Array[String]
831 for c in alt.codes.as(not null) do
832 if c isa CodePop then
833 st.add "n{i}"
834 i += 1
835 else if c isa CodeNull then
836 st.add "null"
837 else if c isa CodeNew then
838 var calt = c.alt
839 cpt += 1
840 var from = st.length - calt.elems.length
841 var args = new List[String]
842 for j in [from..st.length[ do
843 args.unshift(st.pop)
844 end
845
846 if args.is_empty then
847 add "\t\tvar p{cpt} = new {calt.cname}"
848 else
849 add "\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
850 end
851 #var x = 0
852 #for j in [from..st.length[ do
853 #if st[j] == "null" then continue
854 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
855 #x += 1
856 #end
857 st.add("p{cpt}")
858 end
859 end
860 assert st.length == 1
861 add "\t\tvar prod = {st.first}"
862 end
863
864 add "\t\tparser.node_stack.push prod"
865 if alt.prod.accept then
866 add "\t\tparser.stop = true"
867 else
868 add "\t\tparser.goto(goto_{alt.prod.cname})"
869 end
870 end
871 end
872
873 # A state in a LR automaton
874 class LRState
875 # name of the automaton (short part from the start)
876 var name: String
877
878 # malglen name
879 fun cname: String do return name.to_cmangle
880
881 # number
882 var number: Int = -1
883
884 # Set of all items
885 var items = new HashSet[Item]
886
887 # Set of items only in the core
888 var core = new HashSet[Item]
889
890 # Outgoing transitions
891 var ins = new Array[LRTransition]
892
893 # Ingoing transitions
894 var outs = new Array[LRTransition]
895
896 # trans function
897 fun trans(e: Element): nullable LRState
898 do
899 for t in outs do if t.elem == e then return t.to
900 return null
901 end
902
903 redef fun ==(o) do return o isa LRState and core == o.core
904 redef fun hash do return items.length
905
906 redef fun to_s do return items.join(" ; ")
907
908 # Add and item in the core
909 fun add(i: Item): Bool
910 do
911 if items.has(i) then return false
912
913 items.add(i)
914 if i.pos > 0 or i.alt.prod.accept then core.add(i)
915 return true
916 end
917
918 # Recusively extends item outside the core
919 fun extends(i: Item)
920 do
921 var e = i.next
922 if e == null then return
923 if not e isa Production then return
924 for i2 in e.start_state do
925 if add(i2) then extends(i2)
926 end
927 end
928
929 # SLR lookahead
930 fun lookahead(i: Item): Set[Item]
931 do
932 return i.alt.prod.afters
933 end
934
935 # Set of all reductions
936 var reduces = new ArraySet[Alternative]
937 # Set of all shifts
938 var shifts = new ArraySet[Token]
939 # Set of all goto
940 var gotos = new ArraySet[Production]
941 # Reduction guarded by tokens
942 var guarded_reduce = new HashMap[Token, Set[Item]]
943 # Shitfs guarded by tokens
944 var guarded_shift = new HashMap[Token, Set[Item]]
945
946 # Does the state need a guard to perform an action?
947 fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
948
949 # Is the state LR0?
950 fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
951
952 # compute guards and conflicts
953 fun analysis
954 do
955 # Extends the core
956 for i in items.to_a do
957 extends(i)
958 end
959
960 # Collect action and conflicts
961 for i in items do
962 var n = i.next
963 if n == null then
964 reduces.add(i.alt)
965 for i2 in lookahead(i) do
966 var t = i2.next
967 assert t isa Token
968 t.reduces.add(self)
969 if not guarded_reduce.has_key(t) then
970 guarded_reduce[t] = new ArraySet[Item]
971 end
972 guarded_reduce[t].add(i)
973 end
974 else if n isa Token then
975 shifts.add(n)
976 n.shifts.add(self)
977 if not guarded_shift.has_key(n) then
978 guarded_shift[n] = new ArraySet[Item]
979 end
980 guarded_shift[n].add(i)
981 else if n isa Production then
982 gotos.add(n)
983 n.gotos.add(self)
984 else
985 abort
986 end
987 end
988 for t, a in guarded_reduce do
989 if a.length > 1 then
990 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
991 for i in a do print "\treduce: {i}"
992 else if guarded_shift.has_key(t) then
993 var ri = a.first
994 var confs = new Array[Item]
995 var ress = new Array[String]
996 var g = guarded_shift[t]
997 for si in lookahead(ri) do
998 if si.next != t then continue
999 if not g.has(si) then
1000 confs.add(si)
1001 continue
1002 end
1003 var p = ri.alt.prod
1004 var csi: nullable Item = null
1005 for bsi in back_expand(si) do
1006 if bsi.alt.prod == p then
1007 csi = bsi
1008 break
1009 end
1010 end
1011 if csi == null then
1012 confs.add(si)
1013 continue
1014 end
1015 ress.add "\tshift: {si}"
1016 if si != csi then
1017 ress.add "\tcore shift: {csi}"
1018 end
1019 end
1020 if confs.is_empty then
1021 print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
1022 print "\treduce: {ri}"
1023 for r in ress do print r
1024 guarded_reduce.keys.remove(t)
1025 else
1026 print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1027 print "\treduce: {ri}"
1028 for i in guarded_shift[t] do print "\tshift: {i}"
1029 end
1030 end
1031 end
1032 end
1033
1034 # Return `i` and all other items of the state that expands, directly or undirectly, to `i`
1035 fun back_expand(i: Item): Set[Item]
1036 do
1037 var res = new ArraySet[Item]
1038 var todo = [i]
1039 res.add(i)
1040 while not todo.is_empty do
1041 var x = todo.pop
1042 if x.pos > 0 then continue
1043 var p = x.alt.prod
1044 for y in items do
1045 if y.next != p then continue
1046 if res.has(y) then continue
1047 res.add(y)
1048 todo.add(y)
1049 end
1050 end
1051 return res
1052 end
1053 end
1054
1055 # A transition in a LR automaton
1056 class LRTransition
1057 # The origin state
1058 var from: LRState
1059 # The testination state
1060 var to: LRState
1061 # The element labeling the transition
1062 var elem: Element
1063 end
1064
1065 # A alternative with a cursor (dot) before an element
1066 class Item
1067 # The alternative
1068 var alt: Alternative
1069 # The dot index (0 means before the first element)
1070 var pos: Int
1071
1072 redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
1073 redef fun hash do return alt.hash + pos
1074
1075 redef fun to_s
1076 do
1077 var b = new FlatBuffer
1078 b.append("{alt.prod.name}::{alt.name}=")
1079 for i in [0..alt.elems.length[
1080 do
1081 if pos == i then b.append(".") else b.append(" ")
1082 b.append(alt.elems[i].to_s)
1083 end
1084 if pos == alt.elems.length then b.append(".")
1085 return b.to_s
1086 end
1087
1088 # The element thatr follow the dot, null if the fdot is at the end
1089 fun next: nullable Element
1090 do
1091 if pos >= alt.elems.length then return null
1092 return alt.elems[pos]
1093 end
1094
1095 # SLR loohahead
1096 fun lookahead: Set[Token]
1097 do
1098 var res = new HashSet[Token]
1099 var p = pos + 1
1100 while p < alt.elems.length do
1101 var e = alt.elems[p]
1102 if e isa Token then
1103 res.add(e)
1104 break
1105 else if e isa Production then
1106 #res.add_all(e.firsts)
1107 if not e.is_nullable then break
1108 end
1109 p += 1
1110 end
1111 return res
1112 end
1113
1114 # The item that advanced once
1115 fun avance: Item
1116 do
1117 var res = new Item(alt, pos+1)
1118 return res
1119 end
1120 end