1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # A grammar describing a language
17 # The productions (non-terminal) of the concrete grammar
18 var prods
= new Array[Production]
20 # The additional abstract productions of the grammar
22 var ast_prods
= new Array[Production]
24 # The tokens (terminals) of the grammar
25 var tokens
= new Array[Token]
27 # Dump of the concrete grammar and the transformations
30 var res
= new FlatBuffer
33 res
.append
("{p.name} \{-> {p.spe.name}\}=\n")
35 res
.append
("{p.name} =\n")
38 if not p
.alts
.is_empty
then last
= p
.alts
.last
40 res
.append
("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a
.codes
== null then a
.make_codes
42 res
.append
("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a
== last
then res
.append
(" ;\n") else res
.append
(" |\n")
45 if p
.is_nullable
then res
.append
"\t// is nullable\n"
46 if not p
.firsts
.is_empty
then
47 res
.append
"\t// firsts:\n"
48 for x
in p
.firsts
do res
.append
"\t// {x}\n"
50 if not p
.afters
.is_empty
then
51 res
.append
"\t// afters:\n"
52 for x
in p
.afters
do res
.append
"\t// {x}\n"
58 # Inline (ie. remove from the concrete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline
(prods
: Collection[Production])
62 for p
in self.prods
do
63 for a
in p
.alts
.to_a
do
64 if a
.phony
then continue
67 if e
isa Production and prods
.has
(e
) then to_inline
= true
69 if not to_inline
then continue
71 if a
.codes
== null then a
.make_codes
73 var a0
= new Alternative(p
, a
.name
, new Array[Element])
75 a0
.codes
= new Array[Code]
77 var pool2
= new Array[Alternative]
79 if not e
isa Production or not prods
.has
(e
) then
82 x
.codes
.add
(new CodePop)
87 print
"Circular inlining on {p} :: {a}"
92 if a
.phony
then continue
93 if a2
.codes
== null then a2
.make_codes
95 var name
= a
.name
+ "_" + pool2
.length
.to_s
96 var na
= new Alternative(p
, name
, new Array[Element])
99 na
.elems
.add_all
(x
.elems
)
100 na
.elems
.add_all
(a2
.elems
)
101 na
.codes
= new Array[Code]
102 na
.codes
.add_all
(x
.codes
.as(not null))
103 na
.codes
.add_all
(a2
.codes
.as(not null))
111 x
.codes
.add
(a
.codes
.last
)
120 self.ast_prods
.add
(p
)
124 # Compute a LR automaton
127 var start
= new Production("_start")
129 var eof
= new Token("Eof")
131 start
.new_alt
("Start", self.prods
.first
, eof
)
136 var first
= new LRState("Start")
138 for i
in start
.start_state
do first
.add
(i
)
140 var automaton
= new LRAutomaton(self)
142 var todo
= new List[LRState]
144 var seen
= new HashSet[LRState]
147 while not todo
.is_empty
do
148 var state
= todo
.shift
151 automaton
.states
.add
(state
)
154 var nexts
= new HashMap[Element, LRState]
155 for i
in state
.items
do
157 if e
== null then continue
158 if nexts
.has_key
(e
) then
159 nexts
[e
].add
(i
.avance
)
162 if state
== automaton
.states
.first
then
165 name
= "{state.name} {e}"
167 var next
= new LRState(name
)
173 for e
, next
in nexts
do
175 #print "#states: {seen.length}"
177 # Look for an existing LR0 state in the automaton
187 # If not, add it to the pool and the todo-list
189 next
.number
= seen
.length
190 assert not seen
.has
(next
)
196 var t
= new LRTransition(state
, next
, e
)
204 # Compute `nullables`, `firsts` and `afters` of productions
210 if p
.is_nullable
then continue
212 if a
.phony
then continue
218 else if e
isa Production then
219 if not e
.is_nullable
then nullabl
= false
231 if not changed
then break
239 if a
.phony
then continue
243 if e
== null then break
245 if try_add
(fs
, i
) then changed
= true
247 else if e
isa Production then
249 if try_add
(fs
, t
) then changed
= true
251 if not e
.is_nullable
then break
259 if not changed
then break
266 if a
.phony
then continue
267 var p0
: nullable Production = null
271 if e
== null then break
273 if e
isa Production then p0
= e
else p0
= null
282 if try_add
(afs
, i
) then changed
= true
283 else if e
isa Production then
285 if try_add
(afs
, t
) then changed
= true
287 if e
.is_nullable
then
289 if try_add
(afs
, t
) then changed
= true
299 for t
in p1
.afters
do
300 if try_add
(afs
, t
) then changed
= true
305 if not changed
then break
310 private fun try_add
(set
: Set[Item], t
: Item): Bool
312 var res
= not set
.has
(t
)
313 if res
then set
.add
(t
)
319 # A production of a grammar
322 # The alternative of the production
323 var alts
= new Array[Alternative]
325 # Additional alternatives in the AST
326 var ast_alts
= new Array[Alternative]
328 # Is self the accept production
331 # Is self transformed to a other production for the AST
333 var spe
: nullable Production = null is writable
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
337 var altone
= false is writable
339 # The cname of the class in the AST
342 if spe
!= null then return spe
.acname
346 # Is the production nullable
347 var is_nullable
= false
349 # The first tokens of the production
350 var firsts
= new HashSet[Item]
352 # The tokens that may follows the production (as in SLR)
353 var afters
= new HashSet[Item]
356 # Crate a new empty alternative
357 fun new_alt0
(name
: String): Alternative
359 var a
= new Alternative(self, name
, new Array[Element])
364 # Crate a new alternative with some elements
365 fun new_alt
(name
: String, elems
: Element...): Alternative
367 var a
= new Alternative(self, name
, elems
)
372 # Crate a new alternative with some elements
373 fun new_alt2
(name
: String, elems
: Array[Element]): Alternative
375 var a
= new Alternative(self, name
, elems
)
380 # Return a set of items for the production
381 fun start_state
: Array[Item]
383 var res
= new Array[Item]
385 if a
.phony
then continue
391 # States in the LR automaton that has a outgoing transition on self
392 var gotos
= new ArraySet[LRState]
395 # An alternative of a production
400 # The name of the alternative
401 var name
: String is writable
403 # The elements of the alternative
404 var elems
: Array[Element]
406 # The first item of the alternative
407 var first_item
= new Item(self, 0)
409 # The name of the elements
410 var elems_names
= new Array[nullable String]
412 # Return a name for a given element (or a number if unamed)
413 fun elemname
(i
: Int): String
415 if i
>= elems_names
.length
then return "{i}"
416 var n
= elems_names
[i
]
417 if n
== null then return "{i}"
421 redef fun to_s
do return "{prod.name}::{name}={elems.join(" ")}"
425 return "N{name.to_cmangle}"
428 # The code for the reduction
429 var codes
: nullable Array[Code] = null is writable
431 # Is the alternative transformed (ie not in the AST)
432 var trans
= false is writable
434 # Is the alternative unparsable? (ie not in the automaton)
435 var phony
= false is writable
437 # Initialize codes with the elements
440 if codes
!= null then return
441 var codes
= new Array[Code]
444 codes
.add
(new CodePop)
446 codes
.add
(new CodeNew(self))
450 # A step in the construction of the AST.
451 # Used to model transformations
455 # Get a element from the stack
458 redef fun to_s
do return "pop"
461 # Allocate a new AST node for an alternative using the correct number of popped elements
465 # The associated alternative
468 redef fun to_s
do return "New {alt.name}/{alt.elems.length}"
474 redef fun to_s
do return "null"
477 # Elements of an alternative.
478 # Either a `Production` or a `Token`
479 abstract class Element
480 # The name of the element
482 redef fun to_s
do return name
484 private var acname_cache
: nullable String = null
486 # The mangled name of the element
487 fun cname
: String do return "N{name.to_cmangle}"
489 # The name of the class in the AST
490 fun acname
: String do
491 var res
= acname_cache
493 res
= "N{to_s.to_cmangle}"
499 # The name of the class in the AST
500 fun acname
=(s
: String) do acname_cache
= s
506 # States of the LR automaton that shift on self
507 var shifts
= new ArraySet[LRState]
508 # States of the LR automaton that reduce on self in the lookahead(1)
509 var reduces
= new ArraySet[LRState]
516 # The grammar of the automaton
520 var states
= new Array[LRState]
522 # Dump of the automaton
525 var res
= new Array[String]
526 res
.add
"* LRAutomaton: {states.length} states\n"
528 res
.add
"s{s.number} {s.name}\n"
532 if i
.next
!= null then continue
533 for i2
in s
.lookahead
(i
) do
534 res
.add
"\t\t\t{i2}\n"
537 res
.add
"\tOTHER ITEMS\n"
539 if s
.core
.has
(i
) then continue
541 if i
.next
!= null then continue
542 for i2
in s
.lookahead
(i
) do
543 res
.add
"\t\t\t{i2}\n"
546 res
.add
"\tTRANSITIONS {s.outs.length}\n"
548 res
.add
"\t\t{t.elem} |-> s{t.to.number}\n"
550 res
.add
"\tACTIONS\n"
552 res
.add
"\t\tSTATE LR0\n"
554 res
.add
"\t\tSTATE SLR\n"
555 for t
, a
in s
.guarded_reduce
do
557 res
.add
"\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
559 else if s
.shifts
.has
(t
) then
560 res
.add
"\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
565 if not s
.shifts
.is_empty
then
566 res
.add
"\t\tSHIFT {s.shifts.join(" ")}\n"
568 for r
in s
.reduces
do
569 res
.add
"\t\tREDUCE {r}\n"
575 # Generate a graphviz file of the automaton
576 fun to_dot
(path
: String)
578 var f
= new FileWriter.open
(path
)
579 f
.write
("digraph g \{\n")
580 f
.write
("rankdir=LR;\n")
581 f
.write
("node[shape=Mrecord,height=0];\n")
584 f
.write
"s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
586 f.write "{i.to_s.escape_to_dot}\\l
"
590 if s.core.has(i) then continue
591 f.write "{i.to_s.escape_to_dot}\\l
"
596 for t
, a
in s
.guarded_reduce
do
601 else if s
.shifts
.has
(t
) then
602 f
.write
",color=orange"
608 f
.write
",color=blue"
613 f
.write
"s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\
"];\n"
620 # Generate the parser of the automaton
621 fun gen_to_nit
(filepath
: String, name
: String)
623 var gen
= new Generator
624 gen
.gen_to_nit
(self, name
)
625 var f
= new FileWriter.open
(filepath
)
634 private class Generator
635 var out
= new Array[String]
636 fun add
(s
: String) do out
.add
(s
)
637 fun gen_to_nit
(autom
: LRAutomaton, name
: String)
639 var states
= autom
.states
640 var gram
= autom
.grammar
642 add
"# Parser generated by nitcc for the grammar {name}"
643 add
"module {name}_parser is no_warning(\"missing-doc\
",\"old-init\
")"
644 add
"import nitcc_runtime"
646 add
"class Parser_{name}"
648 add
"\tredef fun start_state do return state_{states.first.cname}"
652 add
"private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
654 for p
in gram
.prods
do
655 add
"private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
657 add
"private fun reduce_{a.cname}(parser: Parser) do"
663 add
"redef class NToken"
665 if not s
.need_guard
then continue
666 add
"\t# guarded action for state {s.name}"
667 add
"\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
668 add
"\tprivate fun action_s{s.cname}(parser: Parser) do"
669 if s
.reduces
.length
!= 1 then
670 add
"\t\tparser.parse_error"
672 add
"\t\treduce_{s.reduces.first.cname}(parser)"
673 #gen_reduce_to_nit(s.reduces.first)
679 for t
in gram
.tokens
do
680 if t
.name
== "Eof" then
681 add
"redef class {t.cname}"
683 add
"class {t.cname}"
687 if not s
.need_guard
then continue
688 add
"\tredef fun action_s{s.cname}(parser) do"
689 gen_shift_to_nit
(s
, t
)
692 for s
in t
.reduces
do
693 if not s
.need_guard
then continue
694 if s
.reduces
.length
<= 1 then continue
695 add
"\tredef fun action_s{s.cname}(parser) do"
696 add
"\t\treduce_{s.guarded_reduce[t].first.alt.cname}(parser)"
697 #gen_reduce_to_nit(s.guarded_reduce[t].first.alt)
700 add
"\tredef fun node_name do return \"{t.name.escape_to_nit}\
""
704 add
"redef class LRGoto"
706 if s
.gotos
.length
<= 1 then continue
707 add
"\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
711 for p
in gram
.prods
do
712 add
"class Goto_{p.cname}"
715 if s
.gotos
.length
<= 1 then continue
716 add
"\tredef fun goto_s{s.cname}(parser) do"
717 gen_goto_to_nit
(s
, p
)
723 var ps
= gram
.prods
.to_a
724 ps
.add_all
(gram
.ast_prods
)
726 if p
.spe
== null and not p
.altone
then
727 if p
.name
.has_suffix
("?") or p
.name
.has_suffix
("+") then continue
728 add
"class {p.acname}"
730 add
"\tredef fun node_name do return \"{p.name.escape_to_nit}\
""
734 var als
= p
.alts
.to_a
735 als
.add_all
(p
.ast_alts
)
737 if a
.trans
then continue
738 add
"class {a.cname}"
742 add
"\tsuper {p.acname}"
744 add
"\tredef fun node_name do return \"{a.name.escape_to_nit}\
""
745 var initarg
= new Array[String]
746 for i
in [0..a
.elems
.length
[ do
747 add
"\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
748 initarg
.add
("n_{a.elemname(i)}: {a.elems[i].acname}")
750 if initarg
.is_empty
then
753 add
"\tinit({initarg.join(", ")}) do"
754 for i
in [0..a
.elems
.length
[ do
755 add
"\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
759 add
"\tredef fun number_of_children do return {a.elems.length}"
760 add
"\tredef fun child(i) do"
761 for i
in [0..a
.elems
.length
[ do
762 add
"\t\tif i == {i} then return n_{a.elemname(i)}"
771 add
"# State {s.name}"
772 add
"private class LRState{s.cname}"
773 add
"\tsuper LRState"
775 add
"\tredef fun to_s do return \"{s.name.escape_to_nit}\
""
777 var err
= new Array[String]
780 if e
isa Production then err
.add e
.name
782 if err
.is_empty
then for t
in s
.outs
do
784 if e
isa Token then err
.add e
.name
787 add
"\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\
""
789 add
"\tredef fun action(parser) do"
791 add
"\t\tparser.peek_token.action_s{s.cname}(parser)"
792 else if s
.reduces
.length
== 1 then
793 add
"\t\treduce_{s.reduces.first.cname}(parser)"
794 #gen_reduce_to_nit(s.reduces.first)
800 if not s
.gotos
.is_empty
then
801 add
"\tredef fun goto(parser, goto) do"
802 if s
.gotos
.length
> 1 then
803 add
"\t\tgoto.goto_s{s.cname}(parser)"
805 gen_goto_to_nit
(s
, s
.gotos
.first
)
816 fun gen_shift_to_nit
(s
: LRState, t
: Token)
818 var dest
= s
.trans
(t
)
819 add
"\t\tparser.shift(state_{dest.cname})"
823 fun gen_goto_to_nit
(s
: LRState, p
: Production)
825 var dest
= s
.trans
(p
)
826 add
"\t\tparser.push(state_{dest.cname})"
829 fun gen_reduce_to_nit
(alt
: Alternative)
831 add
"\t\t# REDUCE {alt}"
832 var i
= alt
.elems
.length
- 1
833 for e
in alt
.elems
.to_a
.reversed
do
834 add
"\t\tvar n{i} = parser.pop.as({e.acname})"
838 if alt
.name
.has_suffix
("+_more") then
839 add
"\t\tvar prod = n0"
840 add
"\t\tn0.children.add(n1)"
841 else if alt
.name
.has_suffix
("+_one") then
842 add
"\t\tvar prod = new {alt.prod.acname}"
843 add
"\t\tprod.children.add(n0)"
848 var st
= new Array[String]
849 for c
in alt
.codes
.as(not null) do
850 if c
isa CodePop then
853 else if c
isa CodeNull then
855 else if c
isa CodeNew then
858 var from
= st
.length
- calt
.elems
.length
859 var args
= new List[String]
860 for j
in [from
..st
.length
[ do
864 if args
.is_empty
then
865 add
"\t\tvar p{cpt} = new {calt.cname}"
867 add
"\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
870 #for j in [from..st.length[ do
871 #if st[j] == "null" then continue
872 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
878 assert st
.length
== 1
879 add
"\t\tvar prod = {st.first}"
882 add
"\t\tparser.node_stack.push prod"
883 if alt
.prod
.accept
then
884 add
"\t\tparser.stop = true"
886 add
"\t\tparser.goto(goto_{alt.prod.cname})"
891 # A state in a LR automaton
893 # Name of the automaton (short part from the start)
897 fun cname
: String do return name
.to_cmangle
903 var items
= new HashSet[Item]
905 # Set of items only in the core
906 var core
= new HashSet[Item]
908 # Outgoing transitions
909 var ins
= new Array[LRTransition]
911 # Ingoing transitions
912 var outs
= new Array[LRTransition]
915 fun trans
(e
: Element): nullable LRState
917 for t
in outs
do if t
.elem
== e
then return t
.to
921 redef fun ==(o
) do return o
isa LRState and core
== o
.core
922 redef fun hash
do return items
.length
924 redef fun to_s
do return items
.join
(" ; ")
926 # Add and item in the core
927 fun add
(i
: Item): Bool
929 if items
.has
(i
) then return false
932 if i
.pos
> 0 or i
.alt
.prod
.accept
then core
.add
(i
)
936 # Recursively extends item outside the core
940 if e
== null then return
941 if not e
isa Production then return
942 for i2
in e
.start_state
do
943 if add
(i2
) then extends
(i2
)
948 fun lookahead
(i
: Item): Set[Item]
950 return i
.alt
.prod
.afters
953 # Set of all reductions
954 var reduces
= new ArraySet[Alternative]
956 var shifts
= new ArraySet[Token]
958 var gotos
= new ArraySet[Production]
959 # Reduction guarded by tokens
960 var guarded_reduce
= new HashMap[Token, Set[Item]]
961 # Shifts guarded by tokens
962 var guarded_shift
= new HashMap[Token, Set[Item]]
964 # Does the state need a guard to perform an action?
965 fun need_guard
: Bool do return not shifts
.is_empty
or reduces
.length
> 1
968 fun is_lr0
: Bool do return reduces
.length
<= 1 and shifts
.is_empty
or reduces
.is_empty
970 # Compute guards and conflicts
974 for i
in items
.to_a
do
978 # Collect action and conflicts
983 for i2
in lookahead
(i
) do
987 if not guarded_reduce
.has_key
(t
) then
988 guarded_reduce
[t
] = new ArraySet[Item]
990 guarded_reduce
[t
].add
(i
)
992 else if n
isa Token then
995 if not guarded_shift
.has_key
(n
) then
996 guarded_shift
[n
] = new ArraySet[Item]
998 guarded_shift
[n
].add
(i
)
999 else if n
isa Production then
1006 # Token to remove as reduction guard to solve S/R conflicts
1007 var removed_reduces
= new Array[Token]
1008 for t
, a
in guarded_reduce
do
1009 if a
.length
> 1 then
1010 print
"REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1011 for i
in a
do print
"\treduce: {i}"
1013 if guarded_shift
.has_key
(t
) then
1015 var confs
= new Array[Item]
1016 var ress
= new Array[String]
1017 var g
= guarded_shift
[t
]
1018 for si
in lookahead
(ri
) do
1019 if si
.next
!= t
then continue
1020 if not g
.has
(si
) then
1025 var csi
: nullable Item = null
1026 for bsi
in back_expand
(si
) do
1027 if bsi
.alt
.prod
== p
then
1036 ress
.add
"\tshift: {si}"
1038 ress
.add
"\tcore shift: {csi}"
1041 if confs
.is_empty
then
1042 print
"Automatic Dangling on state {self.number} {self.name} for token {t}:"
1043 print
"\treduce: {ri}"
1044 for r
in ress
do print r
1045 removed_reduces
.add t
1047 print
"SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1048 print
"\treduce: {ri}"
1049 for i
in guarded_shift
[t
] do print
"\tshift: {i}"
1050 removed_reduces
.add t
1054 for t
in removed_reduces
do
1055 guarded_reduce
.keys
.remove
(t
)
1056 t
.reduces
.remove
(self)
1060 # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
1061 fun back_expand
(i
: Item): Set[Item]
1063 var res
= new ArraySet[Item]
1066 while not todo
.is_empty
do
1068 if x
.pos
> 0 then continue
1071 if y
.next
!= p
then continue
1072 if res
.has
(y
) then continue
1081 # A transition in a LR automaton
1085 # The destination state
1087 # The element labeling the transition
1091 # A alternative with a cursor (dot) before an element
1094 var alt
: Alternative
1095 # The dot index (0 means before the first element)
1098 redef fun ==(o
) do return o
isa Item and alt
== o
.alt
and pos
== o
.pos
1099 redef fun hash
do return alt
.hash
+ pos
1103 var b
= new FlatBuffer
1104 b
.append
("{alt.prod.name}::{alt.name}=")
1105 for i
in [0..alt
.elems
.length
[
1107 if pos
== i
then b
.append
(".") else b
.append
(" ")
1108 b
.append
(alt
.elems
[i
].to_s
)
1110 if pos
== alt
.elems
.length
then b
.append
(".")
1114 # The element that follows the dot, null if the dot is at the end
1115 fun next
: nullable Element
1117 if pos
>= alt
.elems
.length
then return null
1118 return alt
.elems
[pos
]
1122 fun lookahead
: Set[Token]
1124 var res
= new HashSet[Token]
1126 while p
< alt
.elems
.length
do
1127 var e
= alt
.elems
[p
]
1131 else if e
isa Production then
1132 #res.add_all(e.firsts)
1133 if not e
.is_nullable
then break
1140 # The item that advanced once
1143 var res
= new Item(alt
, pos
+1)