1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # A grammar describing a language
17 # The productions (non-terminal) of the concrete grammar
18 var prods
= new Array[Production]
20 # The additional abstract productions of the grammar
22 var ast_prods
= new Array[Production]
24 # The tokens (terminals) of the grammar
25 var tokens
= new Array[Token]
27 # Dump of the concrete grammar and the transformations
30 var res
= new FlatBuffer
33 res
.append
("{p.name} \{-> {p.spe.name}\}=\n")
35 res
.append
("{p.name} =\n")
38 if not p
.alts
.is_empty
then last
= p
.alts
.last
40 res
.append
("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a
.codes
== null then a
.make_codes
42 res
.append
("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a
== last
then res
.append
(" ;\n") else res
.append
(" |\n")
45 if p
.is_nullable
then res
.append
"\t// is nullable\n"
46 if not p
.firsts
.is_empty
then
47 res
.append
"\t// firsts:\n"
48 for x
in p
.firsts
do res
.append
"\t// {x}\n"
50 if not p
.afters
.is_empty
then
51 res
.append
"\t// afters:\n"
52 for x
in p
.afters
do res
.append
"\t// {x}\n"
58 # Inline (ie. remove from the concrete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline
(prods
: Collection[Production])
62 for p
in self.prods
do
63 for a
in p
.alts
.to_a
do
64 if a
.phony
then continue
67 if e
isa Production and prods
.has
(e
) then to_inline
= true
69 if not to_inline
then continue
71 if a
.codes
== null then a
.make_codes
73 var a0
= new Alternative(p
, a
.name
, new Array[Element])
75 a0
.codes
= new Array[Code]
77 var pool2
= new Array[Alternative]
79 if not e
isa Production or not prods
.has
(e
) then
82 x
.codes
.add
(new CodePop)
87 print
"Circular inlining on {p} :: {a}"
92 if a
.phony
then continue
93 if a2
.codes
== null then a2
.make_codes
95 var name
= a
.name
+ "_" + pool2
.length
.to_s
96 var na
= new Alternative(p
, name
, new Array[Element])
99 na
.elems
.add_all
(x
.elems
)
100 na
.elems
.add_all
(a2
.elems
)
101 na
.codes
= new Array[Code]
102 na
.codes
.add_all
(x
.codes
.as(not null))
103 na
.codes
.add_all
(a2
.codes
.as(not null))
111 x
.codes
.add
(a
.codes
.last
)
120 self.ast_prods
.add
(p
)
124 # Compute a LR automaton
127 var start
= new Production("_start")
129 var eof
= new Token("Eof")
131 start
.new_alt
("Start", self.prods
.first
, eof
)
136 var first
= new LRState("Start")
138 for i
in start
.start_state
do first
.add
(i
)
140 var automaton
= new LRAutomaton(self)
142 var todo
= new List[LRState]
144 var seen
= new HashSet[LRState]
147 while not todo
.is_empty
do
148 var state
= todo
.shift
151 automaton
.states
.add
(state
)
154 var nexts
= new HashMap[Element, LRState]
155 for i
in state
.items
do
157 if e
== null then continue
158 if nexts
.has_key
(e
) then
159 nexts
[e
].add
(i
.avance
)
162 if state
== automaton
.states
.first
then
165 name
= "{state.name} {e}"
167 var next
= new LRState(name
)
173 for e
, next
in nexts
do
175 #print "#states: {seen.length}"
177 # Look for an existing LR0 state in the automaton
187 # If not, add it to the pool and the todo-list
189 next
.number
= seen
.length
190 assert not seen
.has
(next
)
196 var t
= new LRTransition(state
, next
, e
)
204 # Compute `nullables`, `firsts` and `afters` of productions
210 if p
.is_nullable
then continue
212 if a
.phony
then continue
218 else if e
isa Production then
219 if not e
.is_nullable
then nullabl
= false
231 if not changed
then break
239 if a
.phony
then continue
243 if e
== null then break
245 if try_add
(fs
, i
) then changed
= true
247 else if e
isa Production then
249 if try_add
(fs
, t
) then changed
= true
251 if not e
.is_nullable
then break
259 if not changed
then break
266 if a
.phony
then continue
267 var p0
: nullable Production = null
271 if e
== null then break
273 if e
isa Production then p0
= e
else p0
= null
282 if try_add
(afs
, i
) then changed
= true
283 else if e
isa Production then
285 if try_add
(afs
, t
) then changed
= true
287 if e
.is_nullable
then
289 if try_add
(afs
, t
) then changed
= true
299 for t
in p1
.afters
do
300 if try_add
(afs
, t
) then changed
= true
305 if not changed
then break
310 private fun try_add
(set
: Set[Item], t
: Item): Bool
312 var res
= not set
.has
(t
)
313 if res
then set
.add
(t
)
319 # A production of a grammar
322 # The alternative of the production
323 var alts
= new Array[Alternative]
325 # Additional alternatives in the AST
326 var ast_alts
= new Array[Alternative]
328 # Is self the accept production
331 # Is self transformed to a other production for the AST
333 var spe
: nullable Production = null is writable
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
337 var altone
= false is writable
339 # The cname of the class in the AST
342 if spe
!= null then return spe
.acname
346 # Is the production nullable
347 var is_nullable
= false
349 # The first tokens of the production
350 var firsts
= new HashSet[Item]
352 # The tokens that may follows the production (as in SLR)
353 var afters
= new HashSet[Item]
356 # Crate a new empty alternative
357 fun new_alt0
(name
: String): Alternative
359 var a
= new Alternative(self, name
, new Array[Element])
364 # Crate a new alternative with some elements
365 fun new_alt
(name
: String, elems
: Element...): Alternative
367 var a
= new Alternative(self, name
, elems
)
372 # Crate a new alternative with some elements
373 fun new_alt2
(name
: String, elems
: Array[Element]): Alternative
375 var a
= new Alternative(self, name
, elems
)
380 # Return a set of items for the production
381 fun start_state
: Array[Item]
383 var res
= new Array[Item]
385 if a
.phony
then continue
391 # States in the LR automaton that has a outgoing transition on self
392 var gotos
= new ArraySet[LRState]
395 # An alternative of a production
400 # The name of the alternative
401 var name
: String is writable
403 # The elements of the alternative
404 var elems
: Array[Element]
406 # The first item of the alternative
407 var first_item
= new Item(self, 0)
409 # The name of the elements
410 var elems_names
= new Array[nullable String]
412 # Return a name for a given element (or a number if unamed)
413 fun elemname
(i
: Int): String
415 if i
>= elems_names
.length
then return "{i}"
416 var n
= elems_names
[i
]
417 if n
== null then return "{i}"
421 redef fun to_s
do return "{prod.name}::{name}={elems.join(" ")}"
425 return "N{name.to_cmangle}"
428 # The code for the reduction
429 var codes
: nullable Array[Code] = null is writable
431 # Is the alternative transformed (ie not in the AST)
432 var trans
= false is writable
434 # Is the alternative unparsable? (ie not in the automaton)
435 var phony
= false is writable
437 # Initialize codes with the elements
440 if codes
!= null then return
441 var codes
= new Array[Code]
444 codes
.add
(new CodePop)
446 codes
.add
(new CodeNew(self))
450 # A step in the construction of the AST.
451 # Used to model transformations
455 # Get a element from the stack
458 redef fun to_s
do return "pop"
461 # Allocate a new AST node for an alternative using the correct number of popped elements
465 # The associated alternative
468 redef fun to_s
do return "New {alt.name}/{alt.elems.length}"
474 redef fun to_s
do return "null"
477 # Elements of an alternative.
478 # Either a `Production` or a `Token`
479 abstract class Element
480 # The name of the element
482 redef fun to_s
do return name
484 private var acname_cache
: nullable String = null
486 # The mangled name of the element
487 fun cname
: String do return "N{name.to_cmangle}"
489 # The name of the class in the AST
490 fun acname
: String do
491 var res
= acname_cache
493 res
= "N{to_s.to_cmangle}"
499 # The name of the class in the AST
500 fun acname
=(s
: String) do acname_cache
= s
506 # States of the LR automaton that shift on self
507 var shifts
= new ArraySet[LRState]
508 # States of the LR automaton that reduce on self in the lookahead(1)
509 var reduces
= new ArraySet[LRState]
516 # The grammar of the automaton
520 var states
= new Array[LRState]
522 # Dump of the automaton
525 var res
= new Array[String]
526 res
.add
"* LRAutomaton: {states.length} states\n"
528 res
.add
"s{s.number} {s.name}\n"
532 if i
.next
!= null then continue
533 for i2
in s
.lookahead
(i
) do
534 res
.add
"\t\t\t{i2}\n"
537 res
.add
"\tOTHER ITEMS\n"
539 if s
.core
.has
(i
) then continue
541 if i
.next
!= null then continue
542 for i2
in s
.lookahead
(i
) do
543 res
.add
"\t\t\t{i2}\n"
546 res
.add
"\tTRANSITIONS {s.outs.length}\n"
548 res
.add
"\t\t{t.elem} |-> s{t.to.number}\n"
550 res
.add
"\tACTIONS\n"
552 res
.add
"\t\tSTATE LR0\n"
554 res
.add
"\t\tSTATE SLR\n"
555 for t
, a
in s
.guarded_reduce
do
557 res
.add
"\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
559 else if s
.shifts
.has
(t
) then
560 res
.add
"\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
565 if not s
.shifts
.is_empty
then
566 res
.add
"\t\tSHIFT {s.shifts.join(" ")}\n"
568 for r
in s
.reduces
do
569 res
.add
"\t\tREDUCE {r}\n"
575 # Generate a graphviz file of the automaton
576 fun to_dot
(path
: String)
578 var f
= new FileWriter.open
(path
)
579 f
.write
("digraph g \{\n")
580 f
.write
("rankdir=LR;\n")
581 f
.write
("node[shape=Mrecord,height=0];\n")
584 f
.write
"s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
586 f.write "{i.to_s.escape_to_dot}\\l
"
590 if s.core.has(i) then continue
591 f.write "{i.to_s.escape_to_dot}\\l
"
596 for t
, a
in s
.guarded_reduce
do
601 else if s
.shifts
.has
(t
) then
602 f
.write
",color=orange"
608 f
.write
",color=blue"
613 f
.write
"s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\
"];\n"
620 # Generate the parser of the automaton
621 fun gen_to_nit
(filepath
: String, name
: String)
623 var gen
= new Generator
624 gen
.gen_to_nit
(self, name
)
625 var f
= new FileWriter.open
(filepath
)
634 private class Generator
635 var out
= new Array[String]
636 fun add
(s
: String) do out
.add
(s
)
637 fun gen_to_nit
(autom
: LRAutomaton, name
: String)
639 var states
= autom
.states
640 var gram
= autom
.grammar
642 add
"# Parser generated by nitcc for the grammar {name}"
643 add
"module {name}_parser is no_warning(\"missing-doc\
",\"old-init\
")"
644 add
"import nitcc_runtime"
646 add
"class Parser_{name}"
648 add
"\tredef fun start_state do return state_{states.first.cname}"
651 add
"redef class Object"
653 add
"\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
655 for p
in gram
.prods
do
656 add
"\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
660 add
"redef class NToken"
662 if not s
.need_guard
then continue
663 add
"\t# guarded action for state {s.name}"
664 add
"\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
665 add
"\tprivate fun action_s{s.cname}(parser: Parser) do"
666 if s
.reduces
.length
!= 1 then
667 add
"\t\tparser.parse_error"
669 gen_reduce_to_nit
(s
.reduces
.first
)
675 for t
in gram
.tokens
do
676 if t
.name
== "Eof" then
677 add
"redef class {t.cname}"
679 add
"class {t.cname}"
683 if not s
.need_guard
then continue
684 add
"\tredef fun action_s{s.cname}(parser) do"
685 gen_shift_to_nit
(s
, t
)
688 for s
in t
.reduces
do
689 if not s
.need_guard
then continue
690 if s
.reduces
.length
<= 1 then continue
691 add
"\tredef fun action_s{s.cname}(parser) do"
692 gen_reduce_to_nit
(s
.guarded_reduce
[t
].first
.alt
)
695 add
"\tredef fun node_name do return \"{t.name.escape_to_nit}\
""
699 add
"redef class LRGoto"
701 if s
.gotos
.length
<= 1 then continue
702 add
"\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
706 for p
in gram
.prods
do
707 add
"class Goto_{p.cname}"
710 if s
.gotos
.length
<= 1 then continue
711 add
"\tredef fun goto_s{s.cname}(parser) do"
712 gen_goto_to_nit
(s
, p
)
718 var ps
= gram
.prods
.to_a
719 ps
.add_all
(gram
.ast_prods
)
721 if p
.spe
== null and not p
.altone
then
722 if p
.name
.has_suffix
("?") or p
.name
.has_suffix
("+") then continue
723 add
"class {p.acname}"
725 add
"\tredef fun node_name do return \"{p.name.escape_to_nit}\
""
729 var als
= p
.alts
.to_a
730 als
.add_all
(p
.ast_alts
)
732 if a
.trans
then continue
733 add
"class {a.cname}"
737 add
"\tsuper {p.acname}"
739 add
"\tredef fun node_name do return \"{a.name.escape_to_nit}\
""
740 var initarg
= new Array[String]
741 for i
in [0..a
.elems
.length
[ do
742 add
"\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
743 initarg
.add
("n_{a.elemname(i)}: {a.elems[i].acname}")
745 if initarg
.is_empty
then
748 add
"\tinit({initarg.join(", ")}) do"
749 for i
in [0..a
.elems
.length
[ do
750 add
"\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
754 add
"\tredef fun number_of_children do return {a.elems.length}"
755 add
"\tredef fun child(i) do"
756 for i
in [0..a
.elems
.length
[ do
757 add
"\t\tif i == {i} then return n_{a.elemname(i)}"
766 add
"# State {s.name}"
767 add
"private class LRState{s.cname}"
768 add
"\tsuper LRState"
770 add
"\tredef fun to_s do return \"{s.name.escape_to_nit}\
""
772 var err
= new Array[String]
775 if e
isa Production then err
.add e
.name
777 if err
.is_empty
then for t
in s
.outs
do
779 if e
isa Token then err
.add e
.name
782 add
"\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\
""
784 add
"\tredef fun action(parser) do"
786 add
"\t\tparser.peek_token.action_s{s.cname}(parser)"
787 else if s
.reduces
.length
== 1 then
788 gen_reduce_to_nit
(s
.reduces
.first
)
794 if not s
.gotos
.is_empty
then
795 add
"\tredef fun goto(parser, goto) do"
796 if s
.gotos
.length
> 1 then
797 add
"\t\tgoto.goto_s{s.cname}(parser)"
799 gen_goto_to_nit
(s
, s
.gotos
.first
)
810 fun gen_shift_to_nit
(s
: LRState, t
: Token)
812 var dest
= s
.trans
(t
)
813 add
"\t\tparser.shift(state_{dest.cname})"
817 fun gen_goto_to_nit
(s
: LRState, p
: Production)
819 var dest
= s
.trans
(p
)
820 add
"\t\tparser.push(state_{dest.cname})"
823 fun gen_reduce_to_nit
(alt
: Alternative)
825 add
"\t\t# REDUCE {alt}"
826 var i
= alt
.elems
.length
- 1
827 for e
in alt
.elems
.to_a
.reversed
do
828 add
"\t\tvar n{i} = parser.pop.as({e.acname})"
832 if alt
.name
.has_suffix
("+_more") then
833 add
"\t\tvar prod = n0"
834 add
"\t\tn0.children.add(n1)"
835 else if alt
.name
.has_suffix
("+_one") then
836 add
"\t\tvar prod = new {alt.prod.acname}"
837 add
"\t\tprod.children.add(n0)"
842 var st
= new Array[String]
843 for c
in alt
.codes
.as(not null) do
844 if c
isa CodePop then
847 else if c
isa CodeNull then
849 else if c
isa CodeNew then
852 var from
= st
.length
- calt
.elems
.length
853 var args
= new List[String]
854 for j
in [from
..st
.length
[ do
858 if args
.is_empty
then
859 add
"\t\tvar p{cpt} = new {calt.cname}"
861 add
"\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
864 #for j in [from..st.length[ do
865 #if st[j] == "null" then continue
866 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
872 assert st
.length
== 1
873 add
"\t\tvar prod = {st.first}"
876 add
"\t\tparser.node_stack.push prod"
877 if alt
.prod
.accept
then
878 add
"\t\tparser.stop = true"
880 add
"\t\tparser.goto(goto_{alt.prod.cname})"
885 # A state in a LR automaton
887 # Name of the automaton (short part from the start)
891 fun cname
: String do return name
.to_cmangle
897 var items
= new HashSet[Item]
899 # Set of items only in the core
900 var core
= new HashSet[Item]
902 # Outgoing transitions
903 var ins
= new Array[LRTransition]
905 # Ingoing transitions
906 var outs
= new Array[LRTransition]
909 fun trans
(e
: Element): nullable LRState
911 for t
in outs
do if t
.elem
== e
then return t
.to
915 redef fun ==(o
) do return o
isa LRState and core
== o
.core
916 redef fun hash
do return items
.length
918 redef fun to_s
do return items
.join
(" ; ")
920 # Add and item in the core
921 fun add
(i
: Item): Bool
923 if items
.has
(i
) then return false
926 if i
.pos
> 0 or i
.alt
.prod
.accept
then core
.add
(i
)
930 # Recursively extends item outside the core
934 if e
== null then return
935 if not e
isa Production then return
936 for i2
in e
.start_state
do
937 if add
(i2
) then extends
(i2
)
942 fun lookahead
(i
: Item): Set[Item]
944 return i
.alt
.prod
.afters
947 # Set of all reductions
948 var reduces
= new ArraySet[Alternative]
950 var shifts
= new ArraySet[Token]
952 var gotos
= new ArraySet[Production]
953 # Reduction guarded by tokens
954 var guarded_reduce
= new HashMap[Token, Set[Item]]
955 # Shifts guarded by tokens
956 var guarded_shift
= new HashMap[Token, Set[Item]]
958 # Does the state need a guard to perform an action?
959 fun need_guard
: Bool do return not shifts
.is_empty
or reduces
.length
> 1
962 fun is_lr0
: Bool do return reduces
.length
<= 1 and shifts
.is_empty
or reduces
.is_empty
964 # Compute guards and conflicts
968 for i
in items
.to_a
do
972 # Collect action and conflicts
977 for i2
in lookahead
(i
) do
981 if not guarded_reduce
.has_key
(t
) then
982 guarded_reduce
[t
] = new ArraySet[Item]
984 guarded_reduce
[t
].add
(i
)
986 else if n
isa Token then
989 if not guarded_shift
.has_key
(n
) then
990 guarded_shift
[n
] = new ArraySet[Item]
992 guarded_shift
[n
].add
(i
)
993 else if n
isa Production then
1000 for t
, a
in guarded_reduce
do
1001 if a
.length
> 1 then
1002 print
"REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1003 for i
in a
do print
"\treduce: {i}"
1004 else if guarded_shift
.has_key
(t
) then
1006 var confs
= new Array[Item]
1007 var ress
= new Array[String]
1008 var g
= guarded_shift
[t
]
1009 for si
in lookahead
(ri
) do
1010 if si
.next
!= t
then continue
1011 if not g
.has
(si
) then
1016 var csi
: nullable Item = null
1017 for bsi
in back_expand
(si
) do
1018 if bsi
.alt
.prod
== p
then
1027 ress
.add
"\tshift: {si}"
1029 ress
.add
"\tcore shift: {csi}"
1032 if confs
.is_empty
then
1033 print
"Automatic Dangling on state {self.number} {self.name} for token {t}:"
1034 print
"\treduce: {ri}"
1035 for r
in ress
do print r
1036 guarded_reduce
.keys
.remove
(t
)
1038 print
"SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1039 print
"\treduce: {ri}"
1040 for i
in guarded_shift
[t
] do print
"\tshift: {i}"
1046 # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
1047 fun back_expand
(i
: Item): Set[Item]
1049 var res
= new ArraySet[Item]
1052 while not todo
.is_empty
do
1054 if x
.pos
> 0 then continue
1057 if y
.next
!= p
then continue
1058 if res
.has
(y
) then continue
1067 # A transition in a LR automaton
1071 # The destination state
1073 # The element labeling the transition
1077 # A alternative with a cursor (dot) before an element
1080 var alt
: Alternative
1081 # The dot index (0 means before the first element)
1084 redef fun ==(o
) do return o
isa Item and alt
== o
.alt
and pos
== o
.pos
1085 redef fun hash
do return alt
.hash
+ pos
1089 var b
= new FlatBuffer
1090 b
.append
("{alt.prod.name}::{alt.name}=")
1091 for i
in [0..alt
.elems
.length
[
1093 if pos
== i
then b
.append
(".") else b
.append
(" ")
1094 b
.append
(alt
.elems
[i
].to_s
)
1096 if pos
== alt
.elems
.length
then b
.append
(".")
1100 # The element that follows the dot, null if the dot is at the end
1101 fun next
: nullable Element
1103 if pos
>= alt
.elems
.length
then return null
1104 return alt
.elems
[pos
]
1108 fun lookahead
: Set[Token]
1110 var res
= new HashSet[Token]
1112 while p
< alt
.elems
.length
do
1113 var e
= alt
.elems
[p
]
1117 else if e
isa Production then
1118 #res.add_all(e.firsts)
1119 if not e
.is_nullable
then break
1126 # The item that advanced once
1129 var res
= new Item(alt
, pos
+1)