b3b2c6b94baa52c678fab79956ac3f401c7268d1
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # A grammar describing a language
17 # The productions (non-terminal) of the concrete grammar
18 var prods
= new Array[Production]
20 # The additional abstract productions of the grammar
22 var ast_prods
= new Array[Production]
24 # The tokens (terminals) of the grammar
25 var tokens
= new Array[Token]
27 # Dump of the concrete grammar and the transformations
30 var res
= new FlatBuffer
33 res
.append
("{p.name} \{-> {p.spe.name}\}=\n")
35 res
.append
("{p.name} =\n")
38 if not p
.alts
.is_empty
then last
= p
.alts
.last
40 res
.append
("\t\{{a.name}:\} {a.elems.join(" ")}")
41 if a
.codes
== null then a
.make_codes
42 res
.append
("\n\t\t\{-> {a.codes.join(", ")}\}")
43 if a
== last
then res
.append
(" ;\n") else res
.append
(" |\n")
45 if p
.is_nullable
then res
.append
"\t// is nullable\n"
46 if not p
.firsts
.is_empty
then
47 res
.append
"\t// firsts:\n"
48 for x
in p
.firsts
do res
.append
"\t// {x}\n"
50 if not p
.afters
.is_empty
then
51 res
.append
"\t// afters:\n"
52 for x
in p
.afters
do res
.append
"\t// {x}\n"
58 # Inline (ie. remove from the concrete grammar) some production
59 # REQUIRE: no circular production in `prods`
60 fun inline
(prods
: Collection[Production])
62 for p
in self.prods
do
63 for a
in p
.alts
.to_a
do
64 if a
.phony
then continue
67 if e
isa Production and prods
.has
(e
) then to_inline
= true
69 if not to_inline
then continue
71 if a
.codes
== null then a
.make_codes
73 var a0
= new Alternative(p
, a
.name
, new Array[Element])
75 a0
.codes
= new Array[Code]
77 var pool2
= new Array[Alternative]
79 if not e
isa Production or not prods
.has
(e
) then
82 x
.codes
.add
(new CodePop)
87 print
"Circular inlining on {p} :: {a}"
92 if a
.phony
then continue
93 if a2
.codes
== null then a2
.make_codes
95 var name
= a
.name
+ "_" + pool2
.length
.to_s
96 var na
= new Alternative(p
, name
, new Array[Element])
99 na
.elems
.add_all
(x
.elems
)
100 na
.elems
.add_all
(a2
.elems
)
101 na
.codes
= new Array[Code]
102 na
.codes
.add_all
(x
.codes
.as(not null))
103 na
.codes
.add_all
(a2
.codes
.as(not null))
111 x
.codes
.add
(a
.codes
.last
)
120 self.ast_prods
.add
(p
)
124 # Compute a LR automaton
127 var start
= new Production("_start")
129 var eof
= new Token("Eof")
131 start
.new_alt
("Start", self.prods
.first
, eof
)
136 var first
= new LRState("Start")
138 for i
in start
.start_state
do first
.add
(i
)
140 var automaton
= new LRAutomaton(self)
142 var todo
= new List[LRState]
144 var seen
= new HashSet[LRState]
147 while not todo
.is_empty
do
148 var state
= todo
.shift
151 automaton
.states
.add
(state
)
154 var nexts
= new HashMap[Element, LRState]
155 for i
in state
.items
do
157 if e
== null then continue
158 if nexts
.has_key
(e
) then
159 nexts
[e
].add
(i
.avance
)
162 if state
== automaton
.states
.first
then
165 name
= "{state.name} {e}"
167 var next
= new LRState(name
)
173 for e
, next
in nexts
do
175 #print "#states: {seen.length}"
177 # Look for an existing LR0 state in the automaton
187 # If not, add it to the pool and the todo-list
189 next
.number
= seen
.length
190 assert not seen
.has
(next
)
196 var t
= new LRTransition(state
, next
, e
)
204 # Compute `nullables`, `firsts` and `afters` of productions
210 if p
.is_nullable
then continue
212 if a
.phony
then continue
218 else if e
isa Production then
219 if not e
.is_nullable
then nullabl
= false
231 if not changed
then break
239 if a
.phony
then continue
243 if e
== null then break
245 if try_add
(fs
, i
) then changed
= true
247 else if e
isa Production then
249 if try_add
(fs
, t
) then changed
= true
251 if not e
.is_nullable
then break
259 if not changed
then break
266 if a
.phony
then continue
267 var p0
: nullable Production = null
271 if e
== null then break
273 if e
isa Production then p0
= e
else p0
= null
282 if try_add
(afs
, i
) then changed
= true
283 else if e
isa Production then
285 if try_add
(afs
, t
) then changed
= true
287 if e
.is_nullable
then
289 if try_add
(afs
, t
) then changed
= true
299 for t
in p1
.afters
do
300 if try_add
(afs
, t
) then changed
= true
305 if not changed
then break
310 private fun try_add
(set
: Set[Item], t
: Item): Bool
312 var res
= not set
.has
(t
)
313 if res
then set
.add
(t
)
319 # A production of a grammar
322 # The alternative of the production
323 var alts
= new Array[Alternative]
325 # Additional alternatives in the AST
326 var ast_alts
= new Array[Alternative]
328 # Is self the accept production
331 # Is self transformed to a other production for the AST
333 var spe
: nullable Production = null is writable
335 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
337 var altone
= false is writable
339 # The cname of the class in the AST
342 if spe
!= null then return spe
.acname
346 # Is the production nullable
347 var is_nullable
= false
349 # The first tokens of the production
350 var firsts
= new HashSet[Item]
352 # The tokens that may follows the production (as in SLR)
353 var afters
= new HashSet[Item]
356 # Crate a new empty alternative
357 fun new_alt0
(name
: String): Alternative
359 var a
= new Alternative(self, name
, new Array[Element])
364 # Crate a new alternative with some elements
365 fun new_alt
(name
: String, elems
: Element...): Alternative
367 var a
= new Alternative(self, name
, elems
)
372 # Crate a new alternative with some elements
373 fun new_alt2
(name
: String, elems
: Array[Element]): Alternative
375 var a
= new Alternative(self, name
, elems
)
380 # Return a set of items for the production
381 fun start_state
: Array[Item]
383 var res
= new Array[Item]
385 if a
.phony
then continue
391 # States in the LR automaton that has a outgoing transition on self
392 var gotos
= new ArraySet[LRState]
395 # An alternative of a production
400 # The name of the alternative
401 var name
: String is writable
403 # The elements of the alternative
404 var elems
: Array[Element]
406 # The first item of the alternative
407 var first_item
= new Item(self, 0)
409 # The name of the elements
410 var elems_names
= new Array[nullable String]
412 # Return a name for a given element (or a number if unamed)
413 fun elemname
(i
: Int): String
415 if i
>= elems_names
.length
then return "{i}"
416 var n
= elems_names
[i
]
417 if n
== null then return "{i}"
421 redef fun to_s
do return "{prod.name}::{name}={elems.join(" ")}"
425 return "N{name.to_cmangle}"
428 # The code for the reduction
429 var codes
: nullable Array[Code] = null is writable
431 # Is the alternative transformed (ie not in the AST)
432 var trans
= false is writable
434 # Is the alternative unparsable? (ie not in the automaton)
435 var phony
= false is writable
437 # Initialize codes with the elements
440 if codes
!= null then return
441 var codes
= new Array[Code]
444 codes
.add
(new CodePop)
446 codes
.add
(new CodeNew(self))
450 # A step in the construction of the AST.
451 # Used to model transformations
455 # Get a element from the stack
458 redef fun to_s
do return "pop"
461 # Allocate a new AST node for an alternative using the correct number of popped elements
465 # The associated alternative
468 redef fun to_s
do return "New {alt.name}/{alt.elems.length}"
474 redef fun to_s
do return "null"
477 # Elements of an alternative.
478 # Either a `Production` or a `Token`
479 abstract class Element
480 # The name of the element
482 redef fun to_s
do return name
484 private var acname_cache
: nullable String = null
486 # The mangled name of the element
487 fun cname
: String do return "N{name.to_cmangle}"
489 # The name of the class in the AST
490 fun acname
: String do
491 var res
= acname_cache
493 res
= "N{to_s.to_cmangle}"
499 # The name of the class in the AST
500 fun acname
=(s
: String) do acname_cache
= s
506 # States of the LR automaton that shift on self
507 var shifts
= new ArraySet[LRState]
508 # States of the LR automaton that reduce on self in the lookahead(1)
509 var reduces
= new ArraySet[LRState]
516 # The grammar of the automaton
520 var states
= new Array[LRState]
522 # Dump of the automaton
525 var res
= new Array[String]
526 res
.add
"* LRAutomaton: {states.length} states\n"
528 res
.add
"s{s.number} {s.name}\n"
532 if i
.next
!= null then continue
533 for i2
in s
.lookahead
(i
) do
534 res
.add
"\t\t\t{i2}\n"
537 res
.add
"\tOTHER ITEMS\n"
539 if s
.core
.has
(i
) then continue
541 if i
.next
!= null then continue
542 for i2
in s
.lookahead
(i
) do
543 res
.add
"\t\t\t{i2}\n"
546 res
.add
"\tTRANSITIONS {s.outs.length}\n"
548 res
.add
"\t\t{t.elem} |-> s{t.to.number}\n"
550 res
.add
"\tACTIONS\n"
552 res
.add
"\t\tSTATE LR0\n"
554 res
.add
"\t\tSTATE SLR\n"
555 for t
, a
in s
.guarded_reduce
do
557 res
.add
"\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
559 else if s
.shifts
.has
(t
) then
560 res
.add
"\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
565 if not s
.shifts
.is_empty
then
566 res
.add
"\t\tSHIFT {s.shifts.join(" ")}\n"
568 for r
in s
.reduces
do
569 res
.add
"\t\tREDUCE {r}\n"
575 # Generate a graphviz file of the automaton
576 fun to_dot
(path
: String)
578 var f
= new FileWriter.open
(path
)
579 f
.write
("digraph g \{\n")
580 f
.write
("rankdir=LR;\n")
581 f
.write
("node[shape=Mrecord,height=0];\n")
584 f
.write
"s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
586 f.write "{i.to_s.escape_to_dot}\\l
"
590 if s.core.has(i) then continue
591 f.write "{i.to_s.escape_to_dot}\\l
"
596 for t
, a
in s
.guarded_reduce
do
601 else if s
.shifts
.has
(t
) then
602 f
.write
",color=orange"
608 f
.write
",color=blue"
613 f
.write
"s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\
"];\n"
620 # Generate the parser of the automaton
621 fun gen_to_nit
(filepath
: String, name
: String)
623 var gen
= new Generator
624 gen
.gen_to_nit
(self, name
)
625 var f
= new FileWriter.open
(filepath
)
634 private class Generator
635 var out
= new Array[String]
636 fun add
(s
: String) do out
.add
(s
)
637 fun gen_to_nit
(autom
: LRAutomaton, name
: String)
639 var states
= autom
.states
640 var gram
= autom
.grammar
642 add
"# Parser generated by nitcc for the grammar {name}"
643 add
"module {name}_parser is no_warning(\"missing-doc\
",\"old-init\
")"
644 add
"import nitcc_runtime"
646 add
"class Parser_{name}"
648 add
"\tredef fun start_state do return state_{states.first.cname}"
652 add
"private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
654 for p
in gram
.prods
do
655 add
"private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
658 add
"redef class NToken"
660 if not s
.need_guard
then continue
661 add
"\t# guarded action for state {s.name}"
662 add
"\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
663 add
"\tprivate fun action_s{s.cname}(parser: Parser) do"
664 if s
.reduces
.length
!= 1 then
665 add
"\t\tparser.parse_error"
667 gen_reduce_to_nit
(s
.reduces
.first
)
673 for t
in gram
.tokens
do
674 if t
.name
== "Eof" then
675 add
"redef class {t.cname}"
677 add
"class {t.cname}"
681 if not s
.need_guard
then continue
682 add
"\tredef fun action_s{s.cname}(parser) do"
683 gen_shift_to_nit
(s
, t
)
686 for s
in t
.reduces
do
687 if not s
.need_guard
then continue
688 if s
.reduces
.length
<= 1 then continue
689 add
"\tredef fun action_s{s.cname}(parser) do"
690 gen_reduce_to_nit
(s
.guarded_reduce
[t
].first
.alt
)
693 add
"\tredef fun node_name do return \"{t.name.escape_to_nit}\
""
697 add
"redef class LRGoto"
699 if s
.gotos
.length
<= 1 then continue
700 add
"\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
704 for p
in gram
.prods
do
705 add
"class Goto_{p.cname}"
708 if s
.gotos
.length
<= 1 then continue
709 add
"\tredef fun goto_s{s.cname}(parser) do"
710 gen_goto_to_nit
(s
, p
)
716 var ps
= gram
.prods
.to_a
717 ps
.add_all
(gram
.ast_prods
)
719 if p
.spe
== null and not p
.altone
then
720 if p
.name
.has_suffix
("?") or p
.name
.has_suffix
("+") then continue
721 add
"class {p.acname}"
723 add
"\tredef fun node_name do return \"{p.name.escape_to_nit}\
""
727 var als
= p
.alts
.to_a
728 als
.add_all
(p
.ast_alts
)
730 if a
.trans
then continue
731 add
"class {a.cname}"
735 add
"\tsuper {p.acname}"
737 add
"\tredef fun node_name do return \"{a.name.escape_to_nit}\
""
738 var initarg
= new Array[String]
739 for i
in [0..a
.elems
.length
[ do
740 add
"\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
741 initarg
.add
("n_{a.elemname(i)}: {a.elems[i].acname}")
743 if initarg
.is_empty
then
746 add
"\tinit({initarg.join(", ")}) do"
747 for i
in [0..a
.elems
.length
[ do
748 add
"\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
752 add
"\tredef fun number_of_children do return {a.elems.length}"
753 add
"\tredef fun child(i) do"
754 for i
in [0..a
.elems
.length
[ do
755 add
"\t\tif i == {i} then return n_{a.elemname(i)}"
764 add
"# State {s.name}"
765 add
"private class LRState{s.cname}"
766 add
"\tsuper LRState"
768 add
"\tredef fun to_s do return \"{s.name.escape_to_nit}\
""
770 var err
= new Array[String]
773 if e
isa Production then err
.add e
.name
775 if err
.is_empty
then for t
in s
.outs
do
777 if e
isa Token then err
.add e
.name
780 add
"\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\
""
782 add
"\tredef fun action(parser) do"
784 add
"\t\tparser.peek_token.action_s{s.cname}(parser)"
785 else if s
.reduces
.length
== 1 then
786 gen_reduce_to_nit
(s
.reduces
.first
)
792 if not s
.gotos
.is_empty
then
793 add
"\tredef fun goto(parser, goto) do"
794 if s
.gotos
.length
> 1 then
795 add
"\t\tgoto.goto_s{s.cname}(parser)"
797 gen_goto_to_nit
(s
, s
.gotos
.first
)
808 fun gen_shift_to_nit
(s
: LRState, t
: Token)
810 var dest
= s
.trans
(t
)
811 add
"\t\tparser.shift(state_{dest.cname})"
815 fun gen_goto_to_nit
(s
: LRState, p
: Production)
817 var dest
= s
.trans
(p
)
818 add
"\t\tparser.push(state_{dest.cname})"
821 fun gen_reduce_to_nit
(alt
: Alternative)
823 add
"\t\t# REDUCE {alt}"
824 var i
= alt
.elems
.length
- 1
825 for e
in alt
.elems
.to_a
.reversed
do
826 add
"\t\tvar n{i} = parser.pop.as({e.acname})"
830 if alt
.name
.has_suffix
("+_more") then
831 add
"\t\tvar prod = n0"
832 add
"\t\tn0.children.add(n1)"
833 else if alt
.name
.has_suffix
("+_one") then
834 add
"\t\tvar prod = new {alt.prod.acname}"
835 add
"\t\tprod.children.add(n0)"
840 var st
= new Array[String]
841 for c
in alt
.codes
.as(not null) do
842 if c
isa CodePop then
845 else if c
isa CodeNull then
847 else if c
isa CodeNew then
850 var from
= st
.length
- calt
.elems
.length
851 var args
= new List[String]
852 for j
in [from
..st
.length
[ do
856 if args
.is_empty
then
857 add
"\t\tvar p{cpt} = new {calt.cname}"
859 add
"\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
862 #for j in [from..st.length[ do
863 #if st[j] == "null" then continue
864 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
870 assert st
.length
== 1
871 add
"\t\tvar prod = {st.first}"
874 add
"\t\tparser.node_stack.push prod"
875 if alt
.prod
.accept
then
876 add
"\t\tparser.stop = true"
878 add
"\t\tparser.goto(goto_{alt.prod.cname})"
883 # A state in a LR automaton
885 # Name of the automaton (short part from the start)
889 fun cname
: String do return name
.to_cmangle
895 var items
= new HashSet[Item]
897 # Set of items only in the core
898 var core
= new HashSet[Item]
900 # Outgoing transitions
901 var ins
= new Array[LRTransition]
903 # Ingoing transitions
904 var outs
= new Array[LRTransition]
907 fun trans
(e
: Element): nullable LRState
909 for t
in outs
do if t
.elem
== e
then return t
.to
913 redef fun ==(o
) do return o
isa LRState and core
== o
.core
914 redef fun hash
do return items
.length
916 redef fun to_s
do return items
.join
(" ; ")
918 # Add and item in the core
919 fun add
(i
: Item): Bool
921 if items
.has
(i
) then return false
924 if i
.pos
> 0 or i
.alt
.prod
.accept
then core
.add
(i
)
928 # Recursively extends item outside the core
932 if e
== null then return
933 if not e
isa Production then return
934 for i2
in e
.start_state
do
935 if add
(i2
) then extends
(i2
)
940 fun lookahead
(i
: Item): Set[Item]
942 return i
.alt
.prod
.afters
945 # Set of all reductions
946 var reduces
= new ArraySet[Alternative]
948 var shifts
= new ArraySet[Token]
950 var gotos
= new ArraySet[Production]
951 # Reduction guarded by tokens
952 var guarded_reduce
= new HashMap[Token, Set[Item]]
953 # Shifts guarded by tokens
954 var guarded_shift
= new HashMap[Token, Set[Item]]
956 # Does the state need a guard to perform an action?
957 fun need_guard
: Bool do return not shifts
.is_empty
or reduces
.length
> 1
960 fun is_lr0
: Bool do return reduces
.length
<= 1 and shifts
.is_empty
or reduces
.is_empty
962 # Compute guards and conflicts
966 for i
in items
.to_a
do
970 # Collect action and conflicts
975 for i2
in lookahead
(i
) do
979 if not guarded_reduce
.has_key
(t
) then
980 guarded_reduce
[t
] = new ArraySet[Item]
982 guarded_reduce
[t
].add
(i
)
984 else if n
isa Token then
987 if not guarded_shift
.has_key
(n
) then
988 guarded_shift
[n
] = new ArraySet[Item]
990 guarded_shift
[n
].add
(i
)
991 else if n
isa Production then
998 # Token to remove as reduction guard to solve S/R conflicts
999 var removed_reduces
= new Array[Token]
1000 for t
, a
in guarded_reduce
do
1001 if a
.length
> 1 then
1002 print
"REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1003 for i
in a
do print
"\treduce: {i}"
1005 if guarded_shift
.has_key
(t
) then
1007 var confs
= new Array[Item]
1008 var ress
= new Array[String]
1009 var g
= guarded_shift
[t
]
1010 for si
in lookahead
(ri
) do
1011 if si
.next
!= t
then continue
1012 if not g
.has
(si
) then
1017 var csi
: nullable Item = null
1018 for bsi
in back_expand
(si
) do
1019 if bsi
.alt
.prod
== p
then
1028 ress
.add
"\tshift: {si}"
1030 ress
.add
"\tcore shift: {csi}"
1033 if confs
.is_empty
then
1034 print
"Automatic Dangling on state {self.number} {self.name} for token {t}:"
1035 print
"\treduce: {ri}"
1036 for r
in ress
do print r
1037 removed_reduces
.add t
1039 print
"SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1040 print
"\treduce: {ri}"
1041 for i
in guarded_shift
[t
] do print
"\tshift: {i}"
1042 removed_reduces
.add t
1046 for t
in removed_reduces
do
1047 guarded_reduce
.keys
.remove
(t
)
1051 # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
1052 fun back_expand
(i
: Item): Set[Item]
1054 var res
= new ArraySet[Item]
1057 while not todo
.is_empty
do
1059 if x
.pos
> 0 then continue
1062 if y
.next
!= p
then continue
1063 if res
.has
(y
) then continue
1072 # A transition in a LR automaton
1076 # The destination state
1078 # The element labeling the transition
1082 # A alternative with a cursor (dot) before an element
1085 var alt
: Alternative
1086 # The dot index (0 means before the first element)
1089 redef fun ==(o
) do return o
isa Item and alt
== o
.alt
and pos
== o
.pos
1090 redef fun hash
do return alt
.hash
+ pos
1094 var b
= new FlatBuffer
1095 b
.append
("{alt.prod.name}::{alt.name}=")
1096 for i
in [0..alt
.elems
.length
[
1098 if pos
== i
then b
.append
(".") else b
.append
(" ")
1099 b
.append
(alt
.elems
[i
].to_s
)
1101 if pos
== alt
.elems
.length
then b
.append
(".")
1105 # The element that follows the dot, null if the dot is at the end
1106 fun next
: nullable Element
1108 if pos
>= alt
.elems
.length
then return null
1109 return alt
.elems
[pos
]
1113 fun lookahead
: Set[Token]
1115 var res
= new HashSet[Token]
1117 while p
< alt
.elems
.length
do
1118 var e
= alt
.elems
[p
]
1122 else if e
isa Production then
1123 #res.add_all(e.firsts)
1124 if not e
.is_nullable
then break
1131 # The item that advanced once
1134 var res
= new Item(alt
, pos
+1)