1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # A gramar describing a language
17 # The productions (non-terminal) of the conctete grammar
18 var prods
= new Array[Production]
20 # The additionnal abstract productions of the grammar
22 var ast_prods
= new Array[Production]
24 # The tokens (terminals) of the grammar
25 var tokens
= new Array[Token]
27 # Dump of the concrete grammar and the transformations
33 res
.append
("{p.name} \{-> {p.spe.name}\}=\n")
35 res
.append
("{p.name} =\n")
37 var last
= p
.alts
.last
39 res
.append
("\t\{{a.name}:\} {a.elems.join(" ")}")
40 if a
.codes
== null then a
.make_codes
41 res
.append
("\n\t\t\{-> {a.codes.join(", ")}\}")
42 if a
== last
then res
.append
(" ;\n") else res
.append
(" |\n")
44 if p
.is_nullable
then res
.append
"\t// is nullable\n"
45 if not p
.firsts
.is_empty
then
46 res
.append
"\t// firsts:\n"
47 for x
in p
.firsts
do res
.append
"\t// {x}\n"
49 if not p
.afters
.is_empty
then
50 res
.append
"\t// afters:\n"
51 for x
in p
.afters
do res
.append
"\t// {x}\n"
57 # Inline (ie. remove from the conctete grammar) some production
58 # REQUIRE: no circular production in `prods`
59 fun inline
(prods
: Collection[Production])
61 for p
in self.prods
do
62 for a
in p
.alts
.to_a
do
65 if e
isa Production and prods
.has
(e
) then to_inline
= true
67 if not to_inline
then continue
69 if a
.codes
== null then a
.make_codes
71 var a0
= new Alternative(p
, a
.name
, new Array[Element])
73 a0
.codes
= new Array[Code]
75 var pool2
= new Array[Alternative]
77 if not e
isa Production or not prods
.has
(e
) then
80 x
.codes
.add
(new CodePop)
85 print
"Circular inlining on {p} :: {a}"
90 if a2
.codes
== null then a2
.make_codes
92 var name
= a
.name
+ "_" + pool2
.length
.to_s
93 var na
= new Alternative(p
, name
, new Array[Element])
96 na
.elems
.add_all
(x
.elems
)
97 na
.elems
.add_all
(a2
.elems
)
98 na
.codes
= new Array[Code]
99 na
.codes
.add_all
(x
.codes
.as(not null))
100 na
.codes
.add_all
(a2
.codes
.as(not null))
108 x
.codes
.add
(a
.codes
.last
)
117 self.ast_prods
.add
(p
)
121 # Compute a LR automaton
124 var start
= new Production("_start")
126 var eof
= new Token("Eof")
128 start
.new_alt
("Start", self.prods
.first
, eof
)
133 var first
= new LRState("Start")
135 for i
in start
.start_state
do first
.add
(i
)
137 var automaton
= new LRAutomaton(self)
139 var todo
= new List[LRState]
141 var seen
= new HashSet[LRState]
144 while not todo
.is_empty
do
145 var state
= todo
.shift
148 automaton
.states
.add
(state
)
151 var nexts
= new HashMap[Element, LRState]
152 for i
in state
.items
do
154 if e
== null then continue
155 if nexts
.has_key
(e
) then
156 nexts
[e
].add
(i
.avance
)
159 if state
== automaton
.states
.first
then
162 name
= "{state.name} {e}"
164 var next
= new LRState(name
)
170 for e
, next
in nexts
do
172 #print "#states: {seen.length}"
174 # Look for an existing LR0 state in the automaton
184 # If not, add it to the pool and the todo-list
186 next
.number
= seen
.length
187 assert not seen
.has
(next
)
193 var t
= new LRTransition(state
, next
, e
)
201 # Compute `nullables`, `firsts` and `afters` of productions
207 if p
.is_nullable
then continue
214 else if e
isa Production then
215 if not e
.is_nullable
then nullabl
= false
227 if not changed
then break
238 if e
== null then break
240 if try_add
(fs
, i
) then changed
= true
242 else if e
isa Production then
244 if try_add
(fs
, t
) then changed
= true
246 if not e
.is_nullable
then break
254 if not changed
then break
261 var p0
: nullable Production = null
265 if e
== null then break
267 if e
isa Production then p0
= e
else p0
= null
276 if try_add
(afs
, i
) then changed
= true
277 else if e
isa Production then
279 if try_add
(afs
, t
) then changed
= true
281 if e
.is_nullable
then
283 if try_add
(afs
, t
) then changed
= true
293 for t
in p1
.afters
do
294 if try_add
(afs
, t
) then changed
= true
299 if not changed
then break
304 private fun try_add
(set
: Set[Item], t
: Item): Bool
306 var res
= not set
.has
(t
)
307 if res
then set
.add
(t
)
313 # A production of a grammar
316 # The alternative of the production
317 var alts
= new Array[Alternative]
319 # Additionnal alternatives in the AST
320 var ast_alts
= new Array[Alternative]
322 # Is self the accept production
325 # Is self transformed to a other production for the AST
327 var spe
: nullable Production writable = null
329 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
331 var altone
writable = false
333 # The cname of the class in the AST
336 if spe
!= null then return spe
.acname
340 # Is the production nullable
341 var is_nullable
= false
342 # The first tokens of the production
343 var firsts
= new HashSet[Item]
344 # The tokens that may follows the production (as in SLR)
345 var afters
= new HashSet[Item]
348 # Crate a new empty alternative
349 fun new_alt0
(name
: String): Alternative
351 var a
= new Alternative(self, name
, new Array[Element])
356 # Crate a new alternative with some elements
357 fun new_alt
(name
: String, elems
: Element...): Alternative
359 var a
= new Alternative(self, name
, elems
)
364 # Crate a new alternative with some elements
365 fun new_alt2
(name
: String, elems
: Array[Element]): Alternative
367 var a
= new Alternative(self, name
, elems
)
372 # Return a set of items for the production
373 fun start_state
: Array[Item]
375 var res
= new Array[Item]
382 # States in the LR automaton that has a outgoing transition on self
383 var gotos
= new ArraySet[LRState]
386 # An alternative of a production
391 # The name of the alternative
392 var name
: String writable
394 # The elements of the alternative
395 var elems
: Array[Element]
397 # The first item of the alternative
398 var first_item
= new Item(self, 0)
400 # The name of the elements
401 var elems_names
= new Array[nullable String]
403 # Return a name for a given element (or a number if unamed)
404 fun elemname
(i
: Int): String
406 if i
>= elems_names
.length
then return "{i}"
407 var n
= elems_names
[i
]
408 if n
== null then return "{i}"
412 redef fun to_s
do return "{prod.name}::{name}={elems.join(" ")}"
416 return "N{name.to_cmangle}"
419 # The code for the reduction
420 var codes
: nullable Array[Code] writable = null
422 # Is the alternative transformed (ie not in the AST)
423 var trans
writable = false
425 # Imitialize codes with the elements
428 if codes
!= null then return
429 var codes
= new Array[Code]
432 codes
.add
(new CodePop)
434 codes
.add
(new CodeNew(self))
438 # A step in the construction of the AST. used to modelize transformations
441 # Get a element from the stack
444 redef fun to_s
do return "pop"
446 # Allocate a new AST node for an alternative using the correct number of poped elements
450 redef fun to_s
do return "New {alt.name}/{alt.elems.length}"
455 redef fun to_s
do return "null"
458 # Elements of an alternative.
459 # Either a `Production` or a `Token`
460 abstract class Element
461 # The name of the element
463 redef fun to_s
do return name
465 private var acname_cache
: nullable String = null
467 # The mangled name of the element
468 fun cname
: String do return "N{name.to_cmangle}"
470 # the name of the class in the AST
471 fun acname
: String do
472 var res
= acname_cache
474 res
= "N{to_s.to_cmangle}"
479 fun acname
=(s
: String) do acname_cache
= s
485 # States of the LR automatio that shift on self
486 var shifts
= new ArraySet[LRState]
487 # States of the LR automatio that reduce on self in the lookahead(1)
488 var reduces
= new ArraySet[LRState]
495 # The grammar of the automaton
499 var states
= new Array[LRState]
501 # Dump of the automaton
504 var res
= new Array[String]
505 res
.add
"* LRAutomaton: {states.length} states\n"
507 res
.add
"s{s.number} {s.name}\n"
511 if i
.next
!= null then continue
512 for i2
in s
.lookahead
(i
) do
513 res
.add
"\t\t\t{i2}\n"
516 res
.add
"\tOTHER ITEMS\n"
518 if s
.core
.has
(i
) then continue
520 if i
.next
!= null then continue
521 for i2
in s
.lookahead
(i
) do
522 res
.add
"\t\t\t{i2}\n"
525 res
.add
"\tTRANSITIONS {s.outs.length}\n"
527 res
.add
"\t\t{t.elem} |-> s{t.to.number}\n"
529 res
.add
"\tACTIONS\n"
531 res
.add
"\t\tSTATE LR0\n"
533 res
.add
"\t\tSTATE SLR\n"
534 for t
, a
in s
.guarded_reduce
do
536 res
.add
"\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
538 else if s
.shifts
.has
(t
) then
539 res
.add
"\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
544 if not s
.shifts
.is_empty
then
545 res
.add
"\t\tSHIFT {s.shifts.join(" ")}\n"
547 for r
in s
.reduces
do
548 res
.add
"\t\tREDUCE {r}\n"
554 # Generate a graphviz file of the automaton
555 fun to_dot
(path
: String)
557 var f
= new OFStream.open
(path
)
558 f
.write
("digraph g \{\n")
559 f
.write
("rankdir=LR;\n")
560 f
.write
("node[shape=Mrecord,height=0];\n")
563 f
.write
"s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
565 f.write "{i.to_s.escape_to_dot}\\l
"
569 if s.core.has(i) then continue
570 f.write "{i.to_s.escape_to_dot}\\l
"
575 for t
, a
in s
.guarded_reduce
do
580 else if s
.shifts
.has
(t
) then
581 f
.write
",color=orange"
587 f
.write
",color=blue"
592 f
.write
"s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\
"];\n"
599 # Generate the parser of the automaton
600 fun gen_to_nit
(filepath
: String, name
: String)
602 var gen
= new Generator
603 gen
.gen_to_nit
(self, name
)
604 var f
= new OFStream.open
(filepath
)
614 # escape string used in labels for graphviz
615 fun escape_to_dot
: String
617 return escape_more_to_c
("|\{\}")
621 private class Generator
622 var out
= new Array[String]
623 fun add
(s
: String) do out
.add
(s
)
624 fun gen_to_nit
(autom
: LRAutomaton, name
: String)
626 var states
= autom
.states
627 var gram
= autom
.grammar
629 add
"# Parser generated by nitcc for the grammar {name}"
630 add
"import nitcc_runtime"
632 add
"class Parser_{name}"
634 add
"\tredef fun start_state do return state_{states.first.cname}"
637 add
"redef class Object"
639 add
"\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
641 for p
in gram
.prods
do
642 add
"\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
646 add
"redef class NToken"
648 if not s
.need_guard
then continue
649 add
"\t# guarded action for state {s.name}"
650 add
"\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
651 add
"\tprivate fun action_s{s.cname}(parser: Parser) do"
652 if s
.reduces
.length
!= 1 then
653 add
"\t\tparser.parse_error"
655 gen_reduce_to_nit
(s
.reduces
.first
)
661 for t
in gram
.tokens
do
662 if t
.name
== "Eof" then
663 add
"redef class {t.cname}"
665 add
"class {t.cname}"
669 if not s
.need_guard
then continue
670 add
"\tredef fun action_s{s.cname}(parser) do"
671 gen_shift_to_nit
(s
, t
)
674 for s
in t
.reduces
do
675 if not s
.need_guard
then continue
676 if s
.reduces
.length
<= 1 then continue
677 add
"\tredef fun action_s{s.cname}(parser) do"
678 gen_reduce_to_nit
(s
.guarded_reduce
[t
].first
.alt
)
681 add
"\tredef fun node_name do return \"{t.name.escape_to_nit}\
""
685 add
"redef class LRGoto"
687 if s
.gotos
.length
<= 1 then continue
688 add
"\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
692 for p
in gram
.prods
do
693 add
"class Goto_{p.cname}"
696 if s
.gotos
.length
<= 1 then continue
697 add
"\tredef fun goto_s{s.cname}(parser) do"
698 gen_goto_to_nit
(s
, p
)
704 var ps
= gram
.prods
.to_a
705 ps
.add_all
(gram
.ast_prods
)
707 if p
.spe
== null and not p
.altone
then
708 if p
.name
.has_suffix
("?") or p
.name
.has_suffix
("+") then continue
709 add
"class {p.acname}"
711 add
"\tredef fun node_name do return \"{p.name.escape_to_nit}\
""
715 var als
= p
.alts
.to_a
716 als
.add_all
(p
.ast_alts
)
718 if a
.trans
then continue
719 add
"class {a.cname}"
723 add
"\tsuper {p.acname}"
725 add
"\tredef fun node_name do return \"{a.name.escape_to_nit}\
""
726 var initarg
= new Array[String]
727 for i
in [0..a
.elems
.length
[ do
728 add
"\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
729 initarg
.add
("n_{a.elemname(i)}: {a.elems[i].acname}")
731 if initarg
.is_empty
then
734 add
"\tinit({initarg.join(", ")}) do"
735 for i
in [0..a
.elems
.length
[ do
736 add
"\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
740 add
"\tredef fun number_of_children do return {a.elems.length}"
741 add
"\tredef fun child(i) do"
742 for i
in [0..a
.elems
.length
[ do
743 add
"\t\tif i == {i} then return n_{a.elemname(i)}"
752 add
"# State {s.name}"
753 add
"private class LRState{s.cname}"
754 add
"\tsuper LRState"
756 add
"\tredef fun to_s do return \"{s.name.escape_to_nit}\
""
758 var err
= new Array[String]
761 if e
isa Production then err
.add e
.name
763 if err
.is_empty
then for t
in s
.outs
do
765 if e
isa Token then err
.add e
.name
768 add
"\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\
""
770 add
"\tredef fun action(parser) do"
772 add
"\t\tparser.peek_token.action_s{s.cname}(parser)"
773 else if s
.reduces
.length
== 1 then
774 gen_reduce_to_nit
(s
.reduces
.first
)
780 if not s
.gotos
.is_empty
then
781 add
"\tredef fun goto(parser, goto) do"
782 if s
.gotos
.length
> 1 then
783 add
"\t\tgoto.goto_s{s.cname}(parser)"
785 gen_goto_to_nit
(s
, s
.gotos
.first
)
796 fun gen_shift_to_nit
(s
: LRState, t
: Token)
798 var dest
= s
.trans
(t
)
799 add
"\t\tparser.shift(state_{dest.cname})"
803 fun gen_goto_to_nit
(s
: LRState, p
: Production)
805 var dest
= s
.trans
(p
)
806 add
"\t\tparser.push(state_{dest.cname})"
809 fun gen_reduce_to_nit
(alt
: Alternative)
811 add
"\t\t# REDUCE {alt}"
812 var i
= alt
.elems
.length
- 1
813 for e
in alt
.elems
.to_a
.reversed
do
814 add
"\t\tvar n{i} = parser.pop.as({e.acname})"
818 if alt
.name
.has_suffix
("+_more") then
819 add
"\t\tvar prod = n0"
820 add
"\t\tn0.children.add(n1)"
821 else if alt
.name
.has_suffix
("+_one") then
822 add
"\t\tvar prod = new {alt.prod.acname}"
823 add
"\t\tprod.children.add(n0)"
828 var st
= new Array[String]
829 for c
in alt
.codes
.as(not null) do
830 if c
isa CodePop then
833 else if c
isa CodeNull then
835 else if c
isa CodeNew then
838 var from
= st
.length
- calt
.elems
.length
839 var args
= new List[String]
840 for j
in [from
..st
.length
[ do
844 if args
.is_empty
then
845 add
"\t\tvar p{cpt} = new {calt.cname}"
847 add
"\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
850 #for j in [from..st.length[ do
851 #if st[j] == "null" then continue
852 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
858 assert st
.length
== 1
859 add
"\t\tvar prod = {st.first}"
862 add
"\t\tparser.node_stack.push prod"
863 if alt
.prod
.accept
then
864 add
"\t\tparser.stop = true"
866 add
"\t\tparser.goto(goto_{alt.prod.cname})"
871 # A state in a LR automaton
873 # name of the automaton (short part from the start)
877 fun cname
: String do return name
.to_cmangle
883 var items
= new HashSet[Item]
885 # Set of items only in the core
886 var core
= new HashSet[Item]
888 # Outgoing transitions
889 var ins
= new Array[LRTransition]
891 # Ingoing transitions
892 var outs
= new Array[LRTransition]
895 fun trans
(e
: Element): nullable LRState
897 for t
in outs
do if t
.elem
== e
then return t
.to
901 redef fun ==(o
) do return o
isa LRState and core
== o
.core
902 redef fun hash
do return items
.length
904 redef fun to_s
do return items
.join
(" ; ")
906 # Add and item in the core
907 fun add
(i
: Item): Bool
909 if items
.has
(i
) then return false
912 if i
.pos
> 0 or i
.alt
.prod
.accept
then core
.add
(i
)
916 # Recusively extends item outside the core
920 if e
== null then return
921 if not e
isa Production then return
922 for i2
in e
.start_state
do
923 if add
(i2
) then extends
(i2
)
928 fun lookahead
(i
: Item): Set[Item]
930 return i
.alt
.prod
.afters
933 # Set of all reductions
934 var reduces
= new ArraySet[Alternative]
936 var shifts
= new ArraySet[Token]
938 var gotos
= new ArraySet[Production]
939 # Reduction guarded by tokens
940 var guarded_reduce
= new HashMap[Token, Set[Item]]
941 # Shitfs guarded by tokens
942 var guarded_shift
= new HashMap[Token, Set[Item]]
944 # Does the state need a guard to perform an action?
945 fun need_guard
: Bool do return not shifts
.is_empty
or reduces
.length
> 1
948 fun is_lr0
: Bool do return reduces
.length
<= 1 and shifts
.is_empty
or reduces
.is_empty
950 # compute guards and conflicts
954 for i
in items
.to_a
do
958 # Collect action and conflicts
963 for i2
in lookahead
(i
) do
967 if not guarded_reduce
.has_key
(t
) then
968 guarded_reduce
[t
] = new ArraySet[Item]
970 guarded_reduce
[t
].add
(i
)
972 else if n
isa Token then
975 if not guarded_shift
.has_key
(n
) then
976 guarded_shift
[n
] = new ArraySet[Item]
978 guarded_shift
[n
].add
(i
)
979 else if n
isa Production then
986 for t
, a
in guarded_reduce
do
988 print
"REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
989 for i
in a
do print
"\treduce: {i}"
990 else if guarded_shift
.has_key
(t
) then
992 var confs
= new Array[Item]
993 var ress
= new Array[String]
994 var g
= guarded_shift
[t
]
995 for si
in lookahead
(ri
) do
996 if si
.next
!= t
then continue
997 if not g
.has
(si
) then
1002 var csi
: nullable Item = null
1003 for bsi
in back_expand
(si
) do
1004 if bsi
.alt
.prod
== p
then
1013 ress
.add
"\tshift: {si}"
1015 ress
.add
"\tcore shift: {csi}"
1018 if confs
.is_empty
then
1019 print
"Automatic Dangling on state {self.number} {self.name} for token {t}:"
1020 print
"\treduce: {ri}"
1021 for r
in ress
do print r
1022 guarded_reduce
.keys
.remove
(t
)
1024 print
"SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
1025 print
"\treduce: {ri}"
1026 for i
in guarded_shift
[t
] do print
"\tshift: {i}"
1032 # Return `i` and all other items of the state that expands, directly or undirectly, to `i`
1033 fun back_expand
(i
: Item): Set[Item]
1035 var res
= new ArraySet[Item]
1038 while not todo
.is_empty
do
1040 if x
.pos
> 0 then continue
1043 if y
.next
!= p
then continue
1044 if res
.has
(y
) then continue
1053 # A transition in a LR automaton
1057 # The testination state
1059 # The element labeling the transition
1063 # A alternative with a cursor (dot) before an element
1066 var alt
: Alternative
1067 # The dot index (0 means before the first element)
1070 redef fun ==(o
) do return o
isa Item and alt
== o
.alt
and pos
== o
.pos
1071 redef fun hash
do return alt
.hash
+ pos
1076 b
.append
("{alt.prod.name}::{alt.name}=")
1077 for i
in [0..alt
.elems
.length
[
1079 if pos
== i
then b
.append
(".") else b
.append
(" ")
1080 b
.append
(alt
.elems
[i
].to_s
)
1082 if pos
== alt
.elems
.length
then b
.append
(".")
1086 # The element thatr follow the dot, null if the fdot is at the end
1087 fun next
: nullable Element
1089 if pos
>= alt
.elems
.length
then return null
1090 return alt
.elems
[pos
]
1094 fun lookahead
: Set[Token]
1096 var res
= new HashSet[Token]
1098 while p
< alt
.elems
.length
do
1099 var e
= alt
.elems
[p
]
1103 else if e
isa Production then
1104 #res.add_all(e.firsts)
1105 if not e
.is_nullable
then break
1112 # The item that advanced once
1115 var res
= new Item(alt
, pos
+1)