1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # A gramar describing a language
17 # The productions (non-terminal) of the conctete grammar
18 var prods
= new Array[Production]
20 # The additionnal abstract productions of the grammar
22 var ast_prods
= new Array[Production]
24 # The tokens (terminals) of the grammar
25 var tokens
= new Array[Token]
27 # Dump of the concrete grammar and the transformations
33 res
.append
("{p.name} \{-> {p.spe.name}\}=\n")
35 res
.append
("{p.name} =\n")
37 var last
= p
.alts
.last
39 res
.append
("\t\{{a.name}:\} {a.elems.join(" ")}")
40 if a
.codes
== null then a
.make_codes
41 res
.append
("\n\t\t\{-> {a.codes.join(", ")}\}")
42 if a
== last
then res
.append
(" ;\n") else res
.append
(" |\n")
44 if p
.is_nullable
then res
.append
"\t// is nullable\n"
45 if not p
.firsts
.is_empty
then res
.append
"\t// firsts: {p.firsts.join(" ")}\n"
46 if not p
.afters
.is_empty
then res
.append
"\t// afters: {p.afters.join(" ")}\n"
51 # Inline (ie. remove from the conctete grammar) some production
52 # REQUIRE: no circular production in `prods`
53 fun inline
(prods
: Collection[Production])
55 for p
in self.prods
do
56 for a
in p
.alts
.to_a
do
59 if e
isa Production and prods
.has
(e
) then to_inline
= true
61 if not to_inline
then continue
63 if a
.codes
== null then a
.make_codes
65 var a0
= new Alternative(p
, a
.name
, new Array[Element])
67 a0
.codes
= new Array[Code]
69 var pool2
= new Array[Alternative]
71 if not e
isa Production or not prods
.has
(e
) then
74 x
.codes
.add
(new CodePop)
79 print
"Circular inlining on {p} :: {a}"
84 if a2
.codes
== null then a2
.make_codes
86 var name
= a
.name
+ "_" + pool2
.length
.to_s
87 var na
= new Alternative(p
, name
, new Array[Element])
90 na
.elems
.add_all
(x
.elems
)
91 na
.elems
.add_all
(a2
.elems
)
92 na
.codes
= new Array[Code]
93 na
.codes
.add_all
(x
.codes
.as(not null))
94 na
.codes
.add_all
(a2
.codes
.as(not null))
102 x
.codes
.add
(a
.codes
.last
)
111 self.ast_prods
.add
(p
)
115 # Compute a LR automaton
120 var start
= new Production("_start")
122 var eof
= new Token("Eof")
124 start
.new_alt
("Start", self.prods
.first
, eof
)
126 var first
= new LRState("Start")
128 for i
in start
.start_state
do first
.add
(i
)
130 var automaton
= new LRAutomaton(self)
132 var todo
= new List[LRState]
134 var seen
= new HashSet[LRState]
137 while not todo
.is_empty
do
138 var state
= todo
.shift
141 automaton
.states
.add
(state
)
144 var nexts
= new HashMap[Element, LRState]
145 for i
in state
.items
do
147 if e
== null then continue
148 if nexts
.has_key
(e
) then
149 nexts
[e
].add
(i
.avance
)
152 if state
== automaton
.states
.first
then
155 name
= "{state.name} {e}"
157 var next
= new LRState(name
)
163 for e
, next
in nexts
do
165 #print "#states: {seen.length}"
170 for i
in next
.items
do
171 if n
.add
(i
) then n
.extends
(i
)
180 next
.number
= seen
.length
181 assert not seen
.has
(next
)
186 var t
= new LRTransition(state
, next
, e
)
194 # Compute `nullables`, `firsts` and `afters` of productions
200 if p
.is_nullable
then continue
207 else if e
isa Production then
208 if not e
.is_nullable
then nullabl
= false
220 if not changed
then break
230 if try_add
(fs
, e
) then changed
= true
232 else if e
isa Production then
234 if try_add
(fs
, t
) then changed
= true
236 if not e
.is_nullable
then break
243 if not changed
then break
250 var p0
: nullable Production = null
253 if e
isa Production then p0
= e
else p0
= null
254 if p
== null then continue
259 if try_add
(afs
, e
) then changed
= true
260 else if e
isa Production then
262 if try_add
(afs
, t
) then changed
= true
264 if e
.is_nullable
then
266 if try_add
(afs
, t
) then changed
= true
275 for t
in p1
.afters
do
276 if try_add
(afs
, t
) then changed
= true
281 if not changed
then break
286 private fun try_add
(set
: Set[Token], t
: Token): Bool
288 var res
= not set
.has
(t
)
289 if res
then set
.add
(t
)
295 # A production of a grammar
298 # The alternative of the production
299 var alts
= new Array[Alternative]
301 # Additionnal alternatives in the AST
302 var ast_alts
= new Array[Alternative]
304 # Is self the accept production
307 # Is self transformed to a other production for the AST
309 var spe
: nullable Production writable = null
311 # Is self contains only a single alternative (then no need for a abstract production class in the AST)
313 var altone
writable = false
315 # The cname of the class in the AST
318 if spe
!= null then return spe
.acname
322 # Is the production nullable
323 var is_nullable
= false
324 # The first tokens of the production
325 var firsts
= new HashSet[Token]
326 # The tokens that may follows the production (as in SLR)
327 var afters
= new HashSet[Token]
330 # Crate a new empty alternative
331 fun new_alt0
(name
: String): Alternative
333 var a
= new Alternative(self, name
, new Array[Element])
338 # Crate a new alternative with some elements
339 fun new_alt
(name
: String, elems
: Element...): Alternative
341 var a
= new Alternative(self, name
, elems
)
346 # Crate a new alternative with some elements
347 fun new_alt2
(name
: String, elems
: Array[Element]): Alternative
349 var a
= new Alternative(self, name
, elems
)
354 # Return a set of items for the production
355 fun start_state
: Array[Item]
357 var res
= new Array[Item]
359 res
.add
new Item(a
, 0)
364 # States in the LR automaton that has a outgoing transition on self
365 var gotos
= new ArraySet[LRState]
368 # An alternative of a production
373 # The name of the alternative
374 var name
: String writable
376 # The elements of the alternative
377 var elems
: Array[Element]
379 # The name of the elements
380 var elems_names
= new Array[nullable String]
382 # Return a name for a given element (or a number if unamed)
383 fun elemname
(i
: Int): String
385 if i
>= elems_names
.length
then return "{i}"
386 var n
= elems_names
[i
]
387 if n
== null then return "{i}"
391 redef fun to_s
do return "{prod.name}::{name}={elems.join(" ")}"
395 return "N{name.to_cmangle}"
398 # The code for the reduction
399 var codes
: nullable Array[Code] writable = null
401 # Is the alternative transformed (ie not in the AST)
402 var trans
writable = false
404 # Imitialize codes with the elements
407 if codes
!= null then return
408 var codes
= new Array[Code]
411 codes
.add
(new CodePop)
413 codes
.add
(new CodeNew(self))
417 # A step in the construction of the AST. used to modelize transformations
420 # Get a element from the stack
423 redef fun to_s
do return "pop"
425 # Allocate a new AST node for an alternative using the correct number of poped elements
429 redef fun to_s
do return "New {alt.name}/{alt.elems.length}"
434 redef fun to_s
do return "null"
437 # Elements of an alternative.
438 # Either a `Production` or a `Token`
439 abstract class Element
440 # The name of the element
442 redef fun to_s
do return name
444 private var acname_cache
: nullable String = null
446 # The mangled name of the element
447 fun cname
: String do return "N{name.to_cmangle}"
449 # the name of the class in the AST
450 fun acname
: String do
451 var res
= acname_cache
453 res
= "N{to_s.to_cmangle}"
458 fun acname
=(s
: String) do acname_cache
= s
464 # States of the LR automatio that shift on self
465 var shifts
= new ArraySet[LRState]
466 # States of the LR automatio that reduce on self in the lookahead(1)
467 var reduces
= new ArraySet[LRState]
474 # The grammar of the automaton
478 var states
= new Array[LRState]
480 # Dump of the automaton
483 var res
= new Array[String]
484 res
.add
"* LRAutomaton: {states.length} states\n"
486 res
.add
"s{s.number} {s.name}\n"
491 res
.add
"\tOTHER ITEMS\n"
493 if s
.core
.has
(i
) then continue
496 res
.add
"\tTRANSITIONS {s.outs.length}\n"
498 res
.add
"\t\t{t.elem} |-> s{t.to.number}\n"
500 res
.add
"\tACTIONS\n"
502 res
.add
"\t\tSTATE LR0\n"
504 res
.add
"\t\tSTATE LALR\n"
505 for t
, a
in s
.guarded_reduce
do
507 res
.add
"\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
509 else if s
.shifts
.has
(t
) then
510 res
.add
"\t\t/!\\ SHIFT/REDUCE CONFLICT\n"
515 if not s
.shifts
.is_empty
then
516 res
.add
"\t\tSHIFT {s.shifts.join(" ")}\n"
518 for r
in s
.reduces
do
519 res
.add
"\t\tREDUCE {r}\n"
525 # Generate a graphviz file of the automaton
526 fun to_dot
(path
: String)
528 var f
= new OFStream.open
(path
)
529 f
.write
("digraph g \{\n")
530 f
.write
("rankdir=LR;\n")
531 f
.write
("node[shape=Mrecord,height=0];\n")
534 f
.write
"s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
536 f.write "{i.to_s.escape_to_dot}\\l
"
540 if s.core.has(i) then continue
541 f.write "{i.to_s.escape_to_dot}\\l
"
546 for t
, a
in s
.guarded_reduce
do
551 else if s
.shifts
.has
(t
) then
552 f
.write
",color=orange"
558 f
.write
",color=blue"
563 f
.write
"s{s.number} -> s{t.to.number} [label=\"{t.elem.to_s.escape_to_dot}\
"];\n"
570 # Generate the parser of the automaton
571 fun gen_to_nit
(filepath
: String, name
: String)
573 var gen
= new Generator
574 gen
.gen_to_nit
(self, name
)
575 var f
= new OFStream.open
(filepath
)
585 # escape string used in labels for graphviz
586 fun escape_to_dot
: String
588 return escape_more_to_c
("|\{\}")
592 private class Generator
593 var out
= new Array[String]
594 fun add
(s
: String) do out
.add
(s
)
595 fun gen_to_nit
(autom
: LRAutomaton, name
: String)
597 var states
= autom
.states
598 var gram
= autom
.grammar
600 add
"# Parser generated by nitcc for the grammar {name}"
601 add
"import nitcc_runtime"
603 add
"class Parser_{name}"
605 add
"\tredef fun start_state do return state_{states.first.cname}"
608 add
"redef class Object"
610 add
"\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
612 for p
in gram
.prods
do
613 add
"\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
617 add
"redef class NToken"
619 if not s
.need_guard
then continue
620 add
"\t# guarded action for state {s.name}"
621 add
"\t# {s.shifts.length} shift(s) and {s.reduces.length} reduce(s)"
622 add
"\tprivate fun action_s{s.cname}(parser: Parser) do"
623 if s
.reduces
.length
!= 1 then
624 add
"\t\tparser.parse_error"
626 gen_reduce_to_nit
(s
.reduces
.first
)
632 for t
in gram
.tokens
do
633 if t
.name
== "Eof" then
634 add
"redef class {t.cname}"
636 add
"class {t.cname}"
640 if not s
.need_guard
then continue
641 add
"\tredef fun action_s{s.cname}(parser) do"
642 gen_shift_to_nit
(s
, t
)
645 for s
in t
.reduces
do
646 if not s
.need_guard
then continue
647 if s
.reduces
.length
<= 1 then continue
648 add
"\tredef fun action_s{s.cname}(parser) do"
649 gen_reduce_to_nit
(s
.guarded_reduce
[t
].first
.alt
)
652 add
"\tredef fun node_name do return \"{t.name.escape_to_nit}\
""
656 add
"redef class LRGoto"
658 if s
.gotos
.length
<= 1 then continue
659 add
"\tprivate fun goto_s{s.cname}(parser: Parser) do abort"
663 for p
in gram
.prods
do
664 add
"class Goto_{p.cname}"
667 if s
.gotos
.length
<= 1 then continue
668 add
"\tredef fun goto_s{s.cname}(parser) do"
669 gen_goto_to_nit
(s
, p
)
675 var ps
= gram
.prods
.to_a
676 ps
.add_all
(gram
.ast_prods
)
678 if p
.spe
== null and not p
.altone
then
679 if p
.name
.has_suffix
("?") or p
.name
.has_suffix
("+") then continue
680 add
"class {p.acname}"
682 add
"\tredef fun node_name do return \"{p.name.escape_to_nit}\
""
686 var als
= p
.alts
.to_a
687 als
.add_all
(p
.ast_alts
)
689 if a
.trans
then continue
690 add
"class {a.cname}"
694 add
"\tsuper {p.acname}"
696 add
"\tredef fun node_name do return \"{a.name.escape_to_nit}\
""
697 var initarg
= new Array[String]
698 for i
in [0..a
.elems
.length
[ do
699 add
"\tvar n_{a.elemname(i)}: {a.elems[i].acname}"
700 initarg
.add
("n_{a.elemname(i)}: {a.elems[i].acname}")
702 if initarg
.is_empty
then
705 add
"\tinit({initarg.join(", ")}) do"
706 for i
in [0..a
.elems
.length
[ do
707 add
"\t\tself.n_{a.elemname(i)} = n_{a.elemname(i)}"
711 add
"\tredef fun number_of_children do return {a.elems.length}"
712 add
"\tredef fun child(i) do"
713 for i
in [0..a
.elems
.length
[ do
714 add
"\t\tif i == {i} then return n_{a.elemname(i)}"
723 add
"# State {s.name}"
724 add
"private class LRState{s.cname}"
725 add
"\tsuper LRState"
727 add
"\tredef fun to_s do return \"{s.name.escape_to_nit}\
""
729 var err
= new Array[String]
732 if e
isa Production then err
.add e
.name
734 if err
.is_empty
then for t
in s
.outs
do
736 if e
isa Token then err
.add e
.name
739 add
"\tredef fun error_msg do return \"{err.join(", ").escape_to_nit}\
""
741 add
"\tredef fun action(parser) do"
743 add
"\t\tparser.peek_token.action_s{s.cname}(parser)"
744 else if s
.reduces
.length
== 1 then
745 gen_reduce_to_nit
(s
.reduces
.first
)
751 if not s
.gotos
.is_empty
then
752 add
"\tredef fun goto(parser, goto) do"
753 if s
.gotos
.length
> 1 then
754 add
"\t\tgoto.goto_s{s.cname}(parser)"
756 gen_goto_to_nit
(s
, s
.gotos
.first
)
767 fun gen_shift_to_nit
(s
: LRState, t
: Token)
769 var dest
= s
.trans
(t
)
770 add
"\t\tparser.shift(state_{dest.cname})"
774 fun gen_goto_to_nit
(s
: LRState, p
: Production)
776 var dest
= s
.trans
(p
)
777 add
"\t\tparser.push(state_{dest.cname})"
780 fun gen_reduce_to_nit
(alt
: Alternative)
782 add
"\t\t# REDUCE {alt}"
783 var i
= alt
.elems
.length
- 1
784 for e
in alt
.elems
.to_a
.reversed
do
785 add
"\t\tvar n{i} = parser.pop.as({e.acname})"
789 if alt
.name
.has_suffix
("+_more") then
790 add
"\t\tvar prod = n0"
791 add
"\t\tn0.items.add(n1)"
792 else if alt
.name
.has_suffix
("+_one") then
793 add
"\t\tvar prod = new {alt.prod.acname}"
794 add
"\t\tprod.items.add(n0)"
799 var st
= new Array[String]
800 for c
in alt
.codes
.as(not null) do
801 if c
isa CodePop then
804 else if c
isa CodeNull then
806 else if c
isa CodeNew then
809 var from
= st
.length
- calt
.elems
.length
810 var args
= new List[String]
811 for j
in [from
..st
.length
[ do
815 if args
.is_empty
then
816 add
"\t\tvar p{cpt} = new {calt.cname}"
818 add
"\t\tvar p{cpt} = new {calt.cname}({args.join(", ")})"
821 #for j in [from..st.length[ do
822 #if st[j] == "null" then continue
823 #add "\t\tp{cpt}.n_{calt.elemname(x)} = {st[j]}"
829 assert st
.length
== 1
830 add
"\t\tvar prod = {st.first}"
833 add
"\t\tparser.node_stack.push prod"
834 if alt
.prod
.accept
then
835 add
"\t\tparser.stop = true"
837 add
"\t\tparser.goto(goto_{alt.prod.cname})"
842 # A state in a LR automaton
844 # name of the automaton (short part from the start)
848 fun cname
: String do return name
.to_cmangle
854 var items
= new HashSet[Item]
856 # Set of items only in the core
857 var core
= new HashSet[Item]
859 # Outgoing transitions
860 var ins
= new Array[LRTransition]
862 # Ingoing transitions
863 var outs
= new Array[LRTransition]
866 fun trans
(e
: Element): nullable LRState
868 for t
in outs
do if t
.elem
== e
then return t
.to
872 redef fun ==(o
) do return o
isa LRState and core
== o
.core
873 redef fun hash
do return items
.length
875 redef fun to_s
do return items
.join
(" ; ")
877 # Add and item in the core
878 fun add
(i
: Item): Bool
880 #if items.has(i) then return
882 #print "add {i} in {inspect}={self}"
885 if i
.alt
== i2
.alt
and i
.pos
== i2
.pos
then
886 if i2
.future
.has_all
(i
.future
) then return false
887 i2
.future
.add_all
(i
.future
)
895 if i
.pos
> 0 or i
.alt
.prod
.accept
then core
.add
(i
)
900 # Recusively extends item outside the core
904 if e
== null then return
905 if not e
isa Production then return
907 for i2
in e
.start_state
do
908 i2
.future
.add_all
(la
)
909 if add
(i2
) then extends
(i2
)
913 # Set of all reductions
914 var reduces
= new ArraySet[Alternative]
916 var shifts
= new ArraySet[Token]
918 var gotos
= new ArraySet[Production]
919 # Reduction guarded by tokens
920 var guarded_reduce
= new HashMap[Token, Array[Item]]
921 # Shitfs guarded by tokens
922 var guarded_shift
= new HashMap[Token, Array[Item]]
924 # Does the state need a guard to perform an action?
925 fun need_guard
: Bool do return not shifts
.is_empty
or reduces
.length
> 1
928 fun is_lr0
: Bool do return reduces
.length
<= 1 and shifts
.is_empty
or reduces
.is_empty
930 # compute guards and conflicts
933 for i
in items
.to_a
do
941 #for t in i.alt.prod.afters do
944 if guarded_reduce
.has_key
(t
) then
945 guarded_reduce
[t
].add
(i
)
947 guarded_reduce
[t
] = [i
]
950 else if n
isa Token then
953 if guarded_shift
.has_key
(n
) then
954 guarded_shift
[n
].add
(i
)
956 guarded_shift
[n
] = [i
]
958 else if n
isa Production then
965 for t
, a
in guarded_reduce
do
967 print
"REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
968 for i
in a
do print
"\t{i}"
969 else if guarded_shift
.has_key
(t
) then
970 print
"SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
971 for i
in a
do print
"\t{i}"
972 for i
in guarded_shift
[t
] do print
"\t{i}"
978 # A transition in a LR automaton
982 # The testination state
984 # The element labeling the transition
988 # A alternative with a cursor (dot) and possibly a future
992 # The dot index (0 means before the first element)
994 # The possible future
995 var future
= new ArraySet[Token]
997 redef fun ==(o
) do return o
isa Item and alt
== o
.alt
and pos
== o
.pos
998 redef fun hash
do return alt
.hash
+ pos
1003 b
.append
("{alt.prod.name}::{alt.name}=")
1004 for i
in [0..alt
.elems
.length
[
1006 if pos
== i
then b
.append
(".") else b
.append
(" ")
1007 b
.append
(alt
.elems
[i
].to_s
)
1009 if pos
== alt
.elems
.length
then b
.append
(".")
1010 if not future
.is_empty
then
1012 b
.append
(future
.join
(" "))
1018 # The element thatr follow the dot, null if the fdot is at the end
1019 fun next
: nullable Element
1021 if pos
>= alt
.elems
.length
then return null
1022 return alt
.elems
[pos
]
1026 fun lookahead
: Set[Token]
1028 var res
= new HashSet[Token]
1030 while p
< alt
.elems
.length
do
1031 var e
= alt
.elems
[p
]
1035 else if e
isa Production then
1036 res
.add_all
(e
.firsts
)
1037 if not e
.is_nullable
then break
1041 if p
>= alt
.elems
.length
then
1047 # The item that advanced once
1050 var res
= new Item(alt
, pos
+1)
1051 res
.future
.add_all
(future
)