# See the License for the specific language governing permissions and
# limitations under the License.
-# A gramar describing a language
+# A grammar describing a language
class Gram
- # The productions (non-terminal) of the conctete grammar
+ # The productions (non-terminal) of the concrete grammar
var prods = new Array[Production]
- # The additionnal abstract productions of the grammar
+ # The additional abstract productions of the grammar
# TODO clean AST
var ast_prods = new Array[Production]
# Dump of the concrete grammar and the transformations
fun pretty: String
do
- var res = new Buffer
+ var res = new FlatBuffer
for p in prods do
if p.spe != null then
res.append("{p.name} \{-> {p.spe.name}\}=\n")
else
res.append("{p.name} =\n")
end
- var last = p.alts.last
+ var last = null
+ if not p.alts.is_empty then last = p.alts.last
for a in p.alts do
res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
if a.codes == null then a.make_codes
if a == last then res.append(" ;\n") else res.append(" |\n")
end
if p.is_nullable then res.append "\t// is nullable\n"
- if not p.firsts.is_empty then res.append "\t// firsts: {p.firsts.join(" ")}\n"
- if not p.afters.is_empty then res.append "\t// afters: {p.afters.join(" ")}\n"
+ if not p.firsts.is_empty then
+ res.append "\t// firsts:\n"
+ for x in p.firsts do res.append "\t// {x}\n"
+ end
+ if not p.afters.is_empty then
+ res.append "\t// afters:\n"
+ for x in p.afters do res.append "\t// {x}\n"
+ end
end
return res.to_s
end
- # Inline (ie. remove from the conctete grammar) some production
+ # Inline (ie. remove from the concrete grammar) some production
# REQUIRE: no circular production in `prods`
fun inline(prods: Collection[Production])
do
for p in self.prods do
for a in p.alts.to_a do
+ if a.phony then continue
var to_inline = false
for e in a.elems do
if e isa Production and prods.has(e) then to_inline = true
end
pool2.clear
for a2 in e.alts do
+ if a.phony then continue
if a2.codes == null then a2.make_codes
for x in pool do
var name = a.name + "_" + pool2.length.to_s
# Compute a LR automaton
fun lr0: LRAutomaton
do
- analyse
-
var start = new Production("_start")
start.accept = true
var eof = new Token("Eof")
tokens.add(eof)
start.new_alt("Start", self.prods.first, eof)
prods.add(start)
+
+ analyse
+
var first = new LRState("Start")
first.number = 0
for i in start.start_state do first.add(i)
#print "#states: {seen.length}"
+ # Look for an existing LR0 state in the automaton
var new_state = true
for n in seen do
if n == next then
- for i in next.items do
- if n.add(i) then n.extends(i)
- end
next = n
new_state = false
break
end
end
+ # If not, add it to the pool and the todo-list
if new_state then
next.number = seen.length
assert not seen.has(next)
todo.add(next)
end
+ # Add the transition
var t = new LRTransition(state, next, e)
state.outs.add t
next.ins.add t
for p in prods do
if p.is_nullable then continue
for a in p.alts do
+ if a.phony then continue
var nullabl = true
for e in a.elems do
if e isa Token then
for p in prods do
var fs = p.firsts
for a in p.alts do
- for e in a.elems do
+ if a.phony then continue
+ var i = a.first_item
+ loop
+ var e = i.next
+ if e == null then break
if e isa Token then
- if try_add(fs, e) then changed = true
+ if try_add(fs, i) then changed = true
break
else if e isa Production then
for t in e.firsts do
else
abort
end
+ i = i.avance
end
end
end
var changed = false
for p1 in prods do
for a in p1.alts do
+ if a.phony then continue
var p0: nullable Production = null
- for e in a.elems do
+ var i = a.first_item
+ loop
+ var e = i.next
+ if e == null then break
var p = p0
if e isa Production then p0 = e else p0 = null
- if p == null then continue
+ if p == null then
+ i = i.avance
+ continue
+ end
var afs = p.afters
if e isa Token then
- if try_add(afs, e) then changed = true
+ if try_add(afs, i) then changed = true
else if e isa Production then
for t in e.firsts do
if try_add(afs, t) then changed = true
else
abort
end
+ i = i.avance
end
if p0 != null then
var afs = p0.afters
end
# used by `analyse`
- private fun try_add(set: Set[Token], t: Token): Bool
+ private fun try_add(set: Set[Item], t: Item): Bool
do
var res = not set.has(t)
if res then set.add(t)
# The alternative of the production
var alts = new Array[Alternative]
- # Additionnal alternatives in the AST
+ # Additional alternatives in the AST
var ast_alts = new Array[Alternative]
# Is self the accept production
var accept = false
# Is self transformed to a other production for the AST
- # FIXME: cleaup AST
- var spe: nullable Production writable = null
+ # FIXME: cleanup AST
+ var spe: nullable Production = null is writable
# Is self contains only a single alternative (then no need for a abstract production class in the AST)
# FIXME cleanup AST
- var altone writable = false
+ var altone = false is writable
# The cname of the class in the AST
# FIXME: cleanup AST
# Is the production nullable
var is_nullable = false
+
# The first tokens of the production
- var firsts = new HashSet[Token]
+ var firsts = new HashSet[Item]
+
# The tokens that may follows the production (as in SLR)
- var afters = new HashSet[Token]
+ var afters = new HashSet[Item]
# Crate a new empty alternative
do
var res = new Array[Item]
for a in alts do
- res.add new Item(a, 0)
+ if a.phony then continue
+ res.add a.first_item
end
return res
end
var prod: Production
# The name of the alternative
- var name: String writable
+ var name: String is writable
# The elements of the alternative
var elems: Array[Element]
+ # The first item of the alternative
+ var first_item = new Item(self, 0)
+
# The name of the elements
var elems_names = new Array[nullable String]
end
# The code for the reduction
- var codes: nullable Array[Code] writable = null
+ var codes: nullable Array[Code] = null is writable
# Is the alternative transformed (ie not in the AST)
- var trans writable = false
+ var trans = false is writable
+
+ # Is the alternative unparsable? (ie not in the automaton)
+ var phony = false is writable
- # Imitialize codes with the elements
+ # Initialize codes with the elements
fun make_codes
do
if codes != null then return
end
end
-# A step in the construction of the AST. used to modelize transformations
+# A step in the construction of the AST.
+# Used to model transformations
interface Code
end
+
# Get a element from the stack
class CodePop
super Code
redef fun to_s do return "pop"
end
-# Allocate a new AST node for an alternative using the correct number of poped elements
+
+# Allocate a new AST node for an alternative using the correct number of popped elements
class CodeNew
super Code
+
+ # The associated alternative
var alt: Alternative
+
redef fun to_s do return "New {alt.name}/{alt.elems.length}"
end
+
# Get null
class CodeNull
super Code
# The mangled name of the element
fun cname: String do return "N{name.to_cmangle}"
- # the name of the class in the AST
+ # The name of the class in the AST
fun acname: String do
var res = acname_cache
if res == null then
end
return res
end
+
+ # The name of the class in the AST
fun acname=(s: String) do acname_cache = s
end
# A terminal element
class Token
super Element
- # States of the LR automatio that shift on self
+ # States of the LR automaton that shift on self
var shifts = new ArraySet[LRState]
- # States of the LR automatio that reduce on self in the lookahead(1)
+ # States of the LR automaton that reduce on self in the lookahead(1)
var reduces = new ArraySet[LRState]
end
res.add "\tCORE\n"
for i in s.core do
res.add "\t\t{i}\n"
+ if i.next != null then continue
+ for i2 in s.lookahead(i) do
+ res.add "\t\t\t{i2}\n"
+ end
end
res.add "\tOTHER ITEMS\n"
for i in s.items do
if s.core.has(i) then continue
res.add "\t\t{i}\n"
+ if i.next != null then continue
+ for i2 in s.lookahead(i) do
+ res.add "\t\t\t{i2}\n"
+ end
end
res.add "\tTRANSITIONS {s.outs.length}\n"
for t in s.outs do
if s.is_lr0 then
res.add "\t\tSTATE LR0\n"
else
- res.add "\t\tSTATE LALR\n"
+ res.add "\t\tSTATE SLR\n"
for t, a in s.guarded_reduce do
if a.length > 1 then
res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
# Generate a graphviz file of the automaton
fun to_dot(path: String)
do
- var f = new OFStream.open(path)
+ var f = new FileWriter.open(path)
f.write("digraph g \{\n")
f.write("rankdir=LR;\n")
f.write("node[shape=Mrecord,height=0];\n")
end
# Generate the parser of the automaton
- fun gen_to_nit(filepath: String)
+ fun gen_to_nit(filepath: String, name: String)
do
var gen = new Generator
- gen.gen_to_nit(self)
- var f = new OFStream.open(filepath)
+ gen.gen_to_nit(self, name)
+ var f = new FileWriter.open(filepath)
for s in gen.out do
f.write(s)
f.write("\n")
end
end
-redef class String
- # escape string used in labels for graphviz
- fun escape_to_dot: String
- do
- return escape_more_to_c("|\{\}")
- end
-end
-
private class Generator
var out = new Array[String]
fun add(s: String) do out.add(s)
- fun gen_to_nit(autom: LRAutomaton)
+ fun gen_to_nit(autom: LRAutomaton, name: String)
do
var states = autom.states
var gram = autom.grammar
- add "# Parser generated by nitcc"
+ add "# Parser generated by nitcc for the grammar {name}"
+ add "module {name}_parser is no_warning(\"missing-doc\",\"old-init\")"
add "import nitcc_runtime"
- add "class MyParser"
+ add "class Parser_{name}"
add "\tsuper Parser"
add "\tredef fun start_state do return state_{states.first.cname}"
add "end"
- add "redef class Object"
for s in states do
- add "\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
+ add "private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
end
for p in gram.prods do
- add "\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
+ add "private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
end
- add "end"
add "redef class NToken"
for s in states do
if alt.name.has_suffix("+_more") then
add "\t\tvar prod = n0"
- add "\t\tn0.items.add(n1)"
+ add "\t\tn0.children.add(n1)"
else if alt.name.has_suffix("+_one") then
add "\t\tvar prod = new {alt.prod.acname}"
- add "\t\tprod.items.add(n0)"
+ add "\t\tprod.children.add(n0)"
else
alt.make_codes
var cpt = 0
# A state in a LR automaton
class LRState
- # name of the automaton (short part from the start)
+ # Name of the automaton (short part from the start)
var name: String
- # malglen name
+ # Mangled name
fun cname: String do return name.to_cmangle
- # number
+ # Number
var number: Int = -1
# Set of all items
# Ingoing transitions
var outs = new Array[LRTransition]
- # trans function
+ # Trans function
fun trans(e: Element): nullable LRState
do
for t in outs do if t.elem == e then return t.to
# Add and item in the core
fun add(i: Item): Bool
do
- #if items.has(i) then return
-
- #print "add {i} in {inspect}={self}"
- var found = false
- for i2 in items do
- if i.alt == i2.alt and i.pos == i2.pos then
- if i2.future.has_all(i.future) then return false
- i2.future.add_all(i.future)
- found = true
+ if items.has(i) then return false
- break
- end
- end
- if not found then
- items.add(i)
- if i.pos > 0 or i.alt.prod.accept then core.add(i)
- end
+ items.add(i)
+ if i.pos > 0 or i.alt.prod.accept then core.add(i)
return true
end
- # Recusively extends item outside the core
+ # Recursively extends item outside the core
fun extends(i: Item)
do
var e = i.next
if e == null then return
if not e isa Production then return
- var la = i.lookahead
for i2 in e.start_state do
- i2.future.add_all(la)
if add(i2) then extends(i2)
end
end
+ # SLR lookahead
+ fun lookahead(i: Item): Set[Item]
+ do
+ return i.alt.prod.afters
+ end
+
# Set of all reductions
var reduces = new ArraySet[Alternative]
# Set of all shifts
# Set of all goto
var gotos = new ArraySet[Production]
# Reduction guarded by tokens
- var guarded_reduce = new HashMap[Token, Array[Item]]
- # Shitfs guarded by tokens
- var guarded_shift = new HashMap[Token, Array[Item]]
+ var guarded_reduce = new HashMap[Token, Set[Item]]
+ # Shifts guarded by tokens
+ var guarded_shift = new HashMap[Token, Set[Item]]
# Does the state need a guard to perform an action?
fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
- # Is the state LR0
+ # Is the state LR0?
fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
- # compute guards and conflicts
+ # Compute guards and conflicts
fun analysis
do
+ # Extends the core
for i in items.to_a do
extends(i)
end
+ # Collect action and conflicts
for i in items do
var n = i.next
if n == null then
reduces.add(i.alt)
- #for t in i.alt.prod.afters do
- for t in i.future do
+ for i2 in lookahead(i) do
+ var t = i2.next
+ assert t isa Token
t.reduces.add(self)
- if guarded_reduce.has_key(t) then
- guarded_reduce[t].add(i)
- else
- guarded_reduce[t] = [i]
+ if not guarded_reduce.has_key(t) then
+ guarded_reduce[t] = new ArraySet[Item]
end
+ guarded_reduce[t].add(i)
end
else if n isa Token then
shifts.add(n)
n.shifts.add(self)
- if guarded_shift.has_key(n) then
- guarded_shift[n].add(i)
- else
- guarded_shift[n] = [i]
+ if not guarded_shift.has_key(n) then
+ guarded_shift[n] = new ArraySet[Item]
end
+ guarded_shift[n].add(i)
else if n isa Production then
gotos.add(n)
n.gotos.add(self)
abort
end
end
+ # Token to remove as reduction guard to solve S/R conflicts
+ var removed_reduces = new Array[Token]
for t, a in guarded_reduce do
if a.length > 1 then
print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
- for i in a do print "\t{i}"
- else if guarded_shift.has_key(t) then
- print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
- for i in a do print "\t{i}"
- for i in guarded_shift[t] do print "\t{i}"
+ for i in a do print "\treduce: {i}"
+ end
+ if guarded_shift.has_key(t) then
+ var ri = a.first
+ var confs = new Array[Item]
+ var ress = new Array[String]
+ var g = guarded_shift[t]
+ for si in lookahead(ri) do
+ if si.next != t then continue
+ if not g.has(si) then
+ confs.add(si)
+ continue
+ end
+ var p = ri.alt.prod
+ var csi: nullable Item = null
+ for bsi in back_expand(si) do
+ if bsi.alt.prod == p then
+ csi = bsi
+ break
+ end
+ end
+ if csi == null then
+ confs.add(si)
+ continue
+ end
+ ress.add "\tshift: {si}"
+ if si != csi then
+ ress.add "\tcore shift: {csi}"
+ end
+ end
+ if confs.is_empty then
+ print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
+ print "\treduce: {ri}"
+ for r in ress do print r
+ removed_reduces.add t
+ else
+ print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
+ print "\treduce: {ri}"
+ for i in guarded_shift[t] do print "\tshift: {i}"
+ removed_reduces.add t
+ end
end
end
+ for t in removed_reduces do
+ guarded_reduce.keys.remove(t)
+ t.reduces.remove(self)
+ end
+ end
+
+ # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
+ fun back_expand(i: Item): Set[Item]
+ do
+ var res = new ArraySet[Item]
+ var todo = [i]
+ res.add(i)
+ while not todo.is_empty do
+ var x = todo.pop
+ if x.pos > 0 then continue
+ var p = x.alt.prod
+ for y in items do
+ if y.next != p then continue
+ if res.has(y) then continue
+ res.add(y)
+ todo.add(y)
+ end
+ end
+ return res
end
end
class LRTransition
# The origin state
var from: LRState
- # The testination state
+ # The destination state
var to: LRState
# The element labeling the transition
var elem: Element
end
-# A alternative with a cursor (dot) and possibly a future
+# A alternative with a cursor (dot) before an element
class Item
# The alternative
var alt: Alternative
# The dot index (0 means before the first element)
var pos: Int
- # The possible future
- var future = new ArraySet[Token]
redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
redef fun hash do return alt.hash + pos
redef fun to_s
do
- var b = new Buffer
+ var b = new FlatBuffer
b.append("{alt.prod.name}::{alt.name}=")
for i in [0..alt.elems.length[
do
b.append(alt.elems[i].to_s)
end
if pos == alt.elems.length then b.append(".")
- if not future.is_empty then
- b.append("/\{")
- b.append(future.join(" "))
- b.append("\}")
- end
return b.to_s
end
- # The element thatr follow the dot, null if the fdot is at the end
+ # The element that follows the dot, null if the dot is at the end
fun next: nullable Element
do
if pos >= alt.elems.length then return null
return alt.elems[pos]
end
- # SLR loohahead
+ # SLR lookahead
fun lookahead: Set[Token]
do
var res = new HashSet[Token]
res.add(e)
break
else if e isa Production then
- res.add_all(e.firsts)
+ #res.add_all(e.firsts)
if not e.is_nullable then break
end
p += 1
end
- if p >= alt.elems.length then
- res.add_all(future)
- end
return res
end
fun avance: Item
do
var res = new Item(alt, pos+1)
- res.future.add_all(future)
return res
end
end