lib/standard/string: Moved escape_to_dot from nitcc to standard/string.nit
[nit.git] / contrib / nitcc / src / grammar.nit
index 7e2150a..620b0a0 100644 (file)
@@ -27,14 +27,15 @@ class Gram
        # Dump of the concrete grammar and the transformations
        fun pretty: String
        do
-               var res = new Buffer
+               var res = new FlatBuffer
                for p in prods do
                        if p.spe != null then
                                res.append("{p.name} \{-> {p.spe.name}\}=\n")
                        else
                                res.append("{p.name} =\n")
                        end
-                       var last = p.alts.last
+                       var last = null
+                       if not p.alts.is_empty then p.alts.last
                        for a in p.alts do
                                res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
                                if a.codes == null then a.make_codes
@@ -42,8 +43,14 @@ class Gram
                                if a == last then res.append(" ;\n") else res.append(" |\n")
                        end
                        if p.is_nullable then res.append "\t// is nullable\n"
-                       if not p.firsts.is_empty then res.append "\t// firsts: {p.firsts.join(" ")}\n"
-                       if not p.afters.is_empty then res.append "\t// afters: {p.afters.join(" ")}\n"
+                       if not p.firsts.is_empty then
+                               res.append "\t// firsts:\n"
+                               for x in p.firsts do res.append "\t//   {x}\n"
+                       end
+                       if not p.afters.is_empty then
+                               res.append "\t// afters:\n"
+                               for x in p.afters do res.append "\t//   {x}\n"
+                       end
                end
                return res.to_s
        end
@@ -54,6 +61,7 @@ class Gram
        do
                for p in self.prods do
                        for a in p.alts.to_a do
+                               if a.phony then continue
                                var to_inline = false
                                for e in a.elems do
                                        if e isa Production and prods.has(e) then to_inline = true
@@ -81,6 +89,7 @@ class Gram
                                        end
                                        pool2.clear
                                        for a2 in e.alts do
+                                               if a.phony then continue
                                                if a2.codes == null then a2.make_codes
                                                for x in pool do
                                                        var name = a.name + "_" + pool2.length.to_s
@@ -200,6 +209,7 @@ class Gram
                        for p in prods do
                                if p.is_nullable then continue
                                for a in p.alts do
+                                       if a.phony then continue
                                        var nullabl = true
                                        for e in a.elems do
                                                if e isa Token then
@@ -226,9 +236,13 @@ class Gram
                        for p in prods do
                                var fs = p.firsts
                                for a in p.alts do
-                                       for e in a.elems do
+                                       if a.phony then continue
+                                       var i = a.first_item
+                                       loop
+                                               var e = i.next
+                                               if e == null then break
                                                if e isa Token then
-                                                       if try_add(fs, e) then changed = true
+                                                       if try_add(fs, i) then changed = true
                                                        break
                                                else if e isa Production then
                                                        for t in e.firsts do
@@ -238,6 +252,7 @@ class Gram
                                                else
                                                        abort
                                                end
+                                               i = i.avance
                                        end
                                end
                        end
@@ -248,16 +263,23 @@ class Gram
                        var changed = false
                        for p1 in prods do
                                for a in p1.alts do
+                                       if a.phony then continue
                                        var p0: nullable Production = null
-                                       for e in a.elems do
+                                       var i = a.first_item
+                                       loop
+                                               var e = i.next
+                                               if e == null then break
                                                var p = p0
                                                if e isa Production then p0 = e else p0 = null
-                                               if p == null then continue
+                                               if p == null then
+                                                       i = i.avance
+                                                       continue
+                                               end
 
                                                var afs = p.afters
 
                                                if e isa Token then
-                                                       if try_add(afs, e) then changed = true
+                                                       if try_add(afs, i) then changed = true
                                                else if e isa Production then
                                                        for t in e.firsts do
                                                                if try_add(afs, t) then changed = true
@@ -270,6 +292,7 @@ class Gram
                                                else
                                                        abort
                                                end
+                                               i = i.avance
                                        end
                                        if p0 != null then
                                                var afs = p0.afters
@@ -284,7 +307,7 @@ class Gram
        end
 
        # used by `analyse`
-       private fun try_add(set: Set[Token], t: Token): Bool
+       private fun try_add(set: Set[Item], t: Item): Bool
        do
                var res = not set.has(t)
                if res then set.add(t)
@@ -307,11 +330,11 @@ class Production
 
        # Is self transformed to a other production for the AST
        # FIXME: cleaup AST
-       var spe: nullable Production writable = null
+       var spe: nullable Production = null is writable
 
        # Is self contains only a single alternative (then no need for a abstract production class in the AST)
        # FIXME cleanup AST
-       var altone writable = false
+       var altone = false is writable
 
        # The cname of the class in the AST
        # FIXME: cleanup AST
@@ -323,9 +346,9 @@ class Production
        # Is the production nullable
        var is_nullable = false
        # The first tokens of the production
-       var firsts = new HashSet[Token]
+       var firsts = new HashSet[Item]
        # The tokens that may follows the production (as in SLR)
-       var afters = new HashSet[Token]
+       var afters = new HashSet[Item]
 
 
        # Crate a new empty alternative
@@ -357,7 +380,8 @@ class Production
        do
                var res = new Array[Item]
                for a in alts do
-                       res.add new Item(a, 0)
+                       if a.phony then continue
+                       res.add a.first_item
                end
                return res
        end
@@ -372,11 +396,14 @@ class Alternative
        var prod: Production
 
        # The name of the alternative
-       var name: String writable
+       var name: String is writable
 
        # The elements of the alternative
        var elems: Array[Element]
 
+       # The first item of the alternative
+       var first_item = new Item(self, 0)
+
        # The name of the elements
        var elems_names = new Array[nullable String]
 
@@ -397,10 +424,13 @@ class Alternative
        end
 
        # The code for the reduction
-       var codes: nullable Array[Code] writable = null
+       var codes: nullable Array[Code] = null is writable
 
        # Is the alternative transformed (ie not in the AST)
-       var trans writable = false
+       var trans = false is writable
+
+       # Is the alternative unparsable? (ie not in the automaton)
+       var phony = false is writable
 
        # Imitialize codes with the elements
        fun make_codes
@@ -487,12 +517,20 @@ class LRAutomaton
                        res.add "s{s.number} {s.name}\n"
                        res.add "\tCORE\n"
                        for i in s.core do
-                               res.add "\t\t{s.item_to_s(i)}\n"
+                               res.add "\t\t{i}\n"
+                               if i.next != null then continue
+                               for i2 in s.lookahead(i) do
+                                       res.add "\t\t\t{i2}\n"
+                               end
                        end
                        res.add "\tOTHER ITEMS\n"
                        for i in s.items do
                                if s.core.has(i) then continue
-                               res.add "\t\t{s.item_to_s(i)}\n"
+                               res.add "\t\t{i}\n"
+                               if i.next != null then continue
+                               for i2 in s.lookahead(i) do
+                                       res.add "\t\t\t{i2}\n"
+                               end
                        end
                        res.add "\tTRANSITIONS {s.outs.length}\n"
                        for t in s.outs do
@@ -534,12 +572,12 @@ class LRAutomaton
                for s in states do
                        f.write "s{s.number} [label=\"{s.number} {s.name.escape_to_dot}|"
                        for i in s.core do
-                               f.write "{s.item_to_s(i).escape_to_dot}\\l"
+                               f.write "{i.to_s.escape_to_dot}\\l"
                        end
                        f.write("|")
                        for i in s.items do
                                if s.core.has(i) then continue
-                               f.write "{s.item_to_s(i).escape_to_dot}\\l"
+                               f.write "{i.to_s.escape_to_dot}\\l"
                        end
                        f.write "\""
                        if not s.is_lr0 then
@@ -582,14 +620,6 @@ class LRAutomaton
        end
 end
 
-redef class String
-       # escape string used in labels for graphviz
-       fun escape_to_dot: String
-       do
-               return escape_more_to_c("|\{\}")
-       end
-end
-
 private class Generator
        var out = new Array[String]
        fun add(s: String) do out.add(s)
@@ -897,18 +927,11 @@ class LRState
        end
 
        # SLR lookahead
-       fun lookahead(i: Item): Set[Token]
+       fun lookahead(i: Item): Set[Item]
        do
                return i.alt.prod.afters
        end
 
-       fun item_to_s(i: Item): String
-       do
-               var l = lookahead(i)
-               if l.is_empty then return i.to_s
-               return "{i} \{ {l.join(" ")} \}"
-       end
-
        # Set of all reductions
        var reduces = new ArraySet[Alternative]
        # Set of all shifts
@@ -916,9 +939,9 @@ class LRState
        # Set of all goto
        var gotos = new ArraySet[Production]
        # Reduction guarded by tokens
-       var guarded_reduce = new HashMap[Token, Array[Item]]
+       var guarded_reduce = new HashMap[Token, Set[Item]]
        # Shitfs guarded by tokens
-       var guarded_shift = new HashMap[Token, Array[Item]]
+       var guarded_shift = new HashMap[Token, Set[Item]]
 
        # Does the state need a guard to perform an action?
        fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
@@ -939,22 +962,22 @@ class LRState
                        var n = i.next
                        if n == null then
                                reduces.add(i.alt)
-                               for t in lookahead(i) do
+                               for i2 in lookahead(i) do
+                                       var t = i2.next
+                                       assert t isa Token
                                        t.reduces.add(self)
-                                       if guarded_reduce.has_key(t) then
-                                               guarded_reduce[t].add(i)
-                                       else
-                                               guarded_reduce[t] = [i]
+                                       if not guarded_reduce.has_key(t) then
+                                               guarded_reduce[t] = new ArraySet[Item]
                                        end
+                                       guarded_reduce[t].add(i)
                                end
                        else if n isa Token then
                                shifts.add(n)
                                n.shifts.add(self)
-                               if guarded_shift.has_key(n) then
-                                       guarded_shift[n].add(i)
-                               else
-                                       guarded_shift[n] = [i]
+                               if not guarded_shift.has_key(n) then
+                                       guarded_shift[n] = new ArraySet[Item]
                                end
+                               guarded_shift[n].add(i)
                        else if n isa Production then
                                gotos.add(n)
                                n.gotos.add(self)
@@ -965,14 +988,68 @@ class LRState
                for t, a in guarded_reduce do
                        if a.length > 1 then
                                print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
-                               for i in a do print "\t{i}"
+                               for i in a do print "\treduce: {i}"
                        else if guarded_shift.has_key(t) then
-                               print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
-                               for i in a do print "\t{i}"
-                               for i in guarded_shift[t] do print "\t{i}"
+                               var ri = a.first
+                               var confs = new Array[Item]
+                               var ress = new Array[String]
+                               var g = guarded_shift[t]
+                               for si in lookahead(ri) do
+                                       if si.next != t then continue
+                                       if not g.has(si) then
+                                               confs.add(si)
+                                               continue
+                                       end
+                                       var p = ri.alt.prod
+                                       var csi: nullable Item = null
+                                       for bsi in back_expand(si) do
+                                               if bsi.alt.prod == p then
+                                                       csi = bsi
+                                                       break
+                                               end
+                                       end
+                                       if csi == null then
+                                               confs.add(si)
+                                               continue
+                                       end
+                                       ress.add "\tshift:  {si}"
+                                       if si != csi then
+                                               ress.add "\tcore shift: {csi}"
+                                       end
+                               end
+                               if confs.is_empty then
+                                       print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
+                                       print "\treduce: {ri}"
+                                       for r in ress do print r
+                                       guarded_reduce.keys.remove(t)
+                               else
+                                       print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
+                                       print "\treduce: {ri}"
+                                       for i in guarded_shift[t] do print "\tshift:  {i}"
+                               end
                        end
                end
        end
+
+       # Return `i` and all other items of the state that expands, directly or undirectly, to `i`
+       fun back_expand(i: Item): Set[Item]
+       do
+               var res = new ArraySet[Item]
+               var todo = [i]
+               res.add(i)
+               while not todo.is_empty do
+                       var x = todo.pop
+                       if x.pos > 0 then continue
+                       var p = x.alt.prod
+                       for y in items do
+                               if y.next != p then continue
+                               if res.has(y) then continue
+                               res.add(y)
+                               todo.add(y)
+                       end
+               end
+               return res
+       end
 end
 
 # A transition in a LR automaton
@@ -997,7 +1074,7 @@ class Item
 
        redef fun to_s
        do
-               var b = new Buffer
+               var b = new FlatBuffer
                b.append("{alt.prod.name}::{alt.name}=")
                for i in [0..alt.elems.length[
                do
@@ -1026,7 +1103,7 @@ class Item
                                res.add(e)
                                break
                        else if e isa Production then
-                               res.add_all(e.firsts)
+                               #res.add_all(e.firsts)
                                if not e.is_nullable then break
                        end
                        p += 1