nitcc:: when removing a reduction in S/R, notify the token to not generate the reduce...

[nit.git] / contrib / nitcc / src / grammar.nit
diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit

index 6cb7956..095072e 100644 (file)
--- a/contrib/nitcc/src/grammar.nit
+++ b/contrib/nitcc/src/grammar.nit
@@ -12,12 +12,12 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  
-# A gramar describing a language
+# A grammar describing a language
  class Gram
-       # The productions (non-terminal) of the conctete grammar
+       # The productions (non-terminal) of the concrete grammar
         var prods = new Array[Production]
  
-       # The additionnal abstract productions of the grammar
+       # The additional abstract productions of the grammar
         # TODO clean AST
         var ast_prods = new Array[Production]
  
@@ -27,14 +27,15 @@ class Gram
         # Dump of the concrete grammar and the transformations
         fun pretty: String
         do
-               var res = new Buffer
+               var res = new FlatBuffer
                 for p in prods do
                         if p.spe != null then
                                 res.append("{p.name} \{-> {p.spe.name}\}=\n")
                         else
                                 res.append("{p.name} =\n")
                         end
-                       var last = p.alts.last
+                       var last = null
+                       if not p.alts.is_empty then last = p.alts.last
                         for a in p.alts do
                                 res.append("\t\{{a.name}:\} {a.elems.join(" ")}")
                                 if a.codes == null then a.make_codes
@@ -42,18 +43,25 @@ class Gram
                                 if a == last then res.append(" ;\n") else res.append(" |\n")
                         end
                         if p.is_nullable then res.append "\t// is nullable\n"
-                       if not p.firsts.is_empty then res.append "\t// firsts: {p.firsts.join(" ")}\n"
-                       if not p.afters.is_empty then res.append "\t// afters: {p.afters.join(" ")}\n"
+                       if not p.firsts.is_empty then
+                               res.append "\t// firsts:\n"
+                               for x in p.firsts do res.append "\t//   {x}\n"
+                       end
+                       if not p.afters.is_empty then
+                               res.append "\t// afters:\n"
+                               for x in p.afters do res.append "\t//   {x}\n"
+                       end
                 end
                 return res.to_s
         end
  
-       # Inline (ie. remove from the conctete grammar) some production
+       # Inline (ie. remove from the concrete grammar) some production
         # REQUIRE: no circular production in `prods`
         fun inline(prods: Collection[Production])
         do
                 for p in self.prods do
                         for a in p.alts.to_a do
+                               if a.phony then continue
                                 var to_inline = false
                                 for e in a.elems do
                                         if e isa Production and prods.has(e) then to_inline = true
@@ -81,6 +89,7 @@ class Gram
                                         end
                                         pool2.clear
                                         for a2 in e.alts do
+                                               if a.phony then continue
                                                 if a2.codes == null then a2.make_codes
                                                 for x in pool do
                                                         var name = a.name + "_" + pool2.length.to_s
@@ -115,14 +124,15 @@ class Gram
         # Compute a LR automaton
         fun lr0: LRAutomaton
         do
-               analyse
-
                 var start = new Production("_start")
                 start.accept = true
                 var eof = new Token("Eof")
                 tokens.add(eof)
                 start.new_alt("Start", self.prods.first, eof)
                 prods.add(start)
+
+               analyse
+
                 var first = new LRState("Start")
                 first.number = 0
                 for i in start.start_state do first.add(i)
@@ -164,18 +174,17 @@ class Gram
  
                                 #print "#states: {seen.length}"
  
+                               # Look for an existing LR0 state in the automaton
                                 var new_state = true
                                 for n in seen do
                                         if n == next then
-                                               for i in next.items do
-                                                       if n.add(i) then n.extends(i)
-                                               end
                                                 next = n
                                                 new_state = false
                                                 break
                                         end
                                 end
  
+                               # If not, add it to the pool and the todo-list
                                 if new_state then
                                         next.number = seen.length
                                         assert not seen.has(next)
@@ -183,6 +192,7 @@ class Gram
                                         todo.add(next)
                                 end
  
+                               # Add the transition
                                 var t = new LRTransition(state, next, e)
                                 state.outs.add t
                                 next.ins.add t
@@ -199,6 +209,7 @@ class Gram
                         for p in prods do
                                 if p.is_nullable then continue
                                 for a in p.alts do
+                                       if a.phony then continue
                                         var nullabl = true
                                         for e in a.elems do
                                                 if e isa Token then
@@ -225,9 +236,13 @@ class Gram
                         for p in prods do
                                 var fs = p.firsts
                                 for a in p.alts do
-                                       for e in a.elems do
+                                       if a.phony then continue
+                                       var i = a.first_item
+                                       loop
+                                               var e = i.next
+                                               if e == null then break
                                                 if e isa Token then
-                                                       if try_add(fs, e) then changed = true
+                                                       if try_add(fs, i) then changed = true
                                                         break
                                                 else if e isa Production then
                                                         for t in e.firsts do
@@ -237,6 +252,7 @@ class Gram
                                                 else
                                                         abort
                                                 end
+                                               i = i.avance
                                         end
                                 end
                         end
@@ -247,16 +263,23 @@ class Gram
                         var changed = false
                         for p1 in prods do
                                 for a in p1.alts do
+                                       if a.phony then continue
                                         var p0: nullable Production = null
-                                       for e in a.elems do
+                                       var i = a.first_item
+                                       loop
+                                               var e = i.next
+                                               if e == null then break
                                                 var p = p0
                                                 if e isa Production then p0 = e else p0 = null
-                                               if p == null then continue
+                                               if p == null then
+                                                       i = i.avance
+                                                       continue
+                                               end
  
                                                 var afs = p.afters
  
                                                 if e isa Token then
-                                                       if try_add(afs, e) then changed = true
+                                                       if try_add(afs, i) then changed = true
                                                 else if e isa Production then
                                                         for t in e.firsts do
                                                                 if try_add(afs, t) then changed = true
@@ -269,6 +292,7 @@ class Gram
                                                 else
                                                         abort
                                                 end
+                                               i = i.avance
                                         end
                                         if p0 != null then
                                                 var afs = p0.afters
@@ -283,7 +307,7 @@ class Gram
         end
  
         # used by `analyse`
-       private fun try_add(set: Set[Token], t: Token): Bool
+       private fun try_add(set: Set[Item], t: Item): Bool
         do
                 var res = not set.has(t)
                 if res then set.add(t)
@@ -298,19 +322,19 @@ class Production
         # The alternative of the production
         var alts = new Array[Alternative]
  
-       # Additionnal alternatives in the AST
+       # Additional alternatives in the AST
         var ast_alts = new Array[Alternative]
  
         # Is self the accept production
         var accept = false
  
         # Is self transformed to a other production for the AST
-       # FIXME: cleaup AST
-       var spe: nullable Production writable = null
+       # FIXME: cleanup AST
+       var spe: nullable Production = null is writable
  
         # Is self contains only a single alternative (then no need for a abstract production class in the AST)
         # FIXME cleanup AST
-       var altone writable = false
+       var altone = false is writable
  
         # The cname of the class in the AST
         # FIXME: cleanup AST
@@ -321,10 +345,12 @@ class Production
  
         # Is the production nullable
         var is_nullable = false
+
         # The first tokens of the production
-       var firsts = new HashSet[Token]
+       var firsts = new HashSet[Item]
+
         # The tokens that may follows the production (as in SLR)
-       var afters = new HashSet[Token]
+       var afters = new HashSet[Item]
  
  
         # Crate a new empty alternative
@@ -356,7 +382,8 @@ class Production
         do
                 var res = new Array[Item]
                 for a in alts do
-                       res.add new Item(a, 0)
+                       if a.phony then continue
+                       res.add a.first_item
                 end
                 return res
         end
@@ -371,11 +398,14 @@ class Alternative
         var prod: Production
  
         # The name of the alternative
-       var name: String writable
+       var name: String is writable
  
         # The elements of the alternative
         var elems: Array[Element]
  
+       # The first item of the alternative
+       var first_item = new Item(self, 0)
+
         # The name of the elements
         var elems_names = new Array[nullable String]
  
@@ -396,12 +426,15 @@ class Alternative
         end
  
         # The code for the reduction
-       var codes: nullable Array[Code] writable = null
+       var codes: nullable Array[Code] = null is writable
  
         # Is the alternative transformed (ie not in the AST)
-       var trans writable = false
+       var trans = false is writable
+
+       # Is the alternative unparsable? (ie not in the automaton)
+       var phony = false is writable
  
-       # Imitialize codes with the elements
+       # Initialize codes with the elements
         fun make_codes
         do
                 if codes != null then return
@@ -414,20 +447,27 @@ class Alternative
         end
  end
  
-# A step in the construction of the AST. used to modelize transformations
+# A step in the construction of the AST.
+# Used to model transformations
  interface Code
  end
+
  # Get a element from the stack
  class CodePop
         super Code
         redef fun to_s do return "pop"
  end
-# Allocate a new AST node for an alternative using the correct number of poped elements
+
+# Allocate a new AST node for an alternative using the correct number of popped elements
  class CodeNew
         super Code
+
+       # The associated alternative
         var alt: Alternative
+
         redef fun to_s do return "New {alt.name}/{alt.elems.length}"
  end
+
  # Get null
  class CodeNull
         super Code
@@ -446,7 +486,7 @@ abstract class Element
         # The mangled name of the element
         fun cname: String do return "N{name.to_cmangle}"
  
-       # the name of the class in the AST
+       # The name of the class in the AST
         fun acname: String do
                 var res = acname_cache
                 if res == null then
@@ -455,15 +495,17 @@ abstract class Element
                 end
                 return res
         end
+
+       # The name of the class in the AST
         fun acname=(s: String) do acname_cache = s
  end
  
  # A terminal element
  class Token
         super Element
-       # States of the LR automatio that shift on self
+       # States of the LR automaton that shift on self
         var shifts = new ArraySet[LRState]
-       # States of the LR automatio that reduce on self in the lookahead(1)
+       # States of the LR automaton that reduce on self in the lookahead(1)
         var reduces = new ArraySet[LRState]
  end
  
@@ -487,11 +529,19 @@ class LRAutomaton
                         res.add "\tCORE\n"
                         for i in s.core do
                                 res.add "\t\t{i}\n"
+                               if i.next != null then continue
+                               for i2 in s.lookahead(i) do
+                                       res.add "\t\t\t{i2}\n"
+                               end
                         end
                         res.add "\tOTHER ITEMS\n"
                         for i in s.items do
                                 if s.core.has(i) then continue
                                 res.add "\t\t{i}\n"
+                               if i.next != null then continue
+                               for i2 in s.lookahead(i) do
+                                       res.add "\t\t\t{i2}\n"
+                               end
                         end
                         res.add "\tTRANSITIONS {s.outs.length}\n"
                         for t in s.outs do
@@ -501,7 +551,7 @@ class LRAutomaton
                         if s.is_lr0 then
                                 res.add "\t\tSTATE LR0\n"
                         else
-                               res.add "\t\tSTATE LALR\n"
+                               res.add "\t\tSTATE SLR\n"
                                 for t, a in s.guarded_reduce do
                                         if a.length > 1 then
                                                 res.add "\t\t/!\\ REDUCE/REDUCE CONFLICT\n"
@@ -525,7 +575,7 @@ class LRAutomaton
         # Generate a graphviz file of the automaton
         fun to_dot(path: String)
         do
-               var f = new OFStream.open(path)
+               var f = new FileWriter.open(path)
                 f.write("digraph g \{\n")
                 f.write("rankdir=LR;\n")
                 f.write("node[shape=Mrecord,height=0];\n")
@@ -568,11 +618,11 @@ class LRAutomaton
         end
  
         # Generate the parser of the automaton
-       fun gen_to_nit(filepath: String)
+       fun gen_to_nit(filepath: String, name: String)
         do
                 var gen = new Generator
-               gen.gen_to_nit(self)
-               var f = new OFStream.open(filepath)
+               gen.gen_to_nit(self, name)
+               var f = new FileWriter.open(filepath)
                 for s in gen.out do
                         f.write(s)
                         f.write("\n")
@@ -581,38 +631,29 @@ class LRAutomaton
         end
  end
  
-redef class String
-       # escape string used in labels for graphviz
-       fun escape_to_dot: String
-       do
-               return escape_more_to_c("|\{\}")
-       end
-end
-
  private class Generator
         var out = new Array[String]
         fun add(s: String) do out.add(s)
-       fun gen_to_nit(autom: LRAutomaton)
+       fun gen_to_nit(autom: LRAutomaton, name: String)
         do
                 var states = autom.states
                 var gram = autom.grammar
  
-               add "# Parser generated by nitcc"
+               add "# Parser generated by nitcc for the grammar {name}"
+               add "module {name}_parser is no_warning(\"missing-doc\",\"old-init\")"
                 add "import nitcc_runtime"
  
-               add "class MyParser"
+               add "class Parser_{name}"
                 add "\tsuper Parser"
                 add "\tredef fun start_state do return state_{states.first.cname}"
                 add "end"
                 
-               add "redef class Object"
                 for s in states do
-                       add "\tprivate fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
+                       add "private fun state_{s.cname}: LRState{s.cname} do return once new LRState{s.cname}"
                 end
                 for p in gram.prods do
-                       add "\tprivate fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
+                       add "private fun goto_{p.cname}: Goto_{p.cname} do return once new Goto_{p.cname}"
                 end
-               add "end"
  
                 add "redef class NToken"
                 for s in states do
@@ -788,10 +829,10 @@ private class Generator
  
                 if alt.name.has_suffix("+_more") then
                         add "\t\tvar prod = n0"
-                       add "\t\tn0.items.add(n1)"
+                       add "\t\tn0.children.add(n1)"
                 else if alt.name.has_suffix("+_one") then
                         add "\t\tvar prod = new {alt.prod.acname}"
-                       add "\t\tprod.items.add(n0)"
+                       add "\t\tprod.children.add(n0)"
                 else
                         alt.make_codes
                         var cpt = 0
@@ -841,13 +882,13 @@ end
  
  # A state in a LR automaton
  class LRState
-       # name of the automaton (short part from the start)
+       # Name of the automaton (short part from the start)
         var name: String
  
-       # malglen name
+       # Mangled name
         fun cname: String do return name.to_cmangle
  
-       # number
+       # Number
         var number: Int = -1
  
         # Set of all items
@@ -862,7 +903,7 @@ class LRState
         # Ingoing transitions
         var outs = new Array[LRTransition]
  
-       # trans function
+       # Trans function
         fun trans(e: Element): nullable LRState
         do
                 for t in outs do if t.elem == e then return t.to
@@ -877,39 +918,30 @@ class LRState
         # Add and item in the core
         fun add(i: Item): Bool
         do
-               #if items.has(i) then return
-
-               #print "add {i} in {inspect}={self}"
-               var found = false
-               for i2 in items do
-                       if i.alt == i2.alt and i.pos == i2.pos then
-                               if i2.future.has_all(i.future) then return false
-                               i2.future.add_all(i.future)
-                               found = true
+               if items.has(i) then return false
  
-                               break
-                       end
-               end
-               if not found then
-                       items.add(i)
-                       if i.pos > 0 or i.alt.prod.accept then core.add(i)
-               end
+               items.add(i)
+               if i.pos > 0 or i.alt.prod.accept then core.add(i)
                 return true
         end
  
-       # Recusively extends item outside the core
+       # Recursively extends item outside the core
         fun extends(i: Item)
         do
                 var e = i.next
                 if e == null then return
                 if not e isa Production then return
-               var la = i.lookahead
                 for i2 in e.start_state do
-                       i2.future.add_all(la)
                         if add(i2) then extends(i2)
                 end
         end
  
+       # SLR lookahead
+       fun lookahead(i: Item): Set[Item]
+       do
+               return i.alt.prod.afters
+       end
+
         # Set of all reductions
         var reduces = new ArraySet[Alternative]
         # Set of all shifts
@@ -917,44 +949,45 @@ class LRState
         # Set of all goto
         var gotos = new ArraySet[Production]
         # Reduction guarded by tokens
-       var guarded_reduce = new HashMap[Token, Array[Item]]
-       # Shitfs guarded by tokens
-       var guarded_shift = new HashMap[Token, Array[Item]]
+       var guarded_reduce = new HashMap[Token, Set[Item]]
+       # Shifts guarded by tokens
+       var guarded_shift = new HashMap[Token, Set[Item]]
  
         # Does the state need a guard to perform an action?
         fun need_guard: Bool do return not shifts.is_empty or reduces.length > 1
  
-       # Is the state LR0
+       # Is the state LR0?
         fun is_lr0: Bool do return reduces.length <= 1 and shifts.is_empty or reduces.is_empty
  
-       # compute guards and conflicts
+       # Compute guards and conflicts
         fun analysis
         do
+               # Extends the core
                 for i in items.to_a do
                         extends(i)
                 end
  
+               # Collect action and conflicts
                 for i in items do
                         var n = i.next
                         if n == null then
                                 reduces.add(i.alt)
-                               #for t in i.alt.prod.afters do
-                               for t in i.future do
+                               for i2 in lookahead(i) do
+                                       var t = i2.next
+                                       assert t isa Token
                                         t.reduces.add(self)
-                                       if guarded_reduce.has_key(t) then
-                                               guarded_reduce[t].add(i)
-                                       else
-                                               guarded_reduce[t] = [i]
+                                       if not guarded_reduce.has_key(t) then
+                                               guarded_reduce[t] = new ArraySet[Item]
                                         end
+                                       guarded_reduce[t].add(i)
                                 end
                         else if n isa Token then
                                 shifts.add(n)
                                 n.shifts.add(self)
-                               if guarded_shift.has_key(n) then
-                                       guarded_shift[n].add(i)
-                               else
-                                       guarded_shift[n] = [i]
+                               if not guarded_shift.has_key(n) then
+                                       guarded_shift[n] = new ArraySet[Item]
                                 end
+                               guarded_shift[n].add(i)
                         else if n isa Production then
                                 gotos.add(n)
                                 n.gotos.add(self)
@@ -962,16 +995,78 @@ class LRState
                                 abort
                         end
                 end
+               # Token to remove as reduction guard to solve S/R conflicts
+               var removed_reduces = new Array[Token]
                 for t, a in guarded_reduce do
                         if a.length > 1 then
                                 print "REDUCE/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
-                               for i in a do print "\t{i}"
-                       else if guarded_shift.has_key(t) then
-                               print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
-                               for i in a do print "\t{i}"
-                               for i in guarded_shift[t] do print "\t{i}"
+                               for i in a do print "\treduce: {i}"
+                       end
+                       if guarded_shift.has_key(t) then
+                               var ri = a.first
+                               var confs = new Array[Item]
+                               var ress = new Array[String]
+                               var g = guarded_shift[t]
+                               for si in lookahead(ri) do
+                                       if si.next != t then continue
+                                       if not g.has(si) then
+                                               confs.add(si)
+                                               continue
+                                       end
+                                       var p = ri.alt.prod
+                                       var csi: nullable Item = null
+                                       for bsi in back_expand(si) do
+                                               if bsi.alt.prod == p then
+                                                       csi = bsi
+                                                       break
+                                               end
+                                       end
+                                       if csi == null then
+                                               confs.add(si)
+                                               continue
+                                       end
+                                       ress.add "\tshift:  {si}"
+                                       if si != csi then
+                                               ress.add "\tcore shift: {csi}"
+                                       end
+                               end
+                               if confs.is_empty then
+                                       print "Automatic Dangling on state {self.number} {self.name} for token {t}:"
+                                       print "\treduce: {ri}"
+                                       for r in ress do print r
+                                       removed_reduces.add t
+                               else
+                                       print "SHIFT/REDUCE Conflict on state {self.number} {self.name} for token {t}:"
+                                       print "\treduce: {ri}"
+                                       for i in guarded_shift[t] do print "\tshift:  {i}"
+                                       removed_reduces.add t
+                               end
                         end
                 end
+               for t in removed_reduces do
+                       guarded_reduce.keys.remove(t)
+                       t.reduces.remove(self)
+               end
+       end
+
+       # Return `i` and all other items of the state that expands, directly or indirectly, to `i`
+       fun back_expand(i: Item): Set[Item]
+       do
+               var res = new ArraySet[Item]
+               var todo = [i]
+               res.add(i)
+               while not todo.is_empty do
+                       var x = todo.pop
+                       if x.pos > 0 then continue
+                       var p = x.alt.prod
+                       for y in items do
+                               if y.next != p then continue
+                               if res.has(y) then continue
+                               res.add(y)
+                               todo.add(y)
+                       end
+               end
+               return res
         end
  end
  
@@ -979,27 +1074,25 @@ end
  class LRTransition
         # The origin state
         var from: LRState
-       # The testination state
+       # The destination state
         var to: LRState
         # The element labeling the transition
         var elem: Element
  end
  
-# A alternative with a cursor (dot) and possibly a future
+# A alternative with a cursor (dot) before an element
  class Item
         # The alternative
         var alt: Alternative
         # The dot index (0 means before the first element)
         var pos: Int
-       # The possible future
-       var future = new ArraySet[Token]
  
         redef fun ==(o) do return o isa Item and alt == o.alt and pos == o.pos
         redef fun hash do return alt.hash + pos
  
         redef fun to_s
         do
-               var b = new Buffer
+               var b = new FlatBuffer
                 b.append("{alt.prod.name}::{alt.name}=")
                 for i in [0..alt.elems.length[
                 do
@@ -1007,22 +1100,17 @@ class Item
                         b.append(alt.elems[i].to_s)
                 end
                 if pos == alt.elems.length then b.append(".")
-               if not future.is_empty then
-                       b.append("/\{")
-                       b.append(future.join(" "))
-                       b.append("\}")
-               end
                 return b.to_s
         end
  
-       # The element thatr follow the dot, null if the fdot is at the end
+       # The element that follows the dot, null if the dot is at the end
         fun next: nullable Element
         do
                 if pos >= alt.elems.length then return null
                 return alt.elems[pos]
         end
  
-       # SLR loohahead
+       # SLR lookahead
         fun lookahead: Set[Token]
         do
                 var res = new HashSet[Token]
@@ -1033,14 +1121,11 @@ class Item
                                 res.add(e)
                                 break
                         else if e isa Production then
-                               res.add_all(e.firsts)
+                               #res.add_all(e.firsts)
                                 if not e.is_nullable then break
                         end
                         p += 1
                 end
-               if p >= alt.elems.length then
-                       res.add_all(future)
-               end
                 return res
         end
  
@@ -1048,7 +1133,6 @@ class Item
         fun avance: Item
         do
                 var res = new Item(alt, pos+1)
-               res.future.add_all(future)
                 return res
         end
  end