nitcc: DFAStates are private
[nit.git] / contrib / nitcc / src / autom.nit
index 86befbb..c20f3df 100644 (file)
@@ -37,6 +37,12 @@ class Automaton
        # use `add_tag` to update
        var retrotags = new HashMap[Token, Set[State]]
 
+       # Tag all accept states
+       fun tag_accept(t: Token)
+       do
+               for s in accept do add_tag(s, t)
+       end
+
        # Add a token to a state
        fun add_tag(s: State, t: Token)
        do
@@ -55,6 +61,21 @@ class Automaton
                else
                        retrotags[t].add s
                end
+
+               assert tags[s].has(t)
+               assert retrotags[t].has(s)
+       end
+
+       # Remove all occurences of a tag in an automaton
+       fun clear_tag(t: Token)
+       do
+               if not retrotags.has_key(t) then return
+               for s in retrotags[t] do
+                       if not tags.has_key(s) then continue
+                       tags[s].remove(t)
+                       if tags[s].is_empty then tags.keys.remove(s)
+               end
+               retrotags.keys.remove(t)
        end
 
        # Remove tokens from conflicting state according the the inclusion of language
@@ -105,7 +126,8 @@ class Automaton
        do
                var s = new State
                var a = new State
-               s.add_trans(a, symbol)
+               var sym = new TSymbol(symbol, symbol)
+               s.add_trans(a, sym)
                start = s
                accept.add a
                states.add s
@@ -114,13 +136,12 @@ class Automaton
 
        # Initialize a new automation for the language that accepts only a range of symbols
        # Two state, the second is accept, one transition for `from` to `to`
-       init cla(from, to: Int)
+       init cla(first: Int, last: nullable Int)
        do
                var s = new State
                var a = new State
-               for symbol in [from..to] do
-                       s.add_trans(a, symbol)
-               end
+               var sym = new TSymbol(first, last)
+               s.add_trans(a, sym)
                start = s
                accept.add a
                states.add s
@@ -163,6 +184,16 @@ class Automaton
                states.add_all other.states
        end
 
+       # `self` absorbs all states, transisions, tags, and acceptations of `other`
+       # An epsilon transition is added between `self.start` and `other.start`
+       fun absorb(other: Automaton)
+       do
+               states.add_all other.states
+               start.add_trans(other.start, null)
+               for s, ts in other.tags do for t in ts do add_tag(s, t)
+               accept.add_all other.accept
+       end
+
        # Do the Kleene closure (*) on self
        fun close
        do
@@ -187,11 +218,36 @@ class Automaton
        end
 
        # Remove all transitions on a given symbol
-       fun minus_sym(symbol: Int)
+       fun minus_sym(symbol: TSymbol)
        do
+               var f = symbol.first
+               var l = symbol.last
                for s in states do
                        for t in s.outs.to_a do
-                               if t.symbol == symbol then t.delete
+                               if t.symbol == null then continue
+
+                               # Check overlaps
+                               var tf = t.symbol.first
+                               var tl = t.symbol.last
+                               if l != null and tf > l then continue
+                               if tl != null and f > tl then continue
+
+                               t.delete
+
+                               # Add left and right part if non empty
+                               if tf < f then
+                                       var sym = new TSymbol(tf,f-1)
+                                       s.add_trans(t.to, sym)
+                               end
+                               if l != null then
+                                       if tl == null then
+                                               var sym = new TSymbol(l+1, null)
+                                               s.add_trans(t.to, sym)
+                                       else if tl > l then
+                                               var sym = new TSymbol(l+1, tl)
+                                               s.add_trans(t.to, sym)
+                                       end
+                               end
                        end
                end
        end
@@ -222,6 +278,97 @@ class Automaton
                return res
        end
 
+       # Reverse an automaton in place
+       fun reverse
+       do
+               for s in states do
+                       var tmp = s.ins
+                       s.ins = s.outs
+                       s.outs = tmp
+                       for t in s.outs do
+                               var tmp2 = t.from
+                               t.from = t.to
+                               t.to = tmp2
+                       end
+               end
+               var st = start
+               if accept.length == 1 then
+                       start = accept.first
+               else
+                       var st2 = new State
+                       start = st2
+                       states.add(st2)
+
+                       for s in accept do
+                               st2.add_trans(s, null)
+                       end
+               end
+               accept.clear
+               accept.add(st)
+       end
+
+       # Generate a minimal DFA
+       # REQUIRE: self is a DFA
+       fun to_minimal_dfa: Automaton
+       do
+               var distincts = new HashMap[State, Set[State]]
+               for s in states do
+                       distincts[s] = new HashSet[State]
+               end
+
+               # split accept states
+               for s1 in states do
+                       for s2 in states do
+                               if distincts[s1].has(s2) then continue
+                               if not accept.has(s1) then continue
+                               if not accept.has(s2) then
+                                       distincts[s1].add(s2)
+                                       distincts[s2].add(s1)
+                                       continue
+                               end
+                               if tags[s1] != tags[s2] then
+                                       distincts[s1].add(s2)
+                                       distincts[s2].add(s1)
+                                       continue
+                               end
+                       end
+               end
+
+               var changed = true
+               var ints = new Array[Int]
+               while changed do
+                       changed = false
+                       for s1 in states do for s2 in states do
+                               if distincts[s1].has(s2) then continue
+                               ints.clear
+                               for t in s1.outs do
+                                       var sym = t.symbol
+                                       assert sym != null
+                                       ints.add sym.first
+                                       var l = sym.last
+                                       if l != null then ints.add l
+                               end
+                               for i in ints do
+                                       var ds1 = s1.trans(i)
+                                       var ds2 = s2.trans(i)
+                                       if ds1 == null and ds2 == null then continue
+                                       if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
+                                       distincts[s1].add(s2)
+                                       distincts[s2].add(s1)
+                                       changed = true
+                                       break
+                               end
+                       end
+               end
+
+               for s1 in states do for s2 in states do
+                       if distincts[s1].has(s2) then continue
+                       s1.add_trans(s2, null)
+               end
+
+               return to_dfa
+       end
+
        # Produce a graphvis file for the automaton
        fun to_dot(filepath: String)
        do
@@ -229,20 +376,22 @@ class Automaton
                 f.write("digraph g \{\n")
 
                for s in states do
-                       f.write("s{s.object_id}[")
+                       f.write("s{s.object_id}[shape=oval")
                        #f.write("label=\"\",")
                        if accept.has(s) then
-                               f.write("color=blue")
-                               if tags.has_key(s) then
-                                       f.write(",label=\"")
-                                       for token in tags[s] do
-                                               f.write("{token.name.escape_to_c}\\n")
-                                       end
-                                       f.write("\"")
+                               f.write(",color=blue")
+                       end
+                       if tags.has_key(s) then
+                               f.write(",label=\"")
+                               for token in tags[s] do
+                                       f.write("{token.name.escape_to_c}\\n")
                                end
+                               f.write("\"")
+                       else
+                               f.write(",label=\"\"")
                        end
                        f.write("];\n")
-                       var outs = new HashMap[State, Array[nullable Int]]
+                       var outs = new HashMap[State, Array[nullable TSymbol]]
                        for t in s.outs do
                                var a
                                var s2 = t.to
@@ -250,35 +399,19 @@ class Automaton
                                if outs.has_key(s2) then
                                        a = outs[s2]
                                else
-                                       a = new Array[nullable Int]
+                                       a = new Array[nullable TSymbol]
                                        outs[s2] = a
                                end
                                a.add(c)
                        end
                        for s2, a in outs do
                                var labe = ""
-                               var lastc: nullable Int = null
-                               var elip = 0
-                               a.add(-1)
                                for c in a do
+                                       if not labe.is_empty then labe += "\n"
                                        if c == null then
-                                               if not labe.is_empty then labe += "\n"
                                                labe += "''"
-                                       else if lastc == c - 1 then
-                                               elip += 1
-                                               lastc = c
                                        else
-                                               if elip == 1 then
-                                                       assert lastc != null
-                                                       labe += "\n{sym_to_s(lastc)}"
-                                               else if elip > 1 then
-                                                       assert lastc != null
-                                                       labe += " .. {sym_to_s(lastc)}"
-                                               end
-                                               if c == -1 then break
-                                               if not labe.is_empty then labe += "\n"
-                                               labe += sym_to_s(c)
-                                               lastc = c
+                                               labe += c.to_s
                                        end
                                end
                                f.write("s{s.object_id}->s{s2.object_id} [label=\"{labe.escape_to_c}\"];\n")
@@ -290,19 +423,6 @@ class Automaton
                f.close
        end
 
-       # Transform a symbol to a string
-       # Used by `to_dot`
-       private fun sym_to_s(symbol: nullable Int): String
-       do
-               if symbol == null then
-                       return "''"
-               else if symbol <= 32 then
-                       return "#{symbol}"
-               else
-                       return symbol.ascii.to_s
-               end
-       end
-
        # Transform a NFA to a DFA
        # note: the DFA is not miminized
        fun to_dfa: Automaton
@@ -310,7 +430,7 @@ class Automaton
                var dfa = new Automaton.empty
                var n2d = new ArrayMap[Set[State], State]
                var seen = new ArraySet[Set[State]] 
-               var ts = new HashSet[Int]
+               var alphabet = new HashSet[Int]
                var st = eclosure([start])
                var todo = [st]
                n2d[st] = dfa.start
@@ -319,8 +439,18 @@ class Automaton
                        var nfa_states = todo.pop
                        #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
                        var dfa_state = n2d[nfa_states]
-                       ts.clear
+                       alphabet.clear
                        for s in nfa_states do
+                               # Collect important values to build the alphabet
+                               for t in s.outs do
+                                       var sym = t.symbol
+                                       if sym == null then continue
+                                       alphabet.add(sym.first)
+                                       var l = sym.last
+                                       if l != null then alphabet.add(l)
+                               end
+
+                               # Mark accept and tags
                                if accept.has(s) then
                                        if tags.has_key(s) then
                                                for t in tags[s] do
@@ -329,25 +459,49 @@ class Automaton
                                        end
                                        dfa.accept.add(dfa_state)
                                end
-                               for t in s.outs do
-                                       var sym = t.symbol
-                                       if sym == null or ts.has(sym) then continue
-                                       ts.add(sym)
-                                       var nfa_dest = eclosure(trans(nfa_states, sym))
-                                       #print "{nfa_states} -> {sym} -> {nfa_dest}"
-                                       var dfa_dest
-                                       if seen.has(nfa_dest) then
-                                               #print "* reuse {nfa_dest.inspect}={nfa_dest}"
-                                               dfa_dest = n2d[nfa_dest]
-                                       else
-                                               #print "* new {nfa_dest.inspect}={nfa_dest}"
-                                               dfa_dest = new State
-                                               dfa.states.add(dfa_dest)
-                                               n2d[nfa_dest] = dfa_dest
-                                               todo.add(nfa_dest)
-                                               seen.add(nfa_dest)
-                                       end
-                                       dfa_state.add_trans(dfa_dest, sym)
+                       end
+
+                       # From the important values, build a sequence of TSymbols
+                       var a = alphabet.to_a
+                       (new ComparableSorter[Int]).sort(a)
+                       var tsyms = new Array[TSymbol]
+                       var last = 0
+                       for i in a do
+                               if last > 0 and last <= i-1 then
+                                       tsyms.add(new TSymbol(last,i-1))
+                               end
+                               tsyms.add(new TSymbol(i,i))
+                               last = i+1
+                       end
+                       if last > 0 then
+                               tsyms.add(new TSymbol(last,null))
+                       end
+                       #print "Alphabet: {tsyms.join(", ")}"
+
+                       var lastst: nullable Transition = null
+                       for sym in tsyms do
+                               var nfa_dest = eclosure(trans(nfa_states, sym.first))
+                               if nfa_dest.is_empty then
+                                       lastst = null
+                                       continue
+                               end
+                               #print "{nfa_states} -> {sym} -> {nfa_dest}"
+                               var dfa_dest
+                               if seen.has(nfa_dest) then
+                                       #print "* reuse {nfa_dest.inspect}={nfa_dest}"
+                                       dfa_dest = n2d[nfa_dest]
+                               else
+                                       #print "* new {nfa_dest.inspect}={nfa_dest}"
+                                       dfa_dest = new State
+                                       dfa.states.add(dfa_dest)
+                                       n2d[nfa_dest] = dfa_dest
+                                       todo.add(nfa_dest)
+                                       seen.add(nfa_dest)
+                               end
+                               if lastst != null and lastst.to == dfa_dest then
+                                       lastst.symbol.last = sym.last
+                               else
+                                       lastst = dfa_state.add_trans(dfa_dest, sym)
                                end
                        end
                end
@@ -381,7 +535,11 @@ class Automaton
                var res = new ArraySet[State]
                for s in states do
                        for t in s.outs do
-                               if t.symbol != symbol then continue
+                               var sym = t.symbol
+                               if sym == null then continue
+                               if sym.first > symbol then continue
+                               var l = sym.last
+                               if l != null and l < symbol then continue
                                var to = t.to
                                if res.has(to) then continue 
                                res.add(to)
@@ -451,7 +609,7 @@ private class DFAGenerator
 
                for s in automaton.states do
                        var  n = names[s]
-                       add("class DFAState{n}\n")
+                       add("private class DFAState{n}\n")
                        add("\tsuper DFAState\n")
                        if automaton.accept.has(s) then
                                var token
@@ -476,7 +634,7 @@ private class DFAGenerator
                                end
                                add("\tend\n")
                        end
-                       var trans = new ArrayMap[Int, State]
+                       var trans = new ArrayMap[TSymbol, State]
                        for t in s.outs do
                                var sym = t.symbol
                                assert sym != null
@@ -484,19 +642,26 @@ private class DFAGenerator
                        end
                        if trans.is_empty then
                                # Do nothing, inherit the trans
-                       else if trans.length == 1 then
-                               var sym = trans.keys.first
-                               var next = trans.values.first
-                               add("\tredef fun trans(c) do\n")
-                               add("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
-                               add("\t\treturn null\n")
-                               add("\tend\n")
                        else
-                               add("\tredef fun trans(c) do\n")
+                               add("\tredef fun trans(char) do\n")
+
+                               add("\t\tvar c = char.ascii\n")
+                               var haslast = false
+                               var last = -1
                                for sym, next in trans do
-                                       add("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
+                                       assert not haslast
+                                       assert sym.first > last
+                                       if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n")
+                                       var l = sym.last
+                                       if l == null then
+                                               add("\t\treturn dfastate_{names[next]}\n")
+                                               haslast= true
+                                       else
+                                               add("\t\tif c <= {l} then return dfastate_{names[next]}\n")
+                                               last = l
+                                       end
                                end
-                               add("\t\treturn null\n")
+                               if not haslast then add("\t\treturn null\n")
                                add("\tend\n")
                        end
                        add("end\n")
@@ -515,13 +680,50 @@ class State
        var ins = new Array[Transition]
 
        # Add a transitions to `to` on `symbol` (null means epsilon)
-       fun add_trans(to: State, symbol: nullable Int): Transition
+       fun add_trans(to: State, symbol: nullable TSymbol): Transition
        do
                var t = new Transition(self, to, symbol)
                outs.add(t)
                to.ins.add(t)
                return t
        end
+
+       fun trans(i: Int): nullable State
+       do
+               for t in outs do
+                       var sym = t.symbol
+                       assert sym != null
+                       var f = sym.first
+                       var l = sym.last
+                       if i < f then continue
+                       if l != null and i > l then continue
+                       return t.to
+               end
+               return null
+       end
+end
+
+# A range of symbols on a transition
+class TSymbol
+       var first: Int
+       var last: nullable Int
+
+       redef fun to_s
+       do
+               var res
+               var f = first
+               if f <= 32 then
+                       res = "#{f}"
+               else
+                       res = f.ascii.to_s
+               end
+               var l = last
+               if f == l then return res
+               res += " .. "
+               if l == null then return res
+               if l <= 32 or l >= 127 then return res + "#{l}"
+               return res + l.ascii.to_s
+       end
 end
 
 # A transition in a finite automaton
@@ -531,7 +733,7 @@ class Transition
        # The destination state
        var to: State
        # The symbol on the transition (null means epsilon)
-       var symbol: nullable Int
+       var symbol: nullable TSymbol
 
        # Remove the transition from the automaton
        # Detash from `from` and `to`