Merge: contrib/re_parser
[nit.git] / contrib / nitcc / src / autom.nit
index 84df722..3248e17 100644 (file)
@@ -252,8 +252,8 @@ class Automaton
                                if t.symbol == null then continue
 
                                # Check overlaps
-                               var tf = t.symbol.first
-                               var tl = t.symbol.last
+                               var tf = t.symbol.as(not null).first
+                               var tl = t.symbol.as(not null).last
                                if l != null and tf > l then continue
                                if tl != null and f > tl then continue
 
@@ -360,8 +360,8 @@ class Automaton
 
                # Remove their transitions
                for s in bads do
-                       for t in s.ins do t.delete
-                       for t in s.outs do t.delete
+                       for t in s.ins.to_a do t.delete
+                       for t in s.outs.to_a do t.delete
                end
 
                # Keep only the good stuff
@@ -373,14 +373,18 @@ class Automaton
        # REQUIRE: self is a DFA
        fun to_minimal_dfa: Automaton
        do
+               assert_valid
+
                trim
 
+               # Graph of known distinct states.
                var distincts = new HashMap[State, Set[State]]
                for s in states do
                        distincts[s] = new HashSet[State]
                end
 
-               # split accept states
+               # split accept states.
+               # An accept state is distinct with a non accept state.
                for s1 in states do
                        for s2 in states do
                                if distincts[s1].has(s2) then continue
@@ -390,7 +394,7 @@ class Automaton
                                        distincts[s2].add(s1)
                                        continue
                                end
-                               if tags[s1] != tags[s2] then
+                               if tags.get_or_null(s1) != tags.get_or_null(s2) then
                                        distincts[s1].add(s2)
                                        distincts[s2].add(s1)
                                        continue
@@ -398,24 +402,36 @@ class Automaton
                        end
                end
 
+               # Fixed point algorithm.
+               # * Get 2 states s1 and s2 not yet distinguished.
+               # * Get a symbol w.
+               # * If s1.trans(w) and s2.trans(w) are distinguished, then
+               #   distinguish s1 and s2.
                var changed = true
-               var ints = new Array[Int]
+               var ints = new Array[Int] # List of symbols to check
                while changed do
                        changed = false
                        for s1 in states do for s2 in states do
                                if distincts[s1].has(s2) then continue
+
+                               # The transitions use intervals. Therefore, for the states s1 and s2,
+                               # we need to check only the meaningful symbols. They are the `first`
+                               # symbol of each interval and the first one after the interval (`last+1`).
                                ints.clear
+                               # Check only `s1`; `s2` will be checked later when s1 and s2 are switched.
                                for t in s1.outs do
                                        var sym = t.symbol
                                        assert sym != null
                                        ints.add sym.first
                                        var l = sym.last
-                                       if l != null then ints.add l
+                                       if l != null then ints.add l + 1
                                end
+
+                               # Check each symbol
                                for i in ints do
                                        var ds1 = s1.trans(i)
                                        var ds2 = s2.trans(i)
-                                       if ds1 == null and ds2 == null then continue
+                                       if ds1 == ds2 then continue
                                        if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
                                        distincts[s1].add(s2)
                                        distincts[s2].add(s1)
@@ -425,6 +441,8 @@ class Automaton
                        end
                end
 
+               # We need to unify not-distinguished states.
+               # Just add an epsilon-transition and DFAize the automaton.
                for s1 in states do for s2 in states do
                        if distincts[s1].has(s2) then continue
                        s1.add_trans(s2, null)
@@ -433,8 +451,25 @@ class Automaton
                return to_dfa
        end
 
-       # Produce a graphvis file for the automaton
-       fun to_dot(filepath: String)
+       # Assert that `self` is a valid automaton or abort
+       fun assert_valid
+       do
+               assert states.has(start)
+               assert states.has_all(accept)
+               for s in states do
+                       for t in s.outs do assert states.has(t.to)
+                       for t in s.ins do assert states.has(t.from)
+               end
+               assert states.has_all(tags.keys)
+               for t, ss in retrotags do
+                       assert states.has_all(ss)
+               end
+       end
+
+       # Produce a graphviz string from the automatom
+       #
+       # Set `merge_transitions = false` to generate one edge by transition (default true).
+       fun to_dot(merge_transitions: nullable Bool): Writable
        do
                var names = new HashMap[State, String]
                var ni = 0
@@ -443,25 +478,27 @@ class Automaton
                        ni += 1
                end
 
-               var f = new FileWriter.open(filepath)
-                f.write("digraph g \{\n")
+               var f = new Buffer
+               f.append("digraph g \{\n")
+               f.append("rankdir=LR;")
 
+               var state_nb = 0
                for s in states do
-                       f.write("s{names[s]}[shape=oval")
+                       f.append("s{names[s]}[shape=circle")
                        #f.write("label=\"\",")
                        if accept.has(s) then
-                               f.write(",color=blue")
+                               f.append(",shape=doublecircle")
                        end
                        if tags.has_key(s) then
-                               f.write(",label=\"")
+                               f.append(",label=\"")
                                for token in tags[s] do
-                                       f.write("{token.name.escape_to_c}\\n")
+                                       f.append("{token.name.escape_to_dot}\\n")
                                end
-                               f.write("\"")
+                               f.append("\"")
                        else
-                               f.write(",label=\"\"")
+                               f.append(",label=\"{state_nb}\"")
                        end
-                       f.write("];\n")
+                       f.append("];\n")
                        var outs = new HashMap[State, Array[nullable TSymbol]]
                        for t in s.outs do
                                var a
@@ -478,31 +515,39 @@ class Automaton
                        for s2, a in outs do
                                var labe = ""
                                for c in a do
+                                       if merge_transitions == false then labe = ""
                                        if not labe.is_empty then labe += "\n"
                                        if c == null then
-                                               labe += "''"
+                                               labe += "ε"
                                        else
                                                labe += c.to_s
                                        end
+                                       if merge_transitions == false then
+                                               f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_dot}\"];\n")
+                                       end
+                               end
+                               if merge_transitions == null or merge_transitions == true then
+                                       f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n")
                                end
-                               f.write("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n")
                        end
+                       state_nb += 1
                end
-               f.write("empty->s{names[start]}; empty[label=\"\",shape=none];\n")
-
-                f.write("\}\n")
-               f.close
+               f.append("empty->s{names[start]}; empty[label=\"\",shape=none];\n")
+               f.append("\}\n")
+               return f
        end
 
        # Transform a NFA to a DFA.
        # note: the DFA is not minimized.
        fun to_dfa: Automaton
        do
+               assert_valid
+
                trim
 
                var dfa = new Automaton.empty
                var n2d = new ArrayMap[Set[State], State]
-               var seen = new ArraySet[Set[State]] 
+               var seen = new ArraySet[Set[State]]
                var alphabet = new HashSet[Int]
                var st = eclosure([start])
                var todo = [st]
@@ -572,7 +617,7 @@ class Automaton
                                        seen.add(nfa_dest)
                                end
                                if lastst != null and lastst.to == dfa_dest then
-                                       lastst.symbol.last = sym.last
+                                       lastst.symbol.as(not null).last = sym.last
                                else
                                        lastst = dfa_state.add_trans(dfa_dest, sym)
                                end
@@ -593,7 +638,7 @@ class Automaton
                        for t in s.outs do
                                if t.symbol != null then continue
                                var to = t.to
-                               if res.has(to) then continue 
+                               if res.has(to) then continue
                                res.add(to)
                                todo.add(to)
                        end
@@ -614,7 +659,7 @@ class Automaton
                                var l = sym.last
                                if l != null and l < symbol then continue
                                var to = t.to
-                               if res.has(to) then continue 
+                               if res.has(to) then continue
                                res.add(to)
                        end
                end
@@ -656,7 +701,7 @@ private class DFAGenerator
                end
 
                add "# Lexer generated by nitcc for the grammar {name}\n"
-               add "module {name}_lexer is no_warning \"missing-doc\"\n"
+               add "module {name}_lexer is generated, no_warning \"missing-doc\"\n"
                add("import nitcc_runtime\n")
 
                var p = parser
@@ -725,11 +770,9 @@ private class DFAGenerator
                        else
                                add("\tredef fun trans(char) do\n")
 
-                               add("\t\tvar c = char.code_point\n")
-
                                # Collect the sequence of tests in the dispatch sequence
                                # The point here is that for each transition, there is a first and a last
-                               # So holes hare to be identified
+                               # So holes have to be identified
                                var dispatch = new HashMap[Int, nullable State]
                                var haslast: nullable State = null
 
@@ -749,8 +792,15 @@ private class DFAGenerator
                                        end
                                end
 
+                               if dispatch.is_empty and haslast != null then
+                                       # Only one transition that accepts everything (quite rare)
+                               else
+                                       # We need to check
+                                       add("\t\tvar c = char.code_point\n")
+                               end
+
                                # Generate a sequence of `if` for the dispatch
-                               if haslast != null then
+                               if haslast != null and last >= 0 then
                                        # Special case: handle up-bound first if not an error
                                        add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
                                        # previous become the new last case