nitc: use is_generated in various tools and generated files
[nit.git] / contrib / nitcc / src / autom.nit
index f286894..aa8c218 100644 (file)
@@ -21,7 +21,7 @@ import grammar
 # A finite automaton
 class Automaton
        # The start state
-       var start: State
+       var start: State is noinit
 
        # State that are accept states
        var accept = new Array[State]
@@ -360,8 +360,8 @@ class Automaton
 
                # Remove their transitions
                for s in bads do
-                       for t in s.ins do t.delete
-                       for t in s.outs do t.delete
+                       for t in s.ins.to_a do t.delete
+                       for t in s.outs.to_a do t.delete
                end
 
                # Keep only the good stuff
@@ -373,14 +373,18 @@ class Automaton
        # REQUIRE: self is a DFA
        fun to_minimal_dfa: Automaton
        do
+               assert_valid
+
                trim
 
+               # Graph of known distinct states.
                var distincts = new HashMap[State, Set[State]]
                for s in states do
                        distincts[s] = new HashSet[State]
                end
 
-               # split accept states
+               # split accept states.
+               # An accept state is distinct with a non accept state.
                for s1 in states do
                        for s2 in states do
                                if distincts[s1].has(s2) then continue
@@ -390,7 +394,7 @@ class Automaton
                                        distincts[s2].add(s1)
                                        continue
                                end
-                               if tags[s1] != tags[s2] then
+                               if tags.get_or_null(s1) != tags.get_or_null(s2) then
                                        distincts[s1].add(s2)
                                        distincts[s2].add(s1)
                                        continue
@@ -398,24 +402,36 @@ class Automaton
                        end
                end
 
+               # Fixed point algorithm.
+               # * Get 2 states s1 and s2 not yet distinguished.
+               # * Get a symbol w.
+               # * If s1.trans(w) and s2.trans(w) are distinguished, then
+               #   distinguish s1 and s2.
                var changed = true
-               var ints = new Array[Int]
+               var ints = new Array[Int] # List of symbols to check
                while changed do
                        changed = false
                        for s1 in states do for s2 in states do
                                if distincts[s1].has(s2) then continue
+
+                               # The transitions use intervals. Therefore, for the states s1 and s2,
+                               # we need to check only the meaningful symbols. They are the `first`
+                               # symbol of each interval and the first one after the interval (`last+1`).
                                ints.clear
+                               # Check only `s1`; `s2` will be checked later when s1 and s2 are switched.
                                for t in s1.outs do
                                        var sym = t.symbol
                                        assert sym != null
                                        ints.add sym.first
                                        var l = sym.last
-                                       if l != null then ints.add l
+                                       if l != null then ints.add l + 1
                                end
+
+                               # Check each symbol
                                for i in ints do
                                        var ds1 = s1.trans(i)
                                        var ds2 = s2.trans(i)
-                                       if ds1 == null and ds2 == null then continue
+                                       if ds1 == ds2 then continue
                                        if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
                                        distincts[s1].add(s2)
                                        distincts[s2].add(s1)
@@ -425,6 +441,8 @@ class Automaton
                        end
                end
 
+               # We need to unify not-distinguished states.
+               # Just add an epsilon-transition and DFAize the automaton.
                for s1 in states do for s2 in states do
                        if distincts[s1].has(s2) then continue
                        s1.add_trans(s2, null)
@@ -433,6 +451,21 @@ class Automaton
                return to_dfa
        end
 
+       # Assert that `self` is a valid automaton or abort
+       fun assert_valid
+       do
+               assert states.has(start)
+               assert states.has_all(accept)
+               for s in states do
+                       for t in s.outs do assert states.has(t.to)
+                       for t in s.ins do assert states.has(t.from)
+               end
+               assert states.has_all(tags.keys)
+               for t, ss in retrotags do
+                       assert states.has_all(ss)
+               end
+       end
+
        # Produce a graphvis file for the automaton
        fun to_dot(filepath: String)
        do
@@ -498,6 +531,8 @@ class Automaton
        # note: the DFA is not minimized.
        fun to_dfa: Automaton
        do
+               assert_valid
+
                trim
 
                var dfa = new Automaton.empty
@@ -656,7 +691,7 @@ private class DFAGenerator
                end
 
                add "# Lexer generated by nitcc for the grammar {name}\n"
-               add "module {name}_lexer is no_warning \"missing-doc\"\n"
+               add "module {name}_lexer is generated, no_warning \"missing-doc\"\n"
                add("import nitcc_runtime\n")
 
                var p = parser
@@ -667,12 +702,10 @@ private class DFAGenerator
                add("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
                add("end\n")
 
-               add("redef class Object\n")
                for s in automaton.states do
                        var n = names[s]
-                       add("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
+                       add("private fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
                end
-               add("end\n")
 
                add("class MyNToken\n")
                add("\tsuper NToken\n")
@@ -690,17 +723,28 @@ private class DFAGenerator
                                        token = null
                                end
                                add("\tredef fun is_accept do return true\n")
-                               add("\tredef fun make_token(position, text) do\n")
+                               var is_ignored = false
                                if token != null and token.name == "Ignored" then
+                                       is_ignored = true
+                                       add("\tredef fun is_ignored do return true\n")
+                               end
+                               add("\tredef fun make_token(position, source) do\n")
+                               if is_ignored then
                                        add("\t\treturn null\n")
                                else
                                        if token == null then
                                                add("\t\tvar t = new MyNToken\n")
+                                               add("\t\tt.text = position.extract(source)\n")
                                        else
                                                add("\t\tvar t = new {token.cname}\n")
+                                               var ttext = token.text
+                                               if ttext == null then
+                                                       add("\t\tt.text = position.extract(source)\n")
+                                               else
+                                                       add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
+                                               end
                                        end
                                        add("\t\tt.position = position\n")
-                                       add("\t\tt.text = text\n")
                                        add("\t\treturn t\n")
                                end
                                add("\tend\n")
@@ -716,23 +760,56 @@ private class DFAGenerator
                        else
                                add("\tredef fun trans(char) do\n")
 
-                               add("\t\tvar c = char.ascii\n")
-                               var haslast = false
+                               # Collect the sequence of tests in the dispatch sequence
+                               # The point here is that for each transition, there is a first and a last
+                               # So holes have to be identified
+                               var dispatch = new HashMap[Int, nullable State]
+                               var haslast: nullable State = null
+
                                var last = -1
                                for sym, next in trans do
-                                       assert not haslast
+                                       assert haslast == null
                                        assert sym.first > last
-                                       if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n")
+                                       if sym.first > last + 1 then
+                                               dispatch[sym.first-1] = null
+                                       end
                                        var l = sym.last
                                        if l == null then
-                                               add("\t\treturn dfastate_{names[next]}\n")
-                                               haslast= true
+                                               haslast = next
                                        else
-                                               add("\t\tif c <= {l} then return dfastate_{names[next]}\n")
+                                               dispatch[l] = next
                                                last = l
                                        end
                                end
-                               if not haslast then add("\t\treturn null\n")
+
+                               if dispatch.is_empty and haslast != null then
+                                       # Only one transition that accepts everything (quite rare)
+                               else
+                                       # We need to check
+                                       add("\t\tvar c = char.code_point\n")
+                               end
+
+                               # Generate a sequence of `if` for the dispatch
+                               if haslast != null and last >= 0 then
+                                       # Special case: handle up-bound first if not an error
+                                       add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
+                                       # previous become the new last case
+                                       haslast = dispatch[last]
+                                       dispatch.keys.remove(last)
+                               end
+                               for c, next in dispatch do
+                                       if next == null then
+                                               add("\t\tif c <= {c} then return null\n")
+                                       else
+                                               add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
+                                       end
+                               end
+                               if haslast == null then
+                                       add("\t\treturn null\n")
+                               else
+                                       add("\t\treturn dfastate_{names[haslast]}\n")
+                               end
+
                                add("\tend\n")
                        end
                        add("end\n")
@@ -742,6 +819,11 @@ private class DFAGenerator
        end
 end
 
+redef class Token
+       # The associated text (if any, ie defined in the parser part)
+       var text: nullable String is noautoinit, writable
+end
+
 # A state in a finite automaton
 class State
        # Outgoing transitions
@@ -793,14 +875,14 @@ class TSymbol
                if f <= 32 then
                        res = "#{f}"
                else
-                       res = f.ascii.to_s
+                       res = f.code_point.to_s
                end
                var l = last
                if f == l then return res
                res += " .. "
                if l == null then return res
                if l <= 32 or l >= 127 then return res + "#{l}"
-               return res + l.ascii.to_s
+               return res + l.code_point.to_s
        end
 end