nitcc: generate a faster DFA::trans sequence of `if`
[nit.git] / contrib / nitcc / src / autom.nit
index d143684..84df722 100644 (file)
@@ -667,12 +667,10 @@ private class DFAGenerator
                add("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
                add("end\n")
 
-               add("redef class Object\n")
                for s in automaton.states do
                        var n = names[s]
-                       add("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
+                       add("private fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
                end
-               add("end\n")
 
                add("class MyNToken\n")
                add("\tsuper NToken\n")
@@ -690,17 +688,28 @@ private class DFAGenerator
                                        token = null
                                end
                                add("\tredef fun is_accept do return true\n")
-                               add("\tredef fun make_token(position, text) do\n")
+                               var is_ignored = false
                                if token != null and token.name == "Ignored" then
+                                       is_ignored = true
+                                       add("\tredef fun is_ignored do return true\n")
+                               end
+                               add("\tredef fun make_token(position, source) do\n")
+                               if is_ignored then
                                        add("\t\treturn null\n")
                                else
                                        if token == null then
                                                add("\t\tvar t = new MyNToken\n")
+                                               add("\t\tt.text = position.extract(source)\n")
                                        else
                                                add("\t\tvar t = new {token.cname}\n")
+                                               var ttext = token.text
+                                               if ttext == null then
+                                                       add("\t\tt.text = position.extract(source)\n")
+                                               else
+                                                       add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
+                                               end
                                        end
                                        add("\t\tt.position = position\n")
-                                       add("\t\tt.text = text\n")
                                        add("\t\treturn t\n")
                                end
                                add("\tend\n")
@@ -716,23 +725,51 @@ private class DFAGenerator
                        else
                                add("\tredef fun trans(char) do\n")
 
-                               add("\t\tvar c = char.ascii\n")
-                               var haslast = false
+                               add("\t\tvar c = char.code_point\n")
+
+                               # Collect the sequence of tests in the dispatch sequence
+                               # The point here is that for each transition, there is a first and a last
+                               # So holes hare to be identified
+                               var dispatch = new HashMap[Int, nullable State]
+                               var haslast: nullable State = null
+
                                var last = -1
                                for sym, next in trans do
-                                       assert not haslast
+                                       assert haslast == null
                                        assert sym.first > last
-                                       if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n")
+                                       if sym.first > last + 1 then
+                                               dispatch[sym.first-1] = null
+                                       end
                                        var l = sym.last
                                        if l == null then
-                                               add("\t\treturn dfastate_{names[next]}\n")
-                                               haslast= true
+                                               haslast = next
                                        else
-                                               add("\t\tif c <= {l} then return dfastate_{names[next]}\n")
+                                               dispatch[l] = next
                                                last = l
                                        end
                                end
-                               if not haslast then add("\t\treturn null\n")
+
+                               # Generate a sequence of `if` for the dispatch
+                               if haslast != null then
+                                       # Special case: handle up-bound first if not an error
+                                       add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
+                                       # previous become the new last case
+                                       haslast = dispatch[last]
+                                       dispatch.keys.remove(last)
+                               end
+                               for c, next in dispatch do
+                                       if next == null then
+                                               add("\t\tif c <= {c} then return null\n")
+                                       else
+                                               add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
+                                       end
+                               end
+                               if haslast == null then
+                                       add("\t\treturn null\n")
+                               else
+                                       add("\t\treturn dfastate_{names[haslast]}\n")
+                               end
+
                                add("\tend\n")
                        end
                        add("end\n")
@@ -742,6 +779,11 @@ private class DFAGenerator
        end
 end
 
+redef class Token
+       # The associated text (if any, ie defined in the parser part)
+       var text: nullable String is noautoinit, writable
+end
+
 # A state in a finite automaton
 class State
        # Outgoing transitions
@@ -793,14 +835,14 @@ class TSymbol
                if f <= 32 then
                        res = "#{f}"
                else
-                       res = f.ascii.to_s
+                       res = f.code_point.to_s
                end
                var l = last
                if f == l then return res
                res += " .. "
                if l == null then return res
                if l <= 32 or l >= 127 then return res + "#{l}"
-               return res + l.ascii.to_s
+               return res + l.code_point.to_s
        end
 end