X-Git-Url: http://nitlanguage.org diff --git a/contrib/nitcc/src/autom.nit b/contrib/nitcc/src/autom.nit index 6971e7c..3248e17 100644 --- a/contrib/nitcc/src/autom.nit +++ b/contrib/nitcc/src/autom.nit @@ -252,8 +252,8 @@ class Automaton if t.symbol == null then continue # Check overlaps - var tf = t.symbol.first - var tl = t.symbol.last + var tf = t.symbol.as(not null).first + var tl = t.symbol.as(not null).last if l != null and tf > l then continue if tl != null and f > tl then continue @@ -360,8 +360,8 @@ class Automaton # Remove their transitions for s in bads do - for t in s.ins do t.delete - for t in s.outs do t.delete + for t in s.ins.to_a do t.delete + for t in s.outs.to_a do t.delete end # Keep only the good stuff @@ -373,14 +373,18 @@ class Automaton # REQUIRE: self is a DFA fun to_minimal_dfa: Automaton do + assert_valid + trim + # Graph of known distinct states. var distincts = new HashMap[State, Set[State]] for s in states do distincts[s] = new HashSet[State] end - # split accept states + # split accept states. + # An accept state is distinct with a non accept state. for s1 in states do for s2 in states do if distincts[s1].has(s2) then continue @@ -390,7 +394,7 @@ class Automaton distincts[s2].add(s1) continue end - if tags[s1] != tags[s2] then + if tags.get_or_null(s1) != tags.get_or_null(s2) then distincts[s1].add(s2) distincts[s2].add(s1) continue @@ -398,24 +402,36 @@ class Automaton end end + # Fixed point algorithm. + # * Get 2 states s1 and s2 not yet distinguished. + # * Get a symbol w. + # * If s1.trans(w) and s2.trans(w) are distinguished, then + # distinguish s1 and s2. var changed = true - var ints = new Array[Int] + var ints = new Array[Int] # List of symbols to check while changed do changed = false for s1 in states do for s2 in states do if distincts[s1].has(s2) then continue + + # The transitions use intervals. Therefore, for the states s1 and s2, + # we need to check only the meaningful symbols. They are the `first` + # symbol of each interval and the first one after the interval (`last+1`). ints.clear + # Check only `s1`; `s2` will be checked later when s1 and s2 are switched. for t in s1.outs do var sym = t.symbol assert sym != null ints.add sym.first var l = sym.last - if l != null then ints.add l + if l != null then ints.add l + 1 end + + # Check each symbol for i in ints do var ds1 = s1.trans(i) var ds2 = s2.trans(i) - if ds1 == null and ds2 == null then continue + if ds1 == ds2 then continue if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue distincts[s1].add(s2) distincts[s2].add(s1) @@ -425,6 +441,8 @@ class Automaton end end + # We need to unify not-distinguished states. + # Just add an epsilon-transition and DFAize the automaton. for s1 in states do for s2 in states do if distincts[s1].has(s2) then continue s1.add_trans(s2, null) @@ -433,8 +451,25 @@ class Automaton return to_dfa end - # Produce a graphvis file for the automaton - fun to_dot(filepath: String) + # Assert that `self` is a valid automaton or abort + fun assert_valid + do + assert states.has(start) + assert states.has_all(accept) + for s in states do + for t in s.outs do assert states.has(t.to) + for t in s.ins do assert states.has(t.from) + end + assert states.has_all(tags.keys) + for t, ss in retrotags do + assert states.has_all(ss) + end + end + + # Produce a graphviz string from the automatom + # + # Set `merge_transitions = false` to generate one edge by transition (default true). + fun to_dot(merge_transitions: nullable Bool): Writable do var names = new HashMap[State, String] var ni = 0 @@ -443,25 +478,27 @@ class Automaton ni += 1 end - var f = new FileWriter.open(filepath) - f.write("digraph g \{\n") + var f = new Buffer + f.append("digraph g \{\n") + f.append("rankdir=LR;") + var state_nb = 0 for s in states do - f.write("s{names[s]}[shape=oval") + f.append("s{names[s]}[shape=circle") #f.write("label=\"\",") if accept.has(s) then - f.write(",color=blue") + f.append(",shape=doublecircle") end if tags.has_key(s) then - f.write(",label=\"") + f.append(",label=\"") for token in tags[s] do - f.write("{token.name.escape_to_c}\\n") + f.append("{token.name.escape_to_dot}\\n") end - f.write("\"") + f.append("\"") else - f.write(",label=\"\"") + f.append(",label=\"{state_nb}\"") end - f.write("];\n") + f.append("];\n") var outs = new HashMap[State, Array[nullable TSymbol]] for t in s.outs do var a @@ -478,31 +515,39 @@ class Automaton for s2, a in outs do var labe = "" for c in a do + if merge_transitions == false then labe = "" if not labe.is_empty then labe += "\n" if c == null then - labe += "''" + labe += "ε" else labe += c.to_s end + if merge_transitions == false then + f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_dot}\"];\n") + end + end + if merge_transitions == null or merge_transitions == true then + f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n") end - f.write("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n") end + state_nb += 1 end - f.write("empty->s{names[start]}; empty[label=\"\",shape=none];\n") - - f.write("\}\n") - f.close + f.append("empty->s{names[start]}; empty[label=\"\",shape=none];\n") + f.append("\}\n") + return f end # Transform a NFA to a DFA. # note: the DFA is not minimized. fun to_dfa: Automaton do + assert_valid + trim var dfa = new Automaton.empty var n2d = new ArrayMap[Set[State], State] - var seen = new ArraySet[Set[State]] + var seen = new ArraySet[Set[State]] var alphabet = new HashSet[Int] var st = eclosure([start]) var todo = [st] @@ -572,7 +617,7 @@ class Automaton seen.add(nfa_dest) end if lastst != null and lastst.to == dfa_dest then - lastst.symbol.last = sym.last + lastst.symbol.as(not null).last = sym.last else lastst = dfa_state.add_trans(dfa_dest, sym) end @@ -593,7 +638,7 @@ class Automaton for t in s.outs do if t.symbol != null then continue var to = t.to - if res.has(to) then continue + if res.has(to) then continue res.add(to) todo.add(to) end @@ -614,7 +659,7 @@ class Automaton var l = sym.last if l != null and l < symbol then continue var to = t.to - if res.has(to) then continue + if res.has(to) then continue res.add(to) end end @@ -656,7 +701,7 @@ private class DFAGenerator end add "# Lexer generated by nitcc for the grammar {name}\n" - add "module {name}_lexer is no_warning \"missing-doc\"\n" + add "module {name}_lexer is generated, no_warning \"missing-doc\"\n" add("import nitcc_runtime\n") var p = parser