X-Git-Url: http://nitlanguage.org diff --git a/contrib/nitcc/src/autom.nit b/contrib/nitcc/src/autom.nit index 86befbb..c20f3df 100644 --- a/contrib/nitcc/src/autom.nit +++ b/contrib/nitcc/src/autom.nit @@ -37,6 +37,12 @@ class Automaton # use `add_tag` to update var retrotags = new HashMap[Token, Set[State]] + # Tag all accept states + fun tag_accept(t: Token) + do + for s in accept do add_tag(s, t) + end + # Add a token to a state fun add_tag(s: State, t: Token) do @@ -55,6 +61,21 @@ class Automaton else retrotags[t].add s end + + assert tags[s].has(t) + assert retrotags[t].has(s) + end + + # Remove all occurences of a tag in an automaton + fun clear_tag(t: Token) + do + if not retrotags.has_key(t) then return + for s in retrotags[t] do + if not tags.has_key(s) then continue + tags[s].remove(t) + if tags[s].is_empty then tags.keys.remove(s) + end + retrotags.keys.remove(t) end # Remove tokens from conflicting state according the the inclusion of language @@ -105,7 +126,8 @@ class Automaton do var s = new State var a = new State - s.add_trans(a, symbol) + var sym = new TSymbol(symbol, symbol) + s.add_trans(a, sym) start = s accept.add a states.add s @@ -114,13 +136,12 @@ class Automaton # Initialize a new automation for the language that accepts only a range of symbols # Two state, the second is accept, one transition for `from` to `to` - init cla(from, to: Int) + init cla(first: Int, last: nullable Int) do var s = new State var a = new State - for symbol in [from..to] do - s.add_trans(a, symbol) - end + var sym = new TSymbol(first, last) + s.add_trans(a, sym) start = s accept.add a states.add s @@ -163,6 +184,16 @@ class Automaton states.add_all other.states end + # `self` absorbs all states, transisions, tags, and acceptations of `other` + # An epsilon transition is added between `self.start` and `other.start` + fun absorb(other: Automaton) + do + states.add_all other.states + start.add_trans(other.start, null) + for s, ts in other.tags do for t in ts do add_tag(s, t) + accept.add_all other.accept + end + # Do the Kleene closure (*) on self fun close do @@ -187,11 +218,36 @@ class Automaton end # Remove all transitions on a given symbol - fun minus_sym(symbol: Int) + fun minus_sym(symbol: TSymbol) do + var f = symbol.first + var l = symbol.last for s in states do for t in s.outs.to_a do - if t.symbol == symbol then t.delete + if t.symbol == null then continue + + # Check overlaps + var tf = t.symbol.first + var tl = t.symbol.last + if l != null and tf > l then continue + if tl != null and f > tl then continue + + t.delete + + # Add left and right part if non empty + if tf < f then + var sym = new TSymbol(tf,f-1) + s.add_trans(t.to, sym) + end + if l != null then + if tl == null then + var sym = new TSymbol(l+1, null) + s.add_trans(t.to, sym) + else if tl > l then + var sym = new TSymbol(l+1, tl) + s.add_trans(t.to, sym) + end + end end end end @@ -222,6 +278,97 @@ class Automaton return res end + # Reverse an automaton in place + fun reverse + do + for s in states do + var tmp = s.ins + s.ins = s.outs + s.outs = tmp + for t in s.outs do + var tmp2 = t.from + t.from = t.to + t.to = tmp2 + end + end + var st = start + if accept.length == 1 then + start = accept.first + else + var st2 = new State + start = st2 + states.add(st2) + + for s in accept do + st2.add_trans(s, null) + end + end + accept.clear + accept.add(st) + end + + # Generate a minimal DFA + # REQUIRE: self is a DFA + fun to_minimal_dfa: Automaton + do + var distincts = new HashMap[State, Set[State]] + for s in states do + distincts[s] = new HashSet[State] + end + + # split accept states + for s1 in states do + for s2 in states do + if distincts[s1].has(s2) then continue + if not accept.has(s1) then continue + if not accept.has(s2) then + distincts[s1].add(s2) + distincts[s2].add(s1) + continue + end + if tags[s1] != tags[s2] then + distincts[s1].add(s2) + distincts[s2].add(s1) + continue + end + end + end + + var changed = true + var ints = new Array[Int] + while changed do + changed = false + for s1 in states do for s2 in states do + if distincts[s1].has(s2) then continue + ints.clear + for t in s1.outs do + var sym = t.symbol + assert sym != null + ints.add sym.first + var l = sym.last + if l != null then ints.add l + end + for i in ints do + var ds1 = s1.trans(i) + var ds2 = s2.trans(i) + if ds1 == null and ds2 == null then continue + if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue + distincts[s1].add(s2) + distincts[s2].add(s1) + changed = true + break + end + end + end + + for s1 in states do for s2 in states do + if distincts[s1].has(s2) then continue + s1.add_trans(s2, null) + end + + return to_dfa + end + # Produce a graphvis file for the automaton fun to_dot(filepath: String) do @@ -229,20 +376,22 @@ class Automaton f.write("digraph g \{\n") for s in states do - f.write("s{s.object_id}[") + f.write("s{s.object_id}[shape=oval") #f.write("label=\"\",") if accept.has(s) then - f.write("color=blue") - if tags.has_key(s) then - f.write(",label=\"") - for token in tags[s] do - f.write("{token.name.escape_to_c}\\n") - end - f.write("\"") + f.write(",color=blue") + end + if tags.has_key(s) then + f.write(",label=\"") + for token in tags[s] do + f.write("{token.name.escape_to_c}\\n") end + f.write("\"") + else + f.write(",label=\"\"") end f.write("];\n") - var outs = new HashMap[State, Array[nullable Int]] + var outs = new HashMap[State, Array[nullable TSymbol]] for t in s.outs do var a var s2 = t.to @@ -250,35 +399,19 @@ class Automaton if outs.has_key(s2) then a = outs[s2] else - a = new Array[nullable Int] + a = new Array[nullable TSymbol] outs[s2] = a end a.add(c) end for s2, a in outs do var labe = "" - var lastc: nullable Int = null - var elip = 0 - a.add(-1) for c in a do + if not labe.is_empty then labe += "\n" if c == null then - if not labe.is_empty then labe += "\n" labe += "''" - else if lastc == c - 1 then - elip += 1 - lastc = c else - if elip == 1 then - assert lastc != null - labe += "\n{sym_to_s(lastc)}" - else if elip > 1 then - assert lastc != null - labe += " .. {sym_to_s(lastc)}" - end - if c == -1 then break - if not labe.is_empty then labe += "\n" - labe += sym_to_s(c) - lastc = c + labe += c.to_s end end f.write("s{s.object_id}->s{s2.object_id} [label=\"{labe.escape_to_c}\"];\n") @@ -290,19 +423,6 @@ class Automaton f.close end - # Transform a symbol to a string - # Used by `to_dot` - private fun sym_to_s(symbol: nullable Int): String - do - if symbol == null then - return "''" - else if symbol <= 32 then - return "#{symbol}" - else - return symbol.ascii.to_s - end - end - # Transform a NFA to a DFA # note: the DFA is not miminized fun to_dfa: Automaton @@ -310,7 +430,7 @@ class Automaton var dfa = new Automaton.empty var n2d = new ArrayMap[Set[State], State] var seen = new ArraySet[Set[State]] - var ts = new HashSet[Int] + var alphabet = new HashSet[Int] var st = eclosure([start]) var todo = [st] n2d[st] = dfa.start @@ -319,8 +439,18 @@ class Automaton var nfa_states = todo.pop #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})" var dfa_state = n2d[nfa_states] - ts.clear + alphabet.clear for s in nfa_states do + # Collect important values to build the alphabet + for t in s.outs do + var sym = t.symbol + if sym == null then continue + alphabet.add(sym.first) + var l = sym.last + if l != null then alphabet.add(l) + end + + # Mark accept and tags if accept.has(s) then if tags.has_key(s) then for t in tags[s] do @@ -329,25 +459,49 @@ class Automaton end dfa.accept.add(dfa_state) end - for t in s.outs do - var sym = t.symbol - if sym == null or ts.has(sym) then continue - ts.add(sym) - var nfa_dest = eclosure(trans(nfa_states, sym)) - #print "{nfa_states} -> {sym} -> {nfa_dest}" - var dfa_dest - if seen.has(nfa_dest) then - #print "* reuse {nfa_dest.inspect}={nfa_dest}" - dfa_dest = n2d[nfa_dest] - else - #print "* new {nfa_dest.inspect}={nfa_dest}" - dfa_dest = new State - dfa.states.add(dfa_dest) - n2d[nfa_dest] = dfa_dest - todo.add(nfa_dest) - seen.add(nfa_dest) - end - dfa_state.add_trans(dfa_dest, sym) + end + + # From the important values, build a sequence of TSymbols + var a = alphabet.to_a + (new ComparableSorter[Int]).sort(a) + var tsyms = new Array[TSymbol] + var last = 0 + for i in a do + if last > 0 and last <= i-1 then + tsyms.add(new TSymbol(last,i-1)) + end + tsyms.add(new TSymbol(i,i)) + last = i+1 + end + if last > 0 then + tsyms.add(new TSymbol(last,null)) + end + #print "Alphabet: {tsyms.join(", ")}" + + var lastst: nullable Transition = null + for sym in tsyms do + var nfa_dest = eclosure(trans(nfa_states, sym.first)) + if nfa_dest.is_empty then + lastst = null + continue + end + #print "{nfa_states} -> {sym} -> {nfa_dest}" + var dfa_dest + if seen.has(nfa_dest) then + #print "* reuse {nfa_dest.inspect}={nfa_dest}" + dfa_dest = n2d[nfa_dest] + else + #print "* new {nfa_dest.inspect}={nfa_dest}" + dfa_dest = new State + dfa.states.add(dfa_dest) + n2d[nfa_dest] = dfa_dest + todo.add(nfa_dest) + seen.add(nfa_dest) + end + if lastst != null and lastst.to == dfa_dest then + lastst.symbol.last = sym.last + else + lastst = dfa_state.add_trans(dfa_dest, sym) end end end @@ -381,7 +535,11 @@ class Automaton var res = new ArraySet[State] for s in states do for t in s.outs do - if t.symbol != symbol then continue + var sym = t.symbol + if sym == null then continue + if sym.first > symbol then continue + var l = sym.last + if l != null and l < symbol then continue var to = t.to if res.has(to) then continue res.add(to) @@ -451,7 +609,7 @@ private class DFAGenerator for s in automaton.states do var n = names[s] - add("class DFAState{n}\n") + add("private class DFAState{n}\n") add("\tsuper DFAState\n") if automaton.accept.has(s) then var token @@ -476,7 +634,7 @@ private class DFAGenerator end add("\tend\n") end - var trans = new ArrayMap[Int, State] + var trans = new ArrayMap[TSymbol, State] for t in s.outs do var sym = t.symbol assert sym != null @@ -484,19 +642,26 @@ private class DFAGenerator end if trans.is_empty then # Do nothing, inherit the trans - else if trans.length == 1 then - var sym = trans.keys.first - var next = trans.values.first - add("\tredef fun trans(c) do\n") - add("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n") - add("\t\treturn null\n") - add("\tend\n") else - add("\tredef fun trans(c) do\n") + add("\tredef fun trans(char) do\n") + + add("\t\tvar c = char.ascii\n") + var haslast = false + var last = -1 for sym, next in trans do - add("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n") + assert not haslast + assert sym.first > last + if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n") + var l = sym.last + if l == null then + add("\t\treturn dfastate_{names[next]}\n") + haslast= true + else + add("\t\tif c <= {l} then return dfastate_{names[next]}\n") + last = l + end end - add("\t\treturn null\n") + if not haslast then add("\t\treturn null\n") add("\tend\n") end add("end\n") @@ -515,13 +680,50 @@ class State var ins = new Array[Transition] # Add a transitions to `to` on `symbol` (null means epsilon) - fun add_trans(to: State, symbol: nullable Int): Transition + fun add_trans(to: State, symbol: nullable TSymbol): Transition do var t = new Transition(self, to, symbol) outs.add(t) to.ins.add(t) return t end + + fun trans(i: Int): nullable State + do + for t in outs do + var sym = t.symbol + assert sym != null + var f = sym.first + var l = sym.last + if i < f then continue + if l != null and i > l then continue + return t.to + end + return null + end +end + +# A range of symbols on a transition +class TSymbol + var first: Int + var last: nullable Int + + redef fun to_s + do + var res + var f = first + if f <= 32 then + res = "#{f}" + else + res = f.ascii.to_s + end + var l = last + if f == l then return res + res += " .. " + if l == null then return res + if l <= 32 or l >= 127 then return res + "#{l}" + return res + l.ascii.to_s + end end # A transition in a finite automaton @@ -531,7 +733,7 @@ class Transition # The destination state var to: State # The symbol on the transition (null means epsilon) - var symbol: nullable Int + var symbol: nullable TSymbol # Remove the transition from the automaton # Detash from `from` and `to`