if t.symbol == null then continue
# Check overlaps
- var tf = t.symbol.first
- var tl = t.symbol.last
+ var tf = t.symbol.as(not null).first
+ var tl = t.symbol.as(not null).last
if l != null and tf > l then continue
if tl != null and f > tl then continue
if not goods.has(s) then bads.add(s)
end
- # Remove their transitions
+ # Remove their transitions and tags
for s in bads do
- for t in s.ins do t.delete
- for t in s.outs do t.delete
+ for t in s.ins.to_a do t.delete
+ for t in s.outs.to_a do t.delete
+ if tags.has_key(s) then
+ for t in tags[s] do retrotags[t].remove(s)
+ tags.keys.remove(s)
+ end
end
# Keep only the good stuff
states.clear
states.add_all(goods)
+ states.add(start)
end
# Generate a minimal DFA
# REQUIRE: self is a DFA
fun to_minimal_dfa: Automaton
do
+ assert_valid
+
trim
+ # Graph of known distinct states.
var distincts = new HashMap[State, Set[State]]
for s in states do
distincts[s] = new HashSet[State]
end
- # split accept states
- for s1 in states do
+ # split accept states.
+ # An accept state is distinct with a non accept state.
+ for s1 in accept do
for s2 in states do
if distincts[s1].has(s2) then continue
- if not accept.has(s1) then continue
if not accept.has(s2) then
distincts[s1].add(s2)
distincts[s2].add(s1)
continue
end
- if tags[s1] != tags[s2] then
+ if tags.get_or_null(s1) != tags.get_or_null(s2) then
distincts[s1].add(s2)
distincts[s2].add(s1)
continue
end
end
+ # Fixed point algorithm.
+ # * Get 2 states s1 and s2 not yet distinguished.
+ # * Get a symbol w.
+ # * If s1.trans(w) and s2.trans(w) are distinguished, then
+ # distinguish s1 and s2.
var changed = true
- var ints = new Array[Int]
+ var ints = new Array[Int] # List of symbols to check
while changed do
changed = false
for s1 in states do for s2 in states do
if distincts[s1].has(s2) then continue
+
+ # The transitions use intervals. Therefore, for the states s1 and s2,
+ # we need to check only the meaningful symbols. They are the `first`
+ # symbol of each interval and the first one after the interval (`last+1`).
ints.clear
+ # Check only `s1`; `s2` will be checked later when s1 and s2 are switched.
for t in s1.outs do
var sym = t.symbol
assert sym != null
ints.add sym.first
var l = sym.last
- if l != null then ints.add l
+ if l != null then ints.add l + 1
end
+
+ # Check each symbol
for i in ints do
var ds1 = s1.trans(i)
var ds2 = s2.trans(i)
- if ds1 == null and ds2 == null then continue
+ if ds1 == ds2 then continue
if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
distincts[s1].add(s2)
distincts[s2].add(s1)
end
end
+ # We need to unify not-distinguished states.
+ # Just add an epsilon-transition and DFAize the automaton.
for s1 in states do for s2 in states do
if distincts[s1].has(s2) then continue
s1.add_trans(s2, null)
return to_dfa
end
- # Produce a graphvis file for the automaton
- fun to_dot(filepath: String)
+ # Assert that `self` is a valid automaton or abort
+ fun assert_valid
+ do
+ assert states.has(start)
+ assert states.has_all(accept)
+ for s in states do
+ for t in s.outs do assert states.has(t.to)
+ for t in s.ins do assert states.has(t.from)
+ end
+ assert states.has_all(tags.keys)
+ for t, ss in retrotags do
+ assert states.has_all(ss)
+ end
+ end
+
+ # Produce a graphviz string from the automatom
+ #
+ # Set `merge_transitions = false` to generate one edge by transition (default true).
+ fun to_dot(merge_transitions: nullable Bool): Writable
do
var names = new HashMap[State, String]
var ni = 0
ni += 1
end
- var f = new FileWriter.open(filepath)
- f.write("digraph g \{\n")
+ var f = new Buffer
+ f.append("digraph g \{\n")
+ f.append("rankdir=LR;")
+ var state_nb = 0
for s in states do
- f.write("s{names[s]}[shape=oval")
+ f.append("s{names[s]}[shape=circle")
#f.write("label=\"\",")
if accept.has(s) then
- f.write(",color=blue")
+ f.append(",shape=doublecircle")
end
if tags.has_key(s) then
- f.write(",label=\"")
+ f.append(",label=\"")
for token in tags[s] do
- f.write("{token.name.escape_to_c}\\n")
+ f.append("{token.name.escape_to_dot}\\n")
end
- f.write("\"")
+ f.append("\"")
else
- f.write(",label=\"\"")
+ f.append(",label=\"{state_nb}\"")
end
- f.write("];\n")
+ f.append("];\n")
var outs = new HashMap[State, Array[nullable TSymbol]]
for t in s.outs do
var a
for s2, a in outs do
var labe = ""
for c in a do
+ if merge_transitions == false then labe = ""
if not labe.is_empty then labe += "\n"
if c == null then
- labe += "''"
+ labe += "ε"
else
labe += c.to_s
end
+ if merge_transitions == false then
+ f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_dot}\"];\n")
+ end
+ end
+ if merge_transitions or else true then
+ f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n")
end
- f.write("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n")
end
+ state_nb += 1
end
- f.write("empty->s{names[start]}; empty[label=\"\",shape=none];\n")
-
- f.write("\}\n")
- f.close
+ f.append("empty->s{names[start]}; empty[label=\"\",shape=none];\n")
+ f.append("\}\n")
+ return f
end
# Transform a NFA to a DFA.
# note: the DFA is not minimized.
fun to_dfa: Automaton
do
+ assert_valid
+
trim
var dfa = new Automaton.empty
var n2d = new ArrayMap[Set[State], State]
- var seen = new ArraySet[Set[State]]
+ var seen = new ArraySet[Set[State]]
var alphabet = new HashSet[Int]
var st = eclosure([start])
var todo = [st]
seen.add(nfa_dest)
end
if lastst != null and lastst.to == dfa_dest then
- lastst.symbol.last = sym.last
+ lastst.symbol.as(not null).last = sym.last
else
lastst = dfa_state.add_trans(dfa_dest, sym)
end
return dfa
end
+ # Transform a NFA to a epsilonless NFA.
+ fun to_nfa_noe: Automaton
+ do
+ assert_valid
+
+ trim
+
+ var dfa = new Automaton.empty
+ var n2d = new ArrayMap[Set[State], State]
+ var seen = new ArraySet[Set[State]]
+ var st = eclosure([start])
+ var todo = [st]
+ n2d[st] = dfa.start
+ seen.add(st)
+ while not todo.is_empty do
+ var nfa_states = todo.pop
+ #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
+ var dfa_state = n2d[nfa_states]
+ for s in nfa_states do
+ for t in s.outs do
+ if t.symbol == null then continue
+ var nfa_dest = eclosure([t.to])
+ #print "{nfa_states} -> {sym} -> {nfa_dest}"
+ var dfa_dest
+ if seen.has(nfa_dest) then
+ #print "* reuse {nfa_dest.inspect}={nfa_dest}"
+ dfa_dest = n2d[nfa_dest]
+ else
+ #print "* new {nfa_dest.inspect}={nfa_dest}"
+ dfa_dest = new State
+ dfa.states.add(dfa_dest)
+ n2d[nfa_dest] = dfa_dest
+ todo.add(nfa_dest)
+ seen.add(nfa_dest)
+ end
+ dfa_state.add_trans(dfa_dest, t.symbol)
+ end
+
+ # Mark accept and tags
+ if accept.has(s) then
+ if tags.has_key(s) then
+ for t in tags[s] do
+ dfa.add_tag(dfa_state, t)
+ end
+ end
+ dfa.accept.add(dfa_state)
+ end
+ end
+ end
+ return dfa
+ end
+
# Epsilon-closure on a state of states.
# Used by `to_dfa`.
private fun eclosure(states: Collection[State]): Set[State]
for t in s.outs do
if t.symbol != null then continue
var to = t.to
- if res.has(to) then continue
+ if res.has(to) then continue
res.add(to)
todo.add(to)
end
var l = sym.last
if l != null and l < symbol then continue
var to = t.to
- if res.has(to) then continue
+ if res.has(to) then continue
res.add(to)
end
end
end
add "# Lexer generated by nitcc for the grammar {name}\n"
- add "module {name}_lexer is no_warning \"missing-doc\"\n"
+ add "module {name}_lexer is generated, no_warning \"missing-doc\"\n"
add("import nitcc_runtime\n")
var p = parser
token = null
end
add("\tredef fun is_accept do return true\n")
- add("\tredef fun make_token(position, text) do\n")
+ var is_ignored = false
if token != null and token.name == "Ignored" then
+ is_ignored = true
+ add("\tredef fun is_ignored do return true\n")
+ end
+ add("\tredef fun make_token(position, source) do\n")
+ if is_ignored then
add("\t\treturn null\n")
else
if token == null then
add("\t\tvar t = new MyNToken\n")
+ add("\t\tt.text = position.extract(source)\n")
else
add("\t\tvar t = new {token.cname}\n")
+ var ttext = token.text
+ if ttext == null then
+ add("\t\tt.text = position.extract(source)\n")
+ else
+ add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
+ end
end
add("\t\tt.position = position\n")
- add("\t\tt.text = text\n")
add("\t\treturn t\n")
end
add("\tend\n")
else
add("\tredef fun trans(char) do\n")
- add("\t\tvar c = char.ascii\n")
- var haslast = false
+ # Collect the sequence of tests in the dispatch sequence
+ # The point here is that for each transition, there is a first and a last
+ # So holes have to be identified
+ var dispatch = new HashMap[Int, nullable State]
+ var haslast: nullable State = null
+
var last = -1
for sym, next in trans do
- assert not haslast
+ assert haslast == null
assert sym.first > last
- if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n")
+ if sym.first > last + 1 then
+ dispatch[sym.first-1] = null
+ end
var l = sym.last
if l == null then
- add("\t\treturn dfastate_{names[next]}\n")
- haslast= true
+ haslast = next
else
- add("\t\tif c <= {l} then return dfastate_{names[next]}\n")
+ dispatch[l] = next
last = l
end
end
- if not haslast then add("\t\treturn null\n")
+
+ if dispatch.is_empty and haslast != null then
+ # Only one transition that accepts everything (quite rare)
+ else
+ # We need to check
+ add("\t\tvar c = char.code_point\n")
+ end
+
+ # Generate a sequence of `if` for the dispatch
+ if haslast != null and last >= 0 then
+ # Special case: handle up-bound first if not an error
+ add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
+ # previous become the new last case
+ haslast = dispatch[last]
+ dispatch.keys.remove(last)
+ end
+ for c, next in dispatch do
+ if next == null then
+ add("\t\tif c <= {c} then return null\n")
+ else
+ add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
+ end
+ end
+ if haslast == null then
+ add("\t\treturn null\n")
+ else
+ add("\t\treturn dfastate_{names[haslast]}\n")
+ end
+
add("\tend\n")
end
add("end\n")
end
end
+redef class Token
+ # The associated text (if any, ie defined in the parser part)
+ var text: nullable String is noautoinit, writable
+end
+
# A state in a finite automaton
class State
# Outgoing transitions
if f <= 32 then
res = "#{f}"
else
- res = f.ascii.to_s
+ res = f.code_point.to_s
end
var l = last
if f == l then return res
res += " .. "
if l == null then return res
if l <= 32 or l >= 127 then return res + "#{l}"
- return res + l.ascii.to_s
+ return res + l.code_point.to_s
end
end