# A finite automaton
class Automaton
# The start state
- var start: State
+ var start: State is noinit
- # State that are accect states
+ # State that are accept states
var accept = new Array[State]
# All states
var states = new Array[State]
- # Tokens associated on accept states
- # use `add_tag` to update
+ # Tokens associated on accept states.
+ # Use `add_tag` to update
var tags = new HashMap[State, Set[Token]]
- # Accept states associated on tokens
- # use `add_tag` to update
+ # Accept states associated on tokens.
+ # Use `add_tag` to update
var retrotags = new HashMap[Token, Set[State]]
# Tag all accept states
assert retrotags[t].has(s)
end
- # Remove all occurences of a tag in an automaton
+ # Remove all occurrences of a tag in an automaton
fun clear_tag(t: Token)
do
if not retrotags.has_key(t) then return
retrotags.keys.remove(t)
end
- # Remove tokens from conflicting state according the the inclusion of language
+ # Remove tokens from conflicting state according the inclusion of language.
# REQUIRE: self isa DFA automaton
fun solve_token_inclusion
do
end
end
- # Initialize a new automaton for the empty language
- # one state, no accept, no transition
+ # Initialize a new automaton for the empty language.
+ # One state, no accept, no transition.
init empty
do
var state = new State
states.add state
end
- # Initialize a new automaton for the empty-string language
- # one state, is accept, no transition
+ # Initialize a new automaton for the empty-string language.
+ # One state, is accept, no transition.
init epsilon
do
var state = new State
states.add state
end
- # Initialize a new automation for the language that accepts only a single symbol
- # Two state, the second is accept, one transition on `symbol`
+ # Initialize a new automation for the language that accepts only a single symbol.
+ # Two state, the second is accept, one transition on `symbol`.
init atom(symbol: Int)
do
var s = new State
states.add a
end
- # Contatenate `other` to `self`
- # other is modified and invalidated.
+ # Concatenate `other` to `self`.
+ # Other is modified and invalidated.
fun concat(other: Automaton)
do
var s2 = other.start
states.add_all other.states
end
- # `self` become the alternation of `self` and `other`
+ # `self` become the alternation of `self` and `other`.
# `other` is modified and invalidated.
fun alternate(other: Automaton)
do
states.add_all other.states
end
- # Return a new automaton that recognize `self` but not `other`
- # For a theorical POV, this is the substraction of languages.
- # Note: the implementation use `to_dfa` internally, so the theorical complexity is not cheap.
+ # Return a new automaton that recognize `self` but not `other`.
+ # For a theoretical POV, this is the subtraction of languages.
+ # Note: the implementation use `to_dfa` internally, so the theoretical complexity is not cheap.
fun except(other: Automaton): Automaton
do
var ta = new Token("1")
return c
end
- # `self` absorbs all states, transisions, tags, and acceptations of `other`
- # An epsilon transition is added between `self.start` and `other.start`
+ # `self` absorbs all states, transitions, tags, and acceptations of `other`.
+ # An epsilon transition is added between `self.start` and `other.start`.
fun absorb(other: Automaton)
do
states.add_all other.states
ni += 1
end
- var f = new OFStream.open(filepath)
+ var f = new FileWriter.open(filepath)
f.write("digraph g \{\n")
for s in states do
f.close
end
- # Transform a NFA to a DFA
- # note: the DFA is not miminized
+ # Transform a NFA to a DFA.
+ # note: the DFA is not minimized.
fun to_dfa: Automaton
do
trim
return dfa
end
- # epsilon-closure on a state of states
- # used by `to_dfa`
+ # Epsilon-closure on a state of states.
+ # Used by `to_dfa`.
private fun eclosure(states: Collection[State]): Set[State]
do
var res = new ArraySet[State]
return res
end
- # trans on a set of states
- # Used by `to_dfa`
+ # Trans on a set of states.
+ # Used by `to_dfa`.
fun trans(states: Collection[State], symbol: Int): Set[State]
do
var res = new ArraySet[State]
return res
end
- # Generate the Nit source code of the lexer
- # `filepath` is the name of the ouptit file
- # `parser` is the name of the parser module (used to import the token classes)
+ # Generate the Nit source code of the lexer.
+ # `filepath` is the name of the output file.
+ # `parser` is the name of the parser module (used to import the token classes).
fun gen_to_nit(filepath: String, name: String, parser: nullable String)
do
var gen = new DFAGenerator(filepath, name, self, parser)
var automaton: Automaton
var parser: nullable String
- var out: OStream
- init(filepath: String, name: String, automaton: Automaton, parser: nullable String) do
- self.filepath = filepath
- self.name = name
- self.automaton = automaton
- self.parser = parser
- self.out = new OFStream.open(filepath)
+ var out: Writer is noinit
+
+ init do
+ self.out = new FileWriter.open(filepath)
end
fun add(s: String) do out.write(s)
i += 1
end
- add "# Lexer generated by nitcc for the grammar {name}"
+ add "# Lexer generated by nitcc for the grammar {name}\n"
+ add "module {name}_lexer is no_warning \"missing-doc\"\n"
add("import nitcc_runtime\n")
var p = parser
add("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
add("end\n")
- add("redef class Object\n")
for s in automaton.states do
var n = names[s]
- add("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
+ add("private fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
end
- add("end\n")
add("class MyNToken\n")
add("\tsuper NToken\n")
token = null
end
add("\tredef fun is_accept do return true\n")
- add("\tredef fun make_token(position, text) do\n")
+ var is_ignored = false
if token != null and token.name == "Ignored" then
+ is_ignored = true
+ add("\tredef fun is_ignored do return true\n")
+ end
+ add("\tredef fun make_token(position, source) do\n")
+ if is_ignored then
add("\t\treturn null\n")
else
if token == null then
add("\t\tvar t = new MyNToken\n")
+ add("\t\tt.text = position.extract(source)\n")
else
add("\t\tvar t = new {token.cname}\n")
+ var ttext = token.text
+ if ttext == null then
+ add("\t\tt.text = position.extract(source)\n")
+ else
+ add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
+ end
end
add("\t\tt.position = position\n")
- add("\t\tt.text = text\n")
add("\t\treturn t\n")
end
add("\tend\n")
else
add("\tredef fun trans(char) do\n")
- add("\t\tvar c = char.ascii\n")
- var haslast = false
+ add("\t\tvar c = char.code_point\n")
+
+ # Collect the sequence of tests in the dispatch sequence
+ # The point here is that for each transition, there is a first and a last
+ # So holes hare to be identified
+ var dispatch = new HashMap[Int, nullable State]
+ var haslast: nullable State = null
+
var last = -1
for sym, next in trans do
- assert not haslast
+ assert haslast == null
assert sym.first > last
- if sym.first > last + 1 then add("\t\tif c <= {sym.first-1} then return null\n")
+ if sym.first > last + 1 then
+ dispatch[sym.first-1] = null
+ end
var l = sym.last
if l == null then
- add("\t\treturn dfastate_{names[next]}\n")
- haslast= true
+ haslast = next
else
- add("\t\tif c <= {l} then return dfastate_{names[next]}\n")
+ dispatch[l] = next
last = l
end
end
- if not haslast then add("\t\treturn null\n")
+
+ # Generate a sequence of `if` for the dispatch
+ if haslast != null then
+ # Special case: handle up-bound first if not an error
+ add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
+ # previous become the new last case
+ haslast = dispatch[last]
+ dispatch.keys.remove(last)
+ end
+ for c, next in dispatch do
+ if next == null then
+ add("\t\tif c <= {c} then return null\n")
+ else
+ add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
+ end
+ end
+ if haslast == null then
+ add("\t\treturn null\n")
+ else
+ add("\t\treturn dfastate_{names[haslast]}\n")
+ end
+
add("\tend\n")
end
add("end\n")
end
end
+redef class Token
+ # The associated text (if any, ie defined in the parser part)
+ var text: nullable String is noautoinit, writable
+end
+
# A state in a finite automaton
class State
# Outgoing transitions
-
var outs = new Array[Transition]
- # Ingoing tyransitions
+
+ # Ingoing transitions
var ins = new Array[Transition]
# Add a transitions to `to` on `symbol` (null means epsilon)
return t
end
+ # Get the first state following the transition `i`.
+ # Null if no transition for `i`.
fun trans(i: Int): nullable State
do
for t in outs do
# A range of symbols on a transition
class TSymbol
+ # The first symbol in the range
var first: Int
+
+ # The last symbol if any.
+ #
+ # `null` means infinity.
var last: nullable Int
redef fun to_s
if f <= 32 then
res = "#{f}"
else
- res = f.ascii.to_s
+ res = f.code_point.to_s
end
var l = last
if f == l then return res
res += " .. "
if l == null then return res
if l <= 32 or l >= 127 then return res + "#{l}"
- return res + l.ascii.to_s
+ return res + l.code_point.to_s
end
end
# The symbol on the transition (null means epsilon)
var symbol: nullable TSymbol
- # Remove the transition from the automaton
- # Detash from `from` and `to`
+ # Remove the transition from the automaton.
+ # Detach from `from` and `to`.
fun delete
do
from.outs.remove(self)