2d1ca78f4baeea92169e4dbc6929264316799145
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Finite automaton (NFA & DFA)
26 # State that are accect states
27 var accept
= new Array[State]
30 var states
= new Array[State]
32 # Tokens associated on accept states
33 # use `add_tag` to update
34 var tags
= new HashMap[State, Set[Token]]
36 # Accept states associated on tokens
37 # use `add_tag` to update
38 var retrotags
= new HashMap[Token, Set[State]]
40 # Add a token to a state
41 fun add_tag
(s
: State, t
: Token)
43 if not tags
.has_key
(s
) then
44 var set
= new ArraySet[Token]
51 if not retrotags
.has_key
(t
) then
52 var set
= new ArraySet[State]
60 # Remove tokens from conflicting state according the the inclusion of language
61 # REQUIRE: self isa DFA automaton
62 fun solve_token_inclusion
65 if ts
.length
<= 1 then continue
66 var losers
= new Array[Token]
69 if t1
== t2
then continue
70 if retrotags
[t1
].length
> retrotags
[t2
].length
and retrotags
[t1
].has_all
(retrotags
[t2
]) then
83 # Initialize a new automaton for the empty language
84 # one state, no accept, no transition
92 # Initialize a new automaton for the empty-string language
93 # one state, is accept, no transition
102 # Initialize a new automation for the language that accepts only a single symbol
103 # Two state, the second is accept, one transition on `symbol`
104 init atom
(symbol
: Int)
108 s
.add_trans
(a
, symbol
)
115 # Initialize a new automation for the language that accepts only a range of symbols
116 # Two state, the second is accept, one transition for `from` to `to`
117 init cla
(from
, to
: Int)
121 for symbol
in [from
..to
] do
122 s
.add_trans
(a
, symbol
)
130 # Contatenate `other` to `self`
131 # other is modified and invalidated.
132 fun concat
(other
: Automaton)
136 a1
.add_trans
(s2
, null)
138 accept
= other
.accept
139 states
.add_all other
.states
142 # `self` become the alternation of `self` and `other`
143 # `other` is modified and invalidated.
144 fun alternate
(other
: Automaton)
148 s
.add_trans
(start
, null)
150 a1
.add_trans
(a
, null)
152 s
.add_trans
(other
.start
, null)
153 for a2
in other
.accept
do
154 a2
.add_trans
(a
, null)
163 states
.add_all other
.states
166 # Do the Kleene closure (*) on self
170 a1
.add_trans
(start
, null)
171 start
.add_trans
(a1
, null)
179 a1
.add_trans
(start
, null)
186 alternate
(new Automaton.epsilon
)
189 # Remove all transitions on a given symbol
190 fun minus_sym
(symbol
: Int)
193 for t
in s
.outs
.to_a
do
194 if t
.symbol
== symbol
then t
.delete
199 # Fully duplicate an automaton
202 var res
= new Automaton.empty
203 var map
= new HashMap[State, State]
204 map
[start
] = res
.start
206 if s
== start
then continue
212 res
.accept
.add map
[s
]
214 for s
, ts
in tags
do for t
in ts
do
215 res
.add_tag
(map
[s
], t
)
219 map
[s
].add_trans
(map
[t
.to
], t
.symbol
)
225 # Produce a graphvis file for the automaton
226 fun to_dot
(filepath
: String)
228 var f
= new OFStream.open
(filepath
)
229 f
.write
("digraph g \{\n")
232 f
.write
("s{s.object_id}[")
233 #f.write("label=\"\",")
234 if accept
.has
(s
) then
235 f
.write
("color=blue")
236 if tags
.has_key
(s
) then
238 for token in tags[s] do
239 f.write("{token.name.escape_to_c}\\n
")
245 var outs
= new HashMap[State, Array[nullable Int]]
250 if outs
.has_key
(s2
) then
253 a
= new Array[nullable Int]
260 var lastc
: nullable Int = null
265 if not labe
.is_empty
then labe
+= "\n"
267 else if lastc
== c
- 1 then
273 labe
+= "\n{sym_to_s(lastc)}"
274 else if elip
> 1 then
276 labe
+= " .. {sym_to_s(lastc)}"
278 if c
== -1 then break
279 if not labe
.is_empty
then labe
+= "\n"
284 f
.write
("s{s.object_id}->s{s2.object_id} [label=\"{labe.escape_to_c}\
"];\n")
287 f
.write
("empty->s{start.object_id}; empty[label=\"\
",shape=none];\n")
293 # Transform a symbol to a string
295 private fun sym_to_s
(symbol
: nullable Int): String
297 if symbol
== null then
299 else if symbol
<= 32 then
302 return symbol
.ascii
.to_s
306 # Transform a NFA to a DFA
307 # note: the DFA is not miminized
308 fun to_dfa
: Automaton
310 var dfa
= new Automaton.empty
311 var n2d
= new ArrayMap[Set[State], State]
312 var seen
= new ArraySet[Set[State]]
313 var ts
= new HashSet[Int]
314 var st
= eclosure
([start
])
318 while not todo
.is_empty
do
319 var nfa_states
= todo
.pop
320 #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
321 var dfa_state
= n2d
[nfa_states
]
323 for s
in nfa_states
do
324 if accept
.has
(s
) then
325 if tags
.has_key
(s
) then
327 dfa
.add_tag
(dfa_state
, t
)
330 dfa
.accept
.add
(dfa_state
)
334 if sym
== null or ts
.has
(sym
) then continue
336 var nfa_dest
= eclosure
(trans
(nfa_states
, sym
))
337 #print "{nfa_states} -> {sym} -> {nfa_dest}"
339 if seen
.has
(nfa_dest
) then
340 #print "* reuse {nfa_dest.inspect}={nfa_dest}"
341 dfa_dest
= n2d
[nfa_dest
]
343 #print "* new {nfa_dest.inspect}={nfa_dest}"
345 dfa
.states
.add
(dfa_dest
)
346 n2d
[nfa_dest
] = dfa_dest
350 dfa_state
.add_trans
(dfa_dest
, sym
)
357 # epsilon-closure on a state of states
359 private fun eclosure
(states
: Collection[State]): Set[State]
361 var res
= new ArraySet[State]
363 var todo
= states
.to_a
364 while not todo
.is_empty
do
367 if t
.symbol
!= null then continue
369 if res
.has
(to
) then continue
377 # trans on a set of states
379 fun trans
(states
: Collection[State], symbol
: Int): Set[State]
381 var res
= new ArraySet[State]
384 if t
.symbol
!= symbol
then continue
386 if res
.has
(to
) then continue
393 # Generate the Nit source code of the lexer
394 # `filepath` is the name of the ouptit file
395 # `parser` is the name of the parser module (used to import the token classes)
396 fun gen_to_nit
(filepath
: String, parser
: nullable String)
398 var gen
= new DFAGenerator(filepath
, self, parser
)
403 # Generate the Nit source code of the lexer
404 private class DFAGenerator
406 var automaton
: Automaton
407 var parser
: nullable String
410 init(filepath
: String, automaton
: Automaton, parser
: nullable String) do
411 self.filepath
= filepath
412 self.automaton
= automaton
414 self.out
= new OFStream.open
(filepath
)
417 fun add
(s
: String) do out
.write
(s
)
421 var names
= new HashMap[State, String]
423 for s
in automaton
.states
do
428 add
"# Lexer generated by nitcc"
429 add
("import nitcc_runtime\n")
432 if p
!= null then add
("import {p}\n")
434 add
("class MyLexer\n")
435 add
("\tsuper Lexer\n")
436 add
("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
439 add
("redef class Object\n")
440 for s
in automaton
.states
do
442 add
("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
446 add
("class MyNToken\n")
447 add
("\tsuper NToken\n")
450 for s
in automaton
.states
do
452 add
("class DFAState{n}\n")
453 add
("\tsuper DFAState\n")
454 if automaton
.accept
.has
(s
) then
456 if automaton
.tags
.has_key
(s
) then
457 token
= automaton
.tags
[s
].first
461 add
("\tredef fun is_accept do return true\n")
462 add
("\tredef fun make_token(position, text) do\n")
463 if token
!= null and token
.name
== "Ignored" then
464 add
("\t\treturn null\n")
466 if token
== null then
467 add
("\t\tvar t = new MyNToken\n")
469 add
("\t\tvar t = new {token.cname}\n")
471 add
("\t\tt.position = position\n")
472 add
("\t\tt.text = text\n")
473 add
("\t\treturn t\n")
477 var trans
= new ArrayMap[Int, State]
483 if trans
.is_empty
then
484 # Do nothing, inherit the trans
485 else if trans
.length
== 1 then
486 var sym
= trans
.keys
.first
487 var next
= trans
.values
.first
488 add
("\tredef fun trans(c) do\n")
489 add
("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
490 add
("\t\treturn null\n")
493 add
("\tredef fun trans(c) do\n")
494 for sym
, next
in trans
do
495 add
("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
497 add
("\t\treturn null\n")
507 # A state in a finite automaton
509 # Outgoing transitions
511 var outs
= new Array[Transition]
512 # Ingoing tyransitions
513 var ins
= new Array[Transition]
515 # Add a transitions to `to` on `symbol` (null means epsilon)
516 fun add_trans
(to
: State, symbol
: nullable Int): Transition
518 var t
= new Transition(self, to
, symbol
)
525 # A transition in a finite automaton
529 # The destination state
531 # The symbol on the transition (null means epsilon)
532 var symbol
: nullable Int
534 # Remove the transition from the automaton
535 # Detash from `from` and `to`
538 from
.outs
.remove
(self)