1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Finite automaton (NFA & DFA)
26 # State that are accect states
27 var accept
= new Array[State]
30 var states
= new Array[State]
32 # Tokens associated on accept states
33 # use `add_tag` to update
34 var tags
= new HashMap[State, Set[Token]]
36 # Accept states associated on tokens
37 # use `add_tag` to update
38 var retrotags
= new HashMap[Token, Set[State]]
40 # Tag all accept states
41 fun tag_accept
(t
: Token)
43 for s
in accept
do add_tag
(s
, t
)
46 # Add a token to a state
47 fun add_tag
(s
: State, t
: Token)
49 if not tags
.has_key
(s
) then
50 var set
= new ArraySet[Token]
57 if not retrotags
.has_key
(t
) then
58 var set
= new ArraySet[State]
66 # Remove tokens from conflicting state according the the inclusion of language
67 # REQUIRE: self isa DFA automaton
68 fun solve_token_inclusion
71 if ts
.length
<= 1 then continue
72 var losers
= new Array[Token]
75 if t1
== t2
then continue
76 if retrotags
[t1
].length
> retrotags
[t2
].length
and retrotags
[t1
].has_all
(retrotags
[t2
]) then
89 # Initialize a new automaton for the empty language
90 # one state, no accept, no transition
98 # Initialize a new automaton for the empty-string language
99 # one state, is accept, no transition
102 var state
= new State
108 # Initialize a new automation for the language that accepts only a single symbol
109 # Two state, the second is accept, one transition on `symbol`
110 init atom
(symbol
: Int)
114 s
.add_trans
(a
, symbol
)
121 # Initialize a new automation for the language that accepts only a range of symbols
122 # Two state, the second is accept, one transition for `from` to `to`
123 init cla
(from
, to
: Int)
127 for symbol
in [from
..to
] do
128 s
.add_trans
(a
, symbol
)
136 # Contatenate `other` to `self`
137 # other is modified and invalidated.
138 fun concat
(other
: Automaton)
142 a1
.add_trans
(s2
, null)
144 accept
= other
.accept
145 states
.add_all other
.states
148 # `self` become the alternation of `self` and `other`
149 # `other` is modified and invalidated.
150 fun alternate
(other
: Automaton)
154 s
.add_trans
(start
, null)
156 a1
.add_trans
(a
, null)
158 s
.add_trans
(other
.start
, null)
159 for a2
in other
.accept
do
160 a2
.add_trans
(a
, null)
169 states
.add_all other
.states
172 # `self` absorbs all states, transisions, tags, and acceptations of `other`
173 # An epsilon transition is added between `self.start` and `other.start`
174 fun absorb
(other
: Automaton)
176 states
.add_all other
.states
177 start
.add_trans
(other
.start
, null)
178 for s
, ts
in other
.tags
do for t
in ts
do add_tag
(s
, t
)
179 accept
.add_all other
.accept
182 # Do the Kleene closure (*) on self
186 a1
.add_trans
(start
, null)
187 start
.add_trans
(a1
, null)
195 a1
.add_trans
(start
, null)
202 alternate
(new Automaton.epsilon
)
205 # Remove all transitions on a given symbol
206 fun minus_sym
(symbol
: Int)
209 for t
in s
.outs
.to_a
do
210 if t
.symbol
== symbol
then t
.delete
215 # Fully duplicate an automaton
218 var res
= new Automaton.empty
219 var map
= new HashMap[State, State]
220 map
[start
] = res
.start
222 if s
== start
then continue
228 res
.accept
.add map
[s
]
230 for s
, ts
in tags
do for t
in ts
do
231 res
.add_tag
(map
[s
], t
)
235 map
[s
].add_trans
(map
[t
.to
], t
.symbol
)
241 # Produce a graphvis file for the automaton
242 fun to_dot
(filepath
: String)
244 var f
= new OFStream.open
(filepath
)
245 f
.write
("digraph g \{\n")
248 f
.write
("s{s.object_id}[")
249 #f.write("label=\"\",")
250 if accept
.has
(s
) then
251 f
.write
("color=blue")
252 if tags
.has_key
(s
) then
254 for token in tags[s] do
255 f.write("{token.name.escape_to_c}\\n
")
261 var outs
= new HashMap[State, Array[nullable Int]]
266 if outs
.has_key
(s2
) then
269 a
= new Array[nullable Int]
276 var lastc
: nullable Int = null
281 if not labe
.is_empty
then labe
+= "\n"
283 else if lastc
== c
- 1 then
289 labe
+= "\n{sym_to_s(lastc)}"
290 else if elip
> 1 then
292 labe
+= " .. {sym_to_s(lastc)}"
294 if c
== -1 then break
295 if not labe
.is_empty
then labe
+= "\n"
300 f
.write
("s{s.object_id}->s{s2.object_id} [label=\"{labe.escape_to_c}\
"];\n")
303 f
.write
("empty->s{start.object_id}; empty[label=\"\
",shape=none];\n")
309 # Transform a symbol to a string
311 private fun sym_to_s
(symbol
: nullable Int): String
313 if symbol
== null then
315 else if symbol
<= 32 then
318 return symbol
.ascii
.to_s
322 # Transform a NFA to a DFA
323 # note: the DFA is not miminized
324 fun to_dfa
: Automaton
326 var dfa
= new Automaton.empty
327 var n2d
= new ArrayMap[Set[State], State]
328 var seen
= new ArraySet[Set[State]]
329 var ts
= new HashSet[Int]
330 var st
= eclosure
([start
])
334 while not todo
.is_empty
do
335 var nfa_states
= todo
.pop
336 #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
337 var dfa_state
= n2d
[nfa_states
]
339 for s
in nfa_states
do
340 if accept
.has
(s
) then
341 if tags
.has_key
(s
) then
343 dfa
.add_tag
(dfa_state
, t
)
346 dfa
.accept
.add
(dfa_state
)
350 if sym
== null or ts
.has
(sym
) then continue
352 var nfa_dest
= eclosure
(trans
(nfa_states
, sym
))
353 #print "{nfa_states} -> {sym} -> {nfa_dest}"
355 if seen
.has
(nfa_dest
) then
356 #print "* reuse {nfa_dest.inspect}={nfa_dest}"
357 dfa_dest
= n2d
[nfa_dest
]
359 #print "* new {nfa_dest.inspect}={nfa_dest}"
361 dfa
.states
.add
(dfa_dest
)
362 n2d
[nfa_dest
] = dfa_dest
366 dfa_state
.add_trans
(dfa_dest
, sym
)
373 # epsilon-closure on a state of states
375 private fun eclosure
(states
: Collection[State]): Set[State]
377 var res
= new ArraySet[State]
379 var todo
= states
.to_a
380 while not todo
.is_empty
do
383 if t
.symbol
!= null then continue
385 if res
.has
(to
) then continue
393 # trans on a set of states
395 fun trans
(states
: Collection[State], symbol
: Int): Set[State]
397 var res
= new ArraySet[State]
400 if t
.symbol
!= symbol
then continue
402 if res
.has
(to
) then continue
409 # Generate the Nit source code of the lexer
410 # `filepath` is the name of the ouptit file
411 # `parser` is the name of the parser module (used to import the token classes)
412 fun gen_to_nit
(filepath
: String, name
: String, parser
: nullable String)
414 var gen
= new DFAGenerator(filepath
, name
, self, parser
)
419 # Generate the Nit source code of the lexer
420 private class DFAGenerator
423 var automaton
: Automaton
424 var parser
: nullable String
427 init(filepath
: String, name
: String, automaton
: Automaton, parser
: nullable String) do
428 self.filepath
= filepath
430 self.automaton
= automaton
432 self.out
= new OFStream.open
(filepath
)
435 fun add
(s
: String) do out
.write
(s
)
439 var names
= new HashMap[State, String]
441 for s
in automaton
.states
do
446 add
"# Lexer generated by nitcc for the grammar {name}"
447 add
("import nitcc_runtime\n")
450 if p
!= null then add
("import {p}\n")
452 add
("class Lexer_{name}\n")
453 add
("\tsuper Lexer\n")
454 add
("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
457 add
("redef class Object\n")
458 for s
in automaton
.states
do
460 add
("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
464 add
("class MyNToken\n")
465 add
("\tsuper NToken\n")
468 for s
in automaton
.states
do
470 add
("class DFAState{n}\n")
471 add
("\tsuper DFAState\n")
472 if automaton
.accept
.has
(s
) then
474 if automaton
.tags
.has_key
(s
) then
475 token
= automaton
.tags
[s
].first
479 add
("\tredef fun is_accept do return true\n")
480 add
("\tredef fun make_token(position, text) do\n")
481 if token
!= null and token
.name
== "Ignored" then
482 add
("\t\treturn null\n")
484 if token
== null then
485 add
("\t\tvar t = new MyNToken\n")
487 add
("\t\tvar t = new {token.cname}\n")
489 add
("\t\tt.position = position\n")
490 add
("\t\tt.text = text\n")
491 add
("\t\treturn t\n")
495 var trans
= new ArrayMap[Int, State]
501 if trans
.is_empty
then
502 # Do nothing, inherit the trans
503 else if trans
.length
== 1 then
504 var sym
= trans
.keys
.first
505 var next
= trans
.values
.first
506 add
("\tredef fun trans(c) do\n")
507 add
("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
508 add
("\t\treturn null\n")
511 add
("\tredef fun trans(c) do\n")
512 for sym
, next
in trans
do
513 add
("\t\tif c.ascii == {sym} then return dfastate_{names[next]}\n")
515 add
("\t\treturn null\n")
525 # A state in a finite automaton
527 # Outgoing transitions
529 var outs
= new Array[Transition]
530 # Ingoing tyransitions
531 var ins
= new Array[Transition]
533 # Add a transitions to `to` on `symbol` (null means epsilon)
534 fun add_trans
(to
: State, symbol
: nullable Int): Transition
536 var t
= new Transition(self, to
, symbol
)
543 # A transition in a finite automaton
547 # The destination state
549 # The symbol on the transition (null means epsilon)
550 var symbol
: nullable Int
552 # Remove the transition from the automaton
553 # Detash from `from` and `to`
556 from
.outs
.remove
(self)