1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Finite automaton (NFA & DFA)
26 # State that are accect states
27 var accept
= new Array[State]
30 var states
= new Array[State]
32 # Tokens associated on accept states
33 # use `add_tag` to update
34 var tags
= new HashMap[State, Set[Token]]
36 # Accept states associated on tokens
37 # use `add_tag` to update
38 var retrotags
= new HashMap[Token, Set[State]]
40 # Tag all accept states
41 fun tag_accept
(t
: Token)
43 for s
in accept
do add_tag
(s
, t
)
46 # Add a token to a state
47 fun add_tag
(s
: State, t
: Token)
49 if not tags
.has_key
(s
) then
50 var set
= new ArraySet[Token]
57 if not retrotags
.has_key
(t
) then
58 var set
= new ArraySet[State]
66 assert retrotags
[t
].has
(s
)
69 # Remove all occurences of a tag in an automaton
70 fun clear_tag
(t
: Token)
72 if not retrotags
.has_key
(t
) then return
73 for s
in retrotags
[t
] do
74 if not tags
.has_key
(s
) then continue
76 if tags
[s
].is_empty
then tags
.keys
.remove
(s
)
78 retrotags
.keys
.remove
(t
)
81 # Remove tokens from conflicting state according the the inclusion of language
82 # REQUIRE: self isa DFA automaton
83 fun solve_token_inclusion
86 if ts
.length
<= 1 then continue
87 var losers
= new Array[Token]
90 if t1
== t2
then continue
91 if retrotags
[t1
].length
> retrotags
[t2
].length
and retrotags
[t1
].has_all
(retrotags
[t2
]) then
104 # Initialize a new automaton for the empty language
105 # one state, no accept, no transition
108 var state
= new State
113 # Initialize a new automaton for the empty-string language
114 # one state, is accept, no transition
117 var state
= new State
123 # Initialize a new automation for the language that accepts only a single symbol
124 # Two state, the second is accept, one transition on `symbol`
125 init atom
(symbol
: Int)
129 var sym
= new TSymbol(symbol
, symbol
)
137 # Initialize a new automation for the language that accepts only a range of symbols
138 # Two state, the second is accept, one transition for `from` to `to`
139 init cla
(first
: Int, last
: nullable Int)
143 var sym
= new TSymbol(first
, last
)
151 # Contatenate `other` to `self`
152 # other is modified and invalidated.
153 fun concat
(other
: Automaton)
157 a1
.add_trans
(s2
, null)
159 accept
= other
.accept
160 states
.add_all other
.states
163 # `self` become the alternation of `self` and `other`
164 # `other` is modified and invalidated.
165 fun alternate
(other
: Automaton)
169 s
.add_trans
(start
, null)
171 a1
.add_trans
(a
, null)
173 s
.add_trans
(other
.start
, null)
174 for a2
in other
.accept
do
175 a2
.add_trans
(a
, null)
184 states
.add_all other
.states
187 # `self` absorbs all states, transisions, tags, and acceptations of `other`
188 # An epsilon transition is added between `self.start` and `other.start`
189 fun absorb
(other
: Automaton)
191 states
.add_all other
.states
192 start
.add_trans
(other
.start
, null)
193 for s
, ts
in other
.tags
do for t
in ts
do add_tag
(s
, t
)
194 accept
.add_all other
.accept
197 # Do the Kleene closure (*) on self
201 a1
.add_trans
(start
, null)
202 start
.add_trans
(a1
, null)
210 a1
.add_trans
(start
, null)
217 alternate
(new Automaton.epsilon
)
220 # Remove all transitions on a given symbol
221 fun minus_sym
(symbol
: TSymbol)
226 for t
in s
.outs
.to_a
do
227 if t
.symbol
== null then continue
230 var tf
= t
.symbol
.first
231 var tl
= t
.symbol
.last
232 if l
!= null and tf
> l
then continue
233 if tl
!= null and f
> tl
then continue
237 # Add left and right part if non empty
239 var sym
= new TSymbol(tf
,f-1
)
240 s
.add_trans
(t
.to
, sym
)
244 var sym
= new TSymbol(l
+1, null)
245 s
.add_trans
(t
.to
, sym
)
247 var sym
= new TSymbol(l
+1, tl
)
248 s
.add_trans
(t
.to
, sym
)
255 # Fully duplicate an automaton
258 var res
= new Automaton.empty
259 var map
= new HashMap[State, State]
260 map
[start
] = res
.start
262 if s
== start
then continue
268 res
.accept
.add map
[s
]
270 for s
, ts
in tags
do for t
in ts
do
271 res
.add_tag
(map
[s
], t
)
275 map
[s
].add_trans
(map
[t
.to
], t
.symbol
)
281 # Reverse an automaton in place
295 if accept
.length
== 1 then
303 st2
.add_trans
(s
, null)
310 # Generate a minimal DFA
311 # REQUIRE: self is a DFA
312 fun to_minimal_dfa
: Automaton
314 var distincts
= new HashMap[State, Set[State]]
316 distincts
[s
] = new HashSet[State]
319 # split accept states
322 if distincts
[s1
].has
(s2
) then continue
323 if not accept
.has
(s1
) then continue
324 if not accept
.has
(s2
) then
325 distincts
[s1
].add
(s2
)
326 distincts
[s2
].add
(s1
)
329 if tags
[s1
] != tags
[s2
] then
330 distincts
[s1
].add
(s2
)
331 distincts
[s2
].add
(s1
)
338 var ints
= new Array[Int]
341 for s1
in states
do for s2
in states
do
342 if distincts
[s1
].has
(s2
) then continue
349 if l
!= null then ints
.add l
352 var ds1
= s1
.trans
(i
)
353 var ds2
= s2
.trans
(i
)
354 if ds1
== null and ds2
== null then continue
355 if ds1
!= null and ds2
!= null and not distincts
[ds1
].has
(ds2
) then continue
356 distincts
[s1
].add
(s2
)
357 distincts
[s2
].add
(s1
)
364 for s1
in states
do for s2
in states
do
365 if distincts
[s1
].has
(s2
) then continue
366 s1
.add_trans
(s2
, null)
372 # Produce a graphvis file for the automaton
373 fun to_dot
(filepath
: String)
375 var f
= new OFStream.open
(filepath
)
376 f
.write
("digraph g \{\n")
379 f
.write
("s{s.object_id}[shape=oval")
380 #f.write("label=\"\",")
381 if accept
.has
(s
) then
382 f
.write
(",color=blue")
384 if tags
.has_key
(s
) then
386 for token in tags[s] do
387 f.write("{token.name.escape_to_c}\\n
")
391 f
.write
(",label=\"\
"")
394 var outs
= new HashMap[State, Array[nullable TSymbol]]
399 if outs
.has_key
(s2
) then
402 a
= new Array[nullable TSymbol]
410 if not labe
.is_empty
then labe
+= "\n"
417 f
.write
("s{s.object_id}->s{s2.object_id} [label=\"{labe.escape_to_c}\
"];\n")
420 f
.write
("empty->s{start.object_id}; empty[label=\"\
",shape=none];\n")
426 # Transform a NFA to a DFA
427 # note: the DFA is not miminized
428 fun to_dfa
: Automaton
430 var dfa
= new Automaton.empty
431 var n2d
= new ArrayMap[Set[State], State]
432 var seen
= new ArraySet[Set[State]]
433 var alphabet
= new HashSet[Int]
434 var st
= eclosure
([start
])
438 while not todo
.is_empty
do
439 var nfa_states
= todo
.pop
440 #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
441 var dfa_state
= n2d
[nfa_states
]
443 for s
in nfa_states
do
444 # Collect important values to build the alphabet
447 if sym
== null then continue
448 alphabet
.add
(sym
.first
)
450 if l
!= null then alphabet
.add
(l
)
453 # Mark accept and tags
454 if accept
.has
(s
) then
455 if tags
.has_key
(s
) then
457 dfa
.add_tag
(dfa_state
, t
)
460 dfa
.accept
.add
(dfa_state
)
464 # From the important values, build a sequence of TSymbols
465 var a
= alphabet
.to_a
466 (new ComparableSorter[Int]).sort
(a
)
467 var tsyms
= new Array[TSymbol]
470 if last
> 0 and last
<= i-1
then
471 tsyms
.add
(new TSymbol(last
,i-1
))
473 tsyms
.add
(new TSymbol(i
,i
))
477 tsyms
.add
(new TSymbol(last
,null))
479 #print "Alphabet: {tsyms.join(", ")}"
481 var lastst
: nullable Transition = null
483 var nfa_dest
= eclosure
(trans
(nfa_states
, sym
.first
))
484 if nfa_dest
.is_empty
then
488 #print "{nfa_states} -> {sym} -> {nfa_dest}"
490 if seen
.has
(nfa_dest
) then
491 #print "* reuse {nfa_dest.inspect}={nfa_dest}"
492 dfa_dest
= n2d
[nfa_dest
]
494 #print "* new {nfa_dest.inspect}={nfa_dest}"
496 dfa
.states
.add
(dfa_dest
)
497 n2d
[nfa_dest
] = dfa_dest
501 if lastst
!= null and lastst
.to
== dfa_dest
then
502 lastst
.symbol
.last
= sym
.last
504 lastst
= dfa_state
.add_trans
(dfa_dest
, sym
)
511 # epsilon-closure on a state of states
513 private fun eclosure
(states
: Collection[State]): Set[State]
515 var res
= new ArraySet[State]
517 var todo
= states
.to_a
518 while not todo
.is_empty
do
521 if t
.symbol
!= null then continue
523 if res
.has
(to
) then continue
531 # trans on a set of states
533 fun trans
(states
: Collection[State], symbol
: Int): Set[State]
535 var res
= new ArraySet[State]
539 if sym
== null then continue
540 if sym
.first
> symbol
then continue
542 if l
!= null and l
< symbol
then continue
544 if res
.has
(to
) then continue
551 # Generate the Nit source code of the lexer
552 # `filepath` is the name of the ouptit file
553 # `parser` is the name of the parser module (used to import the token classes)
554 fun gen_to_nit
(filepath
: String, name
: String, parser
: nullable String)
556 var gen
= new DFAGenerator(filepath
, name
, self, parser
)
561 # Generate the Nit source code of the lexer
562 private class DFAGenerator
565 var automaton
: Automaton
566 var parser
: nullable String
569 init(filepath
: String, name
: String, automaton
: Automaton, parser
: nullable String) do
570 self.filepath
= filepath
572 self.automaton
= automaton
574 self.out
= new OFStream.open
(filepath
)
577 fun add
(s
: String) do out
.write
(s
)
581 var names
= new HashMap[State, String]
583 for s
in automaton
.states
do
588 add
"# Lexer generated by nitcc for the grammar {name}"
589 add
("import nitcc_runtime\n")
592 if p
!= null then add
("import {p}\n")
594 add
("class Lexer_{name}\n")
595 add
("\tsuper Lexer\n")
596 add
("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
599 add
("redef class Object\n")
600 for s
in automaton
.states
do
602 add
("\tprivate fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
606 add
("class MyNToken\n")
607 add
("\tsuper NToken\n")
610 for s
in automaton
.states
do
612 add
("class DFAState{n}\n")
613 add
("\tsuper DFAState\n")
614 if automaton
.accept
.has
(s
) then
616 if automaton
.tags
.has_key
(s
) then
617 token
= automaton
.tags
[s
].first
621 add
("\tredef fun is_accept do return true\n")
622 add
("\tredef fun make_token(position, text) do\n")
623 if token
!= null and token
.name
== "Ignored" then
624 add
("\t\treturn null\n")
626 if token
== null then
627 add
("\t\tvar t = new MyNToken\n")
629 add
("\t\tvar t = new {token.cname}\n")
631 add
("\t\tt.position = position\n")
632 add
("\t\tt.text = text\n")
633 add
("\t\treturn t\n")
637 var trans
= new ArrayMap[TSymbol, State]
643 if trans
.is_empty
then
644 # Do nothing, inherit the trans
646 add
("\tredef fun trans(char) do\n")
648 add
("\t\tvar c = char.ascii\n")
651 for sym
, next
in trans
do
653 assert sym
.first
> last
654 if sym
.first
> last
+ 1 then add
("\t\tif c <= {sym.first-1} then return null\n")
657 add
("\t\treturn dfastate_{names[next]}\n")
660 add
("\t\tif c <= {l} then return dfastate_{names[next]}\n")
664 if not haslast
then add
("\t\treturn null\n")
674 # A state in a finite automaton
676 # Outgoing transitions
678 var outs
= new Array[Transition]
679 # Ingoing tyransitions
680 var ins
= new Array[Transition]
682 # Add a transitions to `to` on `symbol` (null means epsilon)
683 fun add_trans
(to
: State, symbol
: nullable TSymbol): Transition
685 var t
= new Transition(self, to
, symbol
)
691 fun trans
(i
: Int): nullable State
698 if i
< f
then continue
699 if l
!= null and i
> l
then continue
706 # A range of symbols on a transition
709 var last
: nullable Int
721 if f
== l
then return res
723 if l
== null then return res
724 if l
<= 32 or l
>= 127 then return res
+ "#{l}"
725 return res
+ l
.ascii
.to_s
729 # A transition in a finite automaton
733 # The destination state
735 # The symbol on the transition (null means epsilon)
736 var symbol
: nullable TSymbol
738 # Remove the transition from the automaton
739 # Detash from `from` and `to`
742 from
.outs
.remove
(self)