nitcc: trim also remove tags on bad states
[nit.git] / contrib / nitcc / src / autom.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Finite automaton (NFA & DFA)
16 module autom
17
18 # For the class Token
19 import grammar
20
21 # A finite automaton
22 class Automaton
23 # The start state
24 var start: State is noinit
25
26 # State that are accept states
27 var accept = new Array[State]
28
29 # All states
30 var states = new Array[State]
31
32 # Tokens associated on accept states.
33 # Use `add_tag` to update
34 var tags = new HashMap[State, Set[Token]]
35
36 # Accept states associated on tokens.
37 # Use `add_tag` to update
38 var retrotags = new HashMap[Token, Set[State]]
39
40 # Tag all accept states
41 fun tag_accept(t: Token)
42 do
43 for s in accept do add_tag(s, t)
44 end
45
46 # Add a token to a state
47 fun add_tag(s: State, t: Token)
48 do
49 if not tags.has_key(s) then
50 var set = new ArraySet[Token]
51 tags[s] = set
52 set.add t
53 else
54 tags[s].add t
55 end
56
57 if not retrotags.has_key(t) then
58 var set = new ArraySet[State]
59 retrotags[t] = set
60 set.add s
61 else
62 retrotags[t].add s
63 end
64
65 assert tags[s].has(t)
66 assert retrotags[t].has(s)
67 end
68
69 # Remove all occurrences of a tag in an automaton
70 fun clear_tag(t: Token)
71 do
72 if not retrotags.has_key(t) then return
73 for s in retrotags[t] do
74 if not tags.has_key(s) then continue
75 tags[s].remove(t)
76 if tags[s].is_empty then tags.keys.remove(s)
77 end
78 retrotags.keys.remove(t)
79 end
80
81 # Remove tokens from conflicting state according the inclusion of language.
82 # REQUIRE: self isa DFA automaton
83 fun solve_token_inclusion
84 do
85 for s, ts in tags do
86 if ts.length <= 1 then continue
87 var losers = new Array[Token]
88 for t1 in ts do
89 for t2 in ts do
90 if t1 == t2 then continue
91 if retrotags[t1].length > retrotags[t2].length and retrotags[t1].has_all(retrotags[t2]) then
92 losers.add(t1)
93 break
94 end
95 end
96 end
97 for t in losers do
98 ts.remove(t)
99 retrotags[t].remove s
100 end
101 end
102 end
103
104 # Initialize a new automaton for the empty language.
105 # One state, no accept, no transition.
106 init empty
107 do
108 var state = new State
109 start = state
110 states.add state
111 end
112
113 # Initialize a new automaton for the empty-string language.
114 # One state, is accept, no transition.
115 init epsilon
116 do
117 var state = new State
118 start = state
119 accept.add state
120 states.add state
121 end
122
123 # Initialize a new automation for the language that accepts only a single symbol.
124 # Two state, the second is accept, one transition on `symbol`.
125 init atom(symbol: Int)
126 do
127 var s = new State
128 var a = new State
129 var sym = new TSymbol(symbol, symbol)
130 s.add_trans(a, sym)
131 start = s
132 accept.add a
133 states.add s
134 states.add a
135 end
136
137 # Initialize a new automation for the language that accepts only a range of symbols
138 # Two state, the second is accept, one transition for `from` to `to`
139 init cla(first: Int, last: nullable Int)
140 do
141 var s = new State
142 var a = new State
143 var sym = new TSymbol(first, last)
144 s.add_trans(a, sym)
145 start = s
146 accept.add a
147 states.add s
148 states.add a
149 end
150
151 # Concatenate `other` to `self`.
152 # Other is modified and invalidated.
153 fun concat(other: Automaton)
154 do
155 var s2 = other.start
156 for a1 in accept do
157 a1.add_trans(s2, null)
158 end
159 accept = other.accept
160 states.add_all other.states
161 end
162
163 # `self` become the alternation of `self` and `other`.
164 # `other` is modified and invalidated.
165 fun alternate(other: Automaton)
166 do
167 var s = new State
168 var a = new State
169 s.add_trans(start, null)
170 for a1 in accept do
171 a1.add_trans(a, null)
172 end
173 s.add_trans(other.start, null)
174 for a2 in other.accept do
175 a2.add_trans(a, null)
176 accept.add(a2)
177 end
178
179 start = s
180 accept = [a]
181
182 states.add s
183 states.add a
184 states.add_all other.states
185 end
186
187 # Return a new automaton that recognize `self` but not `other`.
188 # For a theoretical POV, this is the subtraction of languages.
189 # Note: the implementation use `to_dfa` internally, so the theoretical complexity is not cheap.
190 fun except(other: Automaton): Automaton
191 do
192 var ta = new Token("1")
193 self.tag_accept(ta)
194 var tb = new Token("2")
195 other.tag_accept(tb)
196
197 var c = new Automaton.empty
198 c.absorb(self)
199 c.absorb(other)
200 c = c.to_dfa
201 c.accept.clear
202 for s in c.retrotags[ta] do
203 if not c.tags[s].has(tb) then
204 c.accept.add(s)
205 end
206 end
207 c.clear_tag(ta)
208 c.clear_tag(tb)
209 return c
210 end
211
212 # `self` absorbs all states, transitions, tags, and acceptations of `other`.
213 # An epsilon transition is added between `self.start` and `other.start`.
214 fun absorb(other: Automaton)
215 do
216 states.add_all other.states
217 start.add_trans(other.start, null)
218 for s, ts in other.tags do for t in ts do add_tag(s, t)
219 accept.add_all other.accept
220 end
221
222 # Do the Kleene closure (*) on self
223 fun close
224 do
225 for a1 in accept do
226 a1.add_trans(start, null)
227 start.add_trans(a1, null)
228 end
229 end
230
231 # Do the + on self
232 fun plus
233 do
234 for a1 in accept do
235 a1.add_trans(start, null)
236 end
237 end
238
239 # Do the ? on self
240 fun optionnal
241 do
242 alternate(new Automaton.epsilon)
243 end
244
245 # Remove all transitions on a given symbol
246 fun minus_sym(symbol: TSymbol)
247 do
248 var f = symbol.first
249 var l = symbol.last
250 for s in states do
251 for t in s.outs.to_a do
252 if t.symbol == null then continue
253
254 # Check overlaps
255 var tf = t.symbol.as(not null).first
256 var tl = t.symbol.as(not null).last
257 if l != null and tf > l then continue
258 if tl != null and f > tl then continue
259
260 t.delete
261
262 # Add left and right part if non empty
263 if tf < f then
264 var sym = new TSymbol(tf,f-1)
265 s.add_trans(t.to, sym)
266 end
267 if l != null then
268 if tl == null then
269 var sym = new TSymbol(l+1, null)
270 s.add_trans(t.to, sym)
271 else if tl > l then
272 var sym = new TSymbol(l+1, tl)
273 s.add_trans(t.to, sym)
274 end
275 end
276 end
277 end
278 end
279
280 # Fully duplicate an automaton
281 fun dup: Automaton
282 do
283 var res = new Automaton.empty
284 var map = new HashMap[State, State]
285 map[start] = res.start
286 for s in states do
287 if s == start then continue
288 var s2 = new State
289 map[s] = s2
290 res.states.add(s2)
291 end
292 for s in accept do
293 res.accept.add map[s]
294 end
295 for s, ts in tags do for t in ts do
296 res.add_tag(map[s], t)
297 end
298 for s in states do
299 for t in s.outs do
300 map[s].add_trans(map[t.to], t.symbol)
301 end
302 end
303 return res
304 end
305
306 # Reverse an automaton in place
307 fun reverse
308 do
309 for s in states do
310 var tmp = s.ins
311 s.ins = s.outs
312 s.outs = tmp
313 for t in s.outs do
314 var tmp2 = t.from
315 t.from = t.to
316 t.to = tmp2
317 end
318 end
319 var st = start
320 if accept.length == 1 then
321 start = accept.first
322 else
323 var st2 = new State
324 start = st2
325 states.add(st2)
326
327 for s in accept do
328 st2.add_trans(s, null)
329 end
330 end
331 accept.clear
332 accept.add(st)
333 end
334
335 # Remove states (and transitions) that does not reach an accept state
336 fun trim
337 do
338 # Good states are those we want to keep
339 var goods = new HashSet[State]
340 goods.add_all(accept)
341
342 var todo = accept.to_a
343
344 # Propagate goodness
345 while not todo.is_empty do
346 var s = todo.pop
347 for t in s.ins do
348 var s2 = t.from
349 if goods.has(s2) then continue
350 goods.add(s2)
351 todo.add(s2)
352 end
353 end
354
355 # What are the bad state then?
356 var bads = new Array[State]
357 for s in states do
358 if not goods.has(s) then bads.add(s)
359 end
360
361 # Remove their transitions and tags
362 for s in bads do
363 for t in s.ins.to_a do t.delete
364 for t in s.outs.to_a do t.delete
365 if tags.has_key(s) then
366 for t in tags[s] do retrotags[t].remove(s)
367 tags.keys.remove(s)
368 end
369 end
370
371 # Keep only the good stuff
372 states.clear
373 states.add_all(goods)
374 states.add(start)
375 end
376
377 # Generate a minimal DFA
378 # REQUIRE: self is a DFA
379 fun to_minimal_dfa: Automaton
380 do
381 assert_valid
382
383 trim
384
385 # Graph of known distinct states.
386 var distincts = new HashMap[State, Set[State]]
387 for s in states do
388 distincts[s] = new HashSet[State]
389 end
390
391 # split accept states.
392 # An accept state is distinct with a non accept state.
393 for s1 in accept do
394 for s2 in states do
395 if distincts[s1].has(s2) then continue
396 if not accept.has(s2) then
397 distincts[s1].add(s2)
398 distincts[s2].add(s1)
399 continue
400 end
401 if tags.get_or_null(s1) != tags.get_or_null(s2) then
402 distincts[s1].add(s2)
403 distincts[s2].add(s1)
404 continue
405 end
406 end
407 end
408
409 # Fixed point algorithm.
410 # * Get 2 states s1 and s2 not yet distinguished.
411 # * Get a symbol w.
412 # * If s1.trans(w) and s2.trans(w) are distinguished, then
413 # distinguish s1 and s2.
414 var changed = true
415 var ints = new Array[Int] # List of symbols to check
416 while changed do
417 changed = false
418 for s1 in states do for s2 in states do
419 if distincts[s1].has(s2) then continue
420
421 # The transitions use intervals. Therefore, for the states s1 and s2,
422 # we need to check only the meaningful symbols. They are the `first`
423 # symbol of each interval and the first one after the interval (`last+1`).
424 ints.clear
425 # Check only `s1`; `s2` will be checked later when s1 and s2 are switched.
426 for t in s1.outs do
427 var sym = t.symbol
428 assert sym != null
429 ints.add sym.first
430 var l = sym.last
431 if l != null then ints.add l + 1
432 end
433
434 # Check each symbol
435 for i in ints do
436 var ds1 = s1.trans(i)
437 var ds2 = s2.trans(i)
438 if ds1 == ds2 then continue
439 if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
440 distincts[s1].add(s2)
441 distincts[s2].add(s1)
442 changed = true
443 break
444 end
445 end
446 end
447
448 # We need to unify not-distinguished states.
449 # Just add an epsilon-transition and DFAize the automaton.
450 for s1 in states do for s2 in states do
451 if distincts[s1].has(s2) then continue
452 s1.add_trans(s2, null)
453 end
454
455 return to_dfa
456 end
457
458 # Assert that `self` is a valid automaton or abort
459 fun assert_valid
460 do
461 assert states.has(start)
462 assert states.has_all(accept)
463 for s in states do
464 for t in s.outs do assert states.has(t.to)
465 for t in s.ins do assert states.has(t.from)
466 end
467 assert states.has_all(tags.keys)
468 for t, ss in retrotags do
469 assert states.has_all(ss)
470 end
471 end
472
473 # Produce a graphviz string from the automatom
474 #
475 # Set `merge_transitions = false` to generate one edge by transition (default true).
476 fun to_dot(merge_transitions: nullable Bool): Writable
477 do
478 var names = new HashMap[State, String]
479 var ni = 0
480 for s in states do
481 names[s] = ni.to_s
482 ni += 1
483 end
484
485 var f = new Buffer
486 f.append("digraph g \{\n")
487 f.append("rankdir=LR;")
488
489 var state_nb = 0
490 for s in states do
491 f.append("s{names[s]}[shape=circle")
492 #f.write("label=\"\",")
493 if accept.has(s) then
494 f.append(",shape=doublecircle")
495 end
496 if tags.has_key(s) then
497 f.append(",label=\"")
498 for token in tags[s] do
499 f.append("{token.name.escape_to_dot}\\n")
500 end
501 f.append("\"")
502 else
503 f.append(",label=\"{state_nb}\"")
504 end
505 f.append("];\n")
506 var outs = new HashMap[State, Array[nullable TSymbol]]
507 for t in s.outs do
508 var a
509 var s2 = t.to
510 var c = t.symbol
511 if outs.has_key(s2) then
512 a = outs[s2]
513 else
514 a = new Array[nullable TSymbol]
515 outs[s2] = a
516 end
517 a.add(c)
518 end
519 for s2, a in outs do
520 var labe = ""
521 for c in a do
522 if merge_transitions == false then labe = ""
523 if not labe.is_empty then labe += "\n"
524 if c == null then
525 labe += "ε"
526 else
527 labe += c.to_s
528 end
529 if merge_transitions == false then
530 f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_dot}\"];\n")
531 end
532 end
533 if merge_transitions or else true then
534 f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n")
535 end
536 end
537 state_nb += 1
538 end
539 f.append("empty->s{names[start]}; empty[label=\"\",shape=none];\n")
540 f.append("\}\n")
541 return f
542 end
543
544 # Transform a NFA to a DFA.
545 # note: the DFA is not minimized.
546 fun to_dfa: Automaton
547 do
548 assert_valid
549
550 trim
551
552 var dfa = new Automaton.empty
553 var n2d = new ArrayMap[Set[State], State]
554 var seen = new ArraySet[Set[State]]
555 var alphabet = new HashSet[Int]
556 var st = eclosure([start])
557 var todo = [st]
558 n2d[st] = dfa.start
559 seen.add(st)
560 while not todo.is_empty do
561 var nfa_states = todo.pop
562 #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
563 var dfa_state = n2d[nfa_states]
564 alphabet.clear
565 for s in nfa_states do
566 # Collect important values to build the alphabet
567 for t in s.outs do
568 var sym = t.symbol
569 if sym == null then continue
570 alphabet.add(sym.first)
571 var l = sym.last
572 if l != null then alphabet.add(l)
573 end
574
575 # Mark accept and tags
576 if accept.has(s) then
577 if tags.has_key(s) then
578 for t in tags[s] do
579 dfa.add_tag(dfa_state, t)
580 end
581 end
582 dfa.accept.add(dfa_state)
583 end
584 end
585
586 # From the important values, build a sequence of TSymbols
587 var a = alphabet.to_a
588 default_comparator.sort(a)
589 var tsyms = new Array[TSymbol]
590 var last = 0
591 for i in a do
592 if last > 0 and last <= i-1 then
593 tsyms.add(new TSymbol(last,i-1))
594 end
595 tsyms.add(new TSymbol(i,i))
596 last = i+1
597 end
598 if last > 0 then
599 tsyms.add(new TSymbol(last,null))
600 end
601 #print "Alphabet: {tsyms.join(", ")}"
602
603 var lastst: nullable Transition = null
604 for sym in tsyms do
605 var nfa_dest = eclosure(trans(nfa_states, sym.first))
606 if nfa_dest.is_empty then
607 lastst = null
608 continue
609 end
610 #print "{nfa_states} -> {sym} -> {nfa_dest}"
611 var dfa_dest
612 if seen.has(nfa_dest) then
613 #print "* reuse {nfa_dest.inspect}={nfa_dest}"
614 dfa_dest = n2d[nfa_dest]
615 else
616 #print "* new {nfa_dest.inspect}={nfa_dest}"
617 dfa_dest = new State
618 dfa.states.add(dfa_dest)
619 n2d[nfa_dest] = dfa_dest
620 todo.add(nfa_dest)
621 seen.add(nfa_dest)
622 end
623 if lastst != null and lastst.to == dfa_dest then
624 lastst.symbol.as(not null).last = sym.last
625 else
626 lastst = dfa_state.add_trans(dfa_dest, sym)
627 end
628 end
629 end
630 return dfa
631 end
632
633 # Transform a NFA to a epsilonless NFA.
634 fun to_nfa_noe: Automaton
635 do
636 assert_valid
637
638 trim
639
640 var dfa = new Automaton.empty
641 var n2d = new ArrayMap[Set[State], State]
642 var seen = new ArraySet[Set[State]]
643 var st = eclosure([start])
644 var todo = [st]
645 n2d[st] = dfa.start
646 seen.add(st)
647 while not todo.is_empty do
648 var nfa_states = todo.pop
649 #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
650 var dfa_state = n2d[nfa_states]
651 for s in nfa_states do
652 for t in s.outs do
653 if t.symbol == null then continue
654 var nfa_dest = eclosure([t.to])
655 #print "{nfa_states} -> {sym} -> {nfa_dest}"
656 var dfa_dest
657 if seen.has(nfa_dest) then
658 #print "* reuse {nfa_dest.inspect}={nfa_dest}"
659 dfa_dest = n2d[nfa_dest]
660 else
661 #print "* new {nfa_dest.inspect}={nfa_dest}"
662 dfa_dest = new State
663 dfa.states.add(dfa_dest)
664 n2d[nfa_dest] = dfa_dest
665 todo.add(nfa_dest)
666 seen.add(nfa_dest)
667 end
668 dfa_state.add_trans(dfa_dest, t.symbol)
669 end
670
671 # Mark accept and tags
672 if accept.has(s) then
673 if tags.has_key(s) then
674 for t in tags[s] do
675 dfa.add_tag(dfa_state, t)
676 end
677 end
678 dfa.accept.add(dfa_state)
679 end
680 end
681 end
682 return dfa
683 end
684
685 # Epsilon-closure on a state of states.
686 # Used by `to_dfa`.
687 private fun eclosure(states: Collection[State]): Set[State]
688 do
689 var res = new ArraySet[State]
690 res.add_all(states)
691 var todo = states.to_a
692 while not todo.is_empty do
693 var s = todo.pop
694 for t in s.outs do
695 if t.symbol != null then continue
696 var to = t.to
697 if res.has(to) then continue
698 res.add(to)
699 todo.add(to)
700 end
701 end
702 return res
703 end
704
705 # Trans on a set of states.
706 # Used by `to_dfa`.
707 fun trans(states: Collection[State], symbol: Int): Set[State]
708 do
709 var res = new ArraySet[State]
710 for s in states do
711 for t in s.outs do
712 var sym = t.symbol
713 if sym == null then continue
714 if sym.first > symbol then continue
715 var l = sym.last
716 if l != null and l < symbol then continue
717 var to = t.to
718 if res.has(to) then continue
719 res.add(to)
720 end
721 end
722 return res
723 end
724
725 # Generate the Nit source code of the lexer.
726 # `filepath` is the name of the output file.
727 # `parser` is the name of the parser module (used to import the token classes).
728 fun gen_to_nit(filepath: String, name: String, parser: nullable String)
729 do
730 var gen = new DFAGenerator(filepath, name, self, parser)
731 gen.gen_to_nit
732 end
733 end
734
735 # Generate the Nit source code of the lexer
736 private class DFAGenerator
737 var filepath: String
738 var name: String
739 var automaton: Automaton
740 var parser: nullable String
741
742 var out: Writer is noinit
743
744 init do
745 self.out = new FileWriter.open(filepath)
746 end
747
748 fun add(s: String) do out.write(s)
749
750 fun gen_to_nit
751 do
752 var names = new HashMap[State, String]
753 var i = 0
754 for s in automaton.states do
755 names[s] = i.to_s
756 i += 1
757 end
758
759 add "# Lexer generated by nitcc for the grammar {name}\n"
760 add "module {name}_lexer is generated, no_warning \"missing-doc\"\n"
761 add("import nitcc_runtime\n")
762
763 var p = parser
764 if p != null then add("import {p}\n")
765
766 add("class Lexer_{name}\n")
767 add("\tsuper Lexer\n")
768 add("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
769 add("end\n")
770
771 for s in automaton.states do
772 var n = names[s]
773 add("private fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
774 end
775
776 add("class MyNToken\n")
777 add("\tsuper NToken\n")
778 add("end\n")
779
780 for s in automaton.states do
781 var n = names[s]
782 add("private class DFAState{n}\n")
783 add("\tsuper DFAState\n")
784 if automaton.accept.has(s) then
785 var token
786 if automaton.tags.has_key(s) then
787 token = automaton.tags[s].first
788 else
789 token = null
790 end
791 add("\tredef fun is_accept do return true\n")
792 var is_ignored = false
793 if token != null and token.name == "Ignored" then
794 is_ignored = true
795 add("\tredef fun is_ignored do return true\n")
796 end
797 add("\tredef fun make_token(position, source) do\n")
798 if is_ignored then
799 add("\t\treturn null\n")
800 else
801 if token == null then
802 add("\t\tvar t = new MyNToken\n")
803 add("\t\tt.text = position.extract(source)\n")
804 else
805 add("\t\tvar t = new {token.cname}\n")
806 var ttext = token.text
807 if ttext == null then
808 add("\t\tt.text = position.extract(source)\n")
809 else
810 add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
811 end
812 end
813 add("\t\tt.position = position\n")
814 add("\t\treturn t\n")
815 end
816 add("\tend\n")
817 end
818 var trans = new ArrayMap[TSymbol, State]
819 for t in s.outs do
820 var sym = t.symbol
821 assert sym != null
822 trans[sym] = t.to
823 end
824 if trans.is_empty then
825 # Do nothing, inherit the trans
826 else
827 add("\tredef fun trans(char) do\n")
828
829 # Collect the sequence of tests in the dispatch sequence
830 # The point here is that for each transition, there is a first and a last
831 # So holes have to be identified
832 var dispatch = new HashMap[Int, nullable State]
833 var haslast: nullable State = null
834
835 var last = -1
836 for sym, next in trans do
837 assert haslast == null
838 assert sym.first > last
839 if sym.first > last + 1 then
840 dispatch[sym.first-1] = null
841 end
842 var l = sym.last
843 if l == null then
844 haslast = next
845 else
846 dispatch[l] = next
847 last = l
848 end
849 end
850
851 if dispatch.is_empty and haslast != null then
852 # Only one transition that accepts everything (quite rare)
853 else
854 # We need to check
855 add("\t\tvar c = char.code_point\n")
856 end
857
858 # Generate a sequence of `if` for the dispatch
859 if haslast != null and last >= 0 then
860 # Special case: handle up-bound first if not an error
861 add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
862 # previous become the new last case
863 haslast = dispatch[last]
864 dispatch.keys.remove(last)
865 end
866 for c, next in dispatch do
867 if next == null then
868 add("\t\tif c <= {c} then return null\n")
869 else
870 add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
871 end
872 end
873 if haslast == null then
874 add("\t\treturn null\n")
875 else
876 add("\t\treturn dfastate_{names[haslast]}\n")
877 end
878
879 add("\tend\n")
880 end
881 add("end\n")
882 end
883
884 self.out.close
885 end
886 end
887
888 redef class Token
889 # The associated text (if any, ie defined in the parser part)
890 var text: nullable String is noautoinit, writable
891 end
892
893 # A state in a finite automaton
894 class State
895 # Outgoing transitions
896 var outs = new Array[Transition]
897
898 # Ingoing transitions
899 var ins = new Array[Transition]
900
901 # Add a transitions to `to` on `symbol` (null means epsilon)
902 fun add_trans(to: State, symbol: nullable TSymbol): Transition
903 do
904 var t = new Transition(self, to, symbol)
905 outs.add(t)
906 to.ins.add(t)
907 return t
908 end
909
910 # Get the first state following the transition `i`.
911 # Null if no transition for `i`.
912 fun trans(i: Int): nullable State
913 do
914 for t in outs do
915 var sym = t.symbol
916 assert sym != null
917 var f = sym.first
918 var l = sym.last
919 if i < f then continue
920 if l != null and i > l then continue
921 return t.to
922 end
923 return null
924 end
925 end
926
927 # A range of symbols on a transition
928 class TSymbol
929 # The first symbol in the range
930 var first: Int
931
932 # The last symbol if any.
933 #
934 # `null` means infinity.
935 var last: nullable Int
936
937 redef fun to_s
938 do
939 var res
940 var f = first
941 if f <= 32 then
942 res = "#{f}"
943 else
944 res = f.code_point.to_s
945 end
946 var l = last
947 if f == l then return res
948 res += " .. "
949 if l == null then return res
950 if l <= 32 or l >= 127 then return res + "#{l}"
951 return res + l.code_point.to_s
952 end
953 end
954
955 # A transition in a finite automaton
956 class Transition
957 # The source state
958 var from: State
959 # The destination state
960 var to: State
961 # The symbol on the transition (null means epsilon)
962 var symbol: nullable TSymbol
963
964 # Remove the transition from the automaton.
965 # Detach from `from` and `to`.
966 fun delete
967 do
968 from.outs.remove(self)
969 to.ins.remove(self)
970 end
971 end