contrib/nitcc/src/autom.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Finite automaton (NFA & DFA)
  16 module autom
  17
  18 # For the class Token
  19 import grammar
  20
  21 # A finite automaton
  22 class Automaton
  23         # The start state (set by the constructors `empty`, `epsilon`, `atom` and `cla`)
  24         var start: State is noinit
  25
  26         # States that are accept states
  27         var accept = new Array[State]
  28
  29         # All the states of the automaton
  30         var states = new Array[State]
  31
  32         # Tokens associated to states, indexed by state.
  33         # Use `add_tag` to update, so that `retrotags` stays consistent
  34         var tags = new HashMap[State, Set[Token]]
  35
  36         # States associated to tokens, indexed by token (the reverse of `tags`).
  37         # Use `add_tag` to update, so that `tags` stays consistent
  38         var retrotags = new HashMap[Token, Set[State]]
  39
  40         # Associate the token `t` to every accept state of `self`.
  41         # See `add_tag` for the bookkeeping done on each state.
  42         fun tag_accept(t: Token) do
  43                 for state in accept do add_tag(state, t)
  44         end
  45
  46         # Associate the token `t` to the state `s` (both `tags` and `retrotags` are updated)
  47         fun add_tag(s: State, t: Token)
  48         do
  49                 # Tokens of the state: the set is created on first use
  50                 var tokens = tags.get_or_null(s)
  51                 if tokens == null then
  52                         tokens = new ArraySet[Token]
  53                         tags[s] = tokens
  54                 end
  55                 tokens.add t
  56
  57                 # States of the token: the set is created on first use
  58                 var tagged = retrotags.get_or_null(t)
  59                 if tagged == null then
  60                         tagged = new ArraySet[State]
  61                         retrotags[t] = tagged
  62                 end
  63                 tagged.add s
  64
  65                 assert tags[s].has(t)
  66                 assert retrotags[t].has(s)
  67         end
  68
  69         # Forget the token `t`: untag its states then drop its entry in `retrotags`.
  70         fun clear_tag(t: Token) do
  71                 var tagged = retrotags.get_or_null(t)
  72                 if tagged == null then return
  73                 for state in tagged do
  74                         if not tags.has_key(state) then continue
  75                         tags[state].remove(t)
  76                         if tags[state].is_empty then tags.keys.remove(state)
  77                 end
  78                 retrotags.keys.remove(t)
  79         end
  80
  81         # Solve the conflicts (states with several tokens) according to the inclusion of languages.
  82         # REQUIRE: self isa DFA automaton
  83         fun solve_token_inclusion
  84         do
  85                 for s, ts in tags do
  86                         if ts.length <= 1 then continue # no conflict on this state
  87                         var losers = new Array[Token]
  88                         for t1 in ts do
  89                                 for t2 in ts do
  90                                         if t1 == t2 then continue
  91                                         if retrotags[t1].length > retrotags[t2].length and retrotags[t1].has_all(retrotags[t2]) then
  92                                                 losers.add(t1) # `t1` tags strictly more states than `t2`, including all those of `t2`
  93                                                 break # one such `t2` is enough
  94                                         end
  95                                 end
  96                         end
  97                         for t in losers do # removals are done after the iteration on `ts`
  98                                 ts.remove(t)
  99                                 retrotags[t].remove s
 100                         end
 101                 end
 102         end
 103
 104         # Build an automaton that recognizes nothing (the empty language).
 105         # A single state, that is not accept, without any transition.
 106         init empty
 107         do
 108                 var initial = new State
 109                 states.add initial
 110                 start = initial
 111         end
 112
 113         # Build an automaton that recognizes only the empty string.
 114         # A single state, that is accept, without any transition.
 115         init epsilon
 116         do
 117                 var initial = new State
 118                 states.add initial
 119                 accept.add initial
 120                 start = initial
 121         end
 122
 123         # Build an automaton that recognizes only the single symbol `symbol`.
 124         # Two states linked by one transition on `symbol`; the second state is accept.
 125         init atom(symbol: Int)
 126         do
 127                 var source = new State
 128                 var target = new State
 129                 # A single symbol is the interval from `symbol` to `symbol`
 130                 source.add_trans(target, new TSymbol(symbol, symbol))
 131                 start = source
 132                 states.add source
 133                 states.add target
 134                 accept.add target
 135         end
 136
 137         # Build an automaton that recognizes only one symbol of the range `first` to `last`.
 138         # Two states linked by one transition on the range; the second state is accept.
 139         init cla(first: Int, last: nullable Int)
 140         do
 141                 var source = new State
 142                 var target = new State
 143                 # A `null` `last` is handled as "no upper bound" by `trans`
 144                 source.add_trans(target, new TSymbol(first, last))
 145                 start = source
 146                 states.add source
 147                 states.add target
 148                 accept.add target
 149         end
 150
 151         # `self` becomes the concatenation of `self` followed by `other`.
 152         # `other` is modified and invalidated.
 153         fun concat(other: Automaton)
 154         do
 155                 # Link each former accept state to the start of `other`
 156                 var entry = other.start
 157                 for acc in accept do acc.add_trans(entry, null)
 158                 # From now on, the accept states are those of `other` (the array is shared)
 159                 accept = other.accept
 160                 states.add_all other.states
 161         end
 162
 163         # `self` becomes the alternation of `self` and `other`.
 164         # `other` is modified and invalidated.
 165         fun alternate(other: Automaton)
 166         do
 167                 var s = new State
 168                 var a = new State
 169                 s.add_trans(start, null)
 170                 for a1 in accept do
 171                         a1.add_trans(a, null)
 172                 end
 173                 s.add_trans(other.start, null)
 174                 for a2 in other.accept do
 175                         a2.add_trans(a, null)
 176                 end
 177
 178                 # `s` and `a` are the new start state and the new unique accept state
 179                 start = s
 180                 accept = [a]
 181
 182                 states.add s
 183                 states.add a
 184                 states.add_all other.states
 185         end
 186
 187         # Return a new automaton that recognizes `self` but not `other` (subtraction of languages).
 188         # The result has no accept state when no state of the DFA is tagged by `self`.
 189         # `self` and `other` are modified: they are tagged and absorbed in a temporary automaton.
 190         # Note: the implementation uses `to_dfa` internally, so the theoretical complexity is not cheap.
 191         fun except(other: Automaton): Automaton
 192         do
 193                 var ta = new Token("1")
 194                 self.tag_accept(ta)
 195                 var tb = new Token("2")
 196                 other.tag_accept(tb)
 197                 var c = new Automaton.empty
 198                 c.absorb(self)
 199                 c.absorb(other)
 200                 c = c.to_dfa
 201                 c.accept.clear
 202                 # `ta` may tag no state at all (e.g. `self` accepts nothing): use an empty set then
 203                 var candidates = c.retrotags.get_or_default(ta, new ArraySet[State])
 204                 for s in candidates do
 205                         if not c.tags[s].has(tb) then c.accept.add(s)
 206                 end
 207                 c.clear_tag(ta)
 208                 c.clear_tag(tb)
 209                 return c
 210         end
 211
 212         # Merge `other` into `self`: its states, transitions, tags and accept states.
 213         # `other.start` becomes reachable from `self.start` by an epsilon transition.
 214         fun absorb(other: Automaton) do
 215                 start.add_trans(other.start, null)
 216                 states.add_all other.states
 217                 accept.add_all other.accept
 218                 # Go through `add_tag` so that `retrotags` is updated too
 219                 for state, tokens in other.tags do for token in tokens do add_tag(state, token)
 220         end
 221
 222         # Do the Kleene closure (*) on self.
 223         # Each accept state and the start state are linked by epsilon in both directions.
 224         fun close do
 225                 for acc in accept do
 226                         acc.add_trans(start, null)
 227                         start.add_trans(acc, null)
 228                 end
 229         end
 230
 231         # Do the + on self.
 232         # Each accept state is linked back to the start state by epsilon.
 233         fun plus do
 234                 for acc in accept do
 235                         acc.add_trans(start, null)
 236                 end
 237         end
 238
 239         # Do the ? on self.
 240         # `self` becomes the alternation of `self` and of the empty-string automaton.
 241         fun optionnal do
 242                 alternate(new Automaton.epsilon)
 243         end
 244
 245         # Remove all the transitions on the symbols of the interval `symbol`.
 246         # An overlapping transition is deleted, then its parts outside of `symbol` are added back.
 247         fun minus_sym(symbol: TSymbol)
 248         do
 249                 var lo = symbol.first
 250                 var hi = symbol.last
 251                 for state in states do
 252                         for t in state.outs.to_a do
 253                                 var tsym = t.symbol
 254                                 if tsym == null then continue
 255                                 var tf = tsym.first
 256                                 var tl = tsym.last
 257
 258                                 # Nothing to do without overlap
 259                                 if hi != null and tf > hi then continue
 260                                 if tl != null and lo > tl then continue
 261
 262                                 # Overlap: replace the transition by what remains of it
 263                                 t.delete
 264                                 var dest = t.to
 265
 266                                 # Left part: the symbols of the transition before `lo`
 267                                 if tf < lo then
 268                                         state.add_trans(dest, new TSymbol(tf, lo-1))
 269                                 end
 270
 271                                 # Right part: the symbols of the transition after `hi`.
 272                                 # A `null` `tl` (no upper bound, as in `trans`) is kept as the new bound.
 273                                 if hi != null and (tl == null or tl > hi) then
 274                                         state.add_trans(dest, new TSymbol(hi+1, tl))
 275                                 end
 276                         end
 277                 end
 278         end
 279
 280         # Return a full copy of `self`: new states with the same transitions, accepts and tags.
 281         fun dup: Automaton
 282         do
 283                 var copy = new Automaton.empty
 284                 # Associate each state of `self` to its twin in `copy`
 285                 var twins = new HashMap[State, State]
 286                 twins[start] = copy.start
 287                 for state in states do
 288                         if state == start then continue
 289                         var twin = new State
 290                         twins[state] = twin
 291                         copy.states.add(twin)
 292                 end
 293                 # Replay the accept states, the tags, then the transitions on the twins
 294                 for state in accept do copy.accept.add twins[state]
 295                 for state, tokens in tags do for token in tokens do
 296                         copy.add_tag(twins[state], token)
 297                 end
 298                 for state in states do
 299                         for t in state.outs do
 300                                 twins[state].add_trans(twins[t.to], t.symbol)
 301                         end
 302                 end
 303                 return copy
 304         end
 305
 306         # Reverse `self` in place: transitions are flipped, start and accept are exchanged.
 307         fun reverse
 308         do
 309                 for state in states do
 310                         # Exchange the incoming and the outgoing transitions
 311                         var former_ins = state.ins
 312                         state.ins = state.outs
 313                         state.outs = former_ins
 314                         # Flip the direction of the transitions that now leave `state`
 315                         for t in former_ins do
 316                                 var origin = t.from
 317                                 t.from = t.to
 318                                 t.to = origin
 319                         end
 320                 end
 321                 var former_start = start
 322                 if accept.length != 1 then
 323                         # A new start state is linked by epsilon to each former accept state
 324                         var entry = new State
 325                         start = entry
 326                         states.add(entry)
 327                         for acc in accept do entry.add_trans(acc, null)
 328                 else
 329                         start = accept.first
 330                 end
 331                 accept.clear
 332                 accept.add(former_start)
 333         end
 334
 335         # Remove states (and transitions) that do not reach an accept state.
 336         # The start state is always kept in `states`, so that `assert_valid` still holds.
 337         fun trim
 338         do
 339                 # Good states are those we want to keep
 340                 var goods = new HashSet[State]
 341                 goods.add_all(accept)
 342                 var todo = accept.to_a
 343                 # Propagate goodness backward, along the incoming transitions
 344                 while not todo.is_empty do
 345                         var s = todo.pop
 346                         for t in s.ins do
 347                                 var s2 = t.from
 348                                 if goods.has(s2) then continue
 349                                 goods.add(s2)
 350                                 todo.add(s2)
 351                         end
 352                 end
 353
 354                 # What are the bad states then?
 355                 var bads = new Array[State]
 356                 for s in states do
 357                         if not goods.has(s) then bads.add(s)
 358                 end
 359
 360                 # Remove their transitions
 361                 for s in bads do
 362                         for t in s.ins.to_a do t.delete
 363                         for t in s.outs.to_a do t.delete
 364                 end
 365
 366                 # Keep only the good stuff, plus the start state (without transition if it was bad)
 367                 states.clear
 368                 states.add_all(goods)
 369                 if not goods.has(start) then states.add(start)
 370         end
 371
 372         # Generate a minimal DFA by unifying the states that cannot be distinguished.
 373         # REQUIRE: self is a DFA
 374         fun to_minimal_dfa: Automaton
 375         do
 376                 assert_valid
 377
 378                 trim
 379
 380                 # Graph of known distinct states.
 381                 var distincts = new HashMap[State, Set[State]]
 382                 for s in states do
 383                         distincts[s] = new HashSet[State]
 384                 end
 385
 386                 # Split accept states.
 387                 # An accept state is distinct from a non-accept state, and from an accept state with other tags.
 388                 for s1 in states do
 389                         for s2 in states do
 390                                 if distincts[s1].has(s2) then continue
 391                                 if not accept.has(s1) then continue
 392                                 if not accept.has(s2) then
 393                                         distincts[s1].add(s2)
 394                                         distincts[s2].add(s1)
 395                                         continue
 396                                 end
 397                                 if tags.get_or_null(s1) != tags.get_or_null(s2) then
 398                                         distincts[s1].add(s2)
 399                                         distincts[s2].add(s1)
 400                                         continue
 401                                 end
 402                         end
 403                 end
 404
 405                 # Fixed point algorithm.
 406                 # * Get 2 states s1 and s2 not yet distinguished.
 407                 # * Get a symbol w.
 408                 # * If s1.trans(w) and s2.trans(w) are distinguished, then
 409                 #   distinguish s1 and s2.
 410                 var changed = true
 411                 var ints = new Array[Int] # List of symbols to check
 412                 while changed do
 413                         changed = false
 414                         for s1 in states do for s2 in states do
 415                                 if distincts[s1].has(s2) then continue
 416
 417                                 # The transitions use intervals. Therefore, for the states s1 and s2,
 418                                 # we need to check only the meaningful symbols. They are the `first`
 419                                 # symbol of each interval and the first one after the interval (`last+1`).
 420                                 ints.clear
 421                                 # Check only `s1`; `s2` will be checked later when s1 and s2 are switched.
 422                                 for t in s1.outs do
 423                                         var sym = t.symbol
 424                                         assert sym != null # no epsilon transition is expected here (see REQUIRE)
 425                                         ints.add sym.first
 426                                         var l = sym.last
 427                                         if l != null then ints.add l + 1
 428                                 end
 429
 430                                 # Check each symbol
 431                                 for i in ints do
 432                                         var ds1 = s1.trans(i)
 433                                         var ds2 = s2.trans(i)
 434                                         if ds1 == ds2 then continue # same destination: `i` does not distinguish
 435                                         if ds1 != null and ds2 != null and not distincts[ds1].has(ds2) then continue
 436                                         distincts[s1].add(s2)
 437                                         distincts[s2].add(s1)
 438                                         changed = true
 439                                         break # one distinguishing symbol is enough
 440                                 end
 441                         end
 442                 end
 443
 444                 # We need to unify not-distinguished states.
 445                 # Just add an epsilon-transition and DFAize the automaton.
 446                 for s1 in states do for s2 in states do
 447                         if distincts[s1].has(s2) then continue
 448                         s1.add_trans(s2, null)
 449                 end
 450
 451                 return to_dfa
 452         end
 453
 454         # Assert that `states` contains every state that `self` refers to, or abort
 455         fun assert_valid do
 456                 # Start and accept states
 457                 assert states.has(start)
 458                 assert states.has_all(accept)
 459                 # Both ends of each transition
 460                 for state in states do
 461                         for t in state.outs do assert states.has(t.to)
 462                         for t in state.ins do assert states.has(t.from)
 463                 end
 464                 # Tagged states, as listed by `tags` then by `retrotags`
 465                 assert states.has_all(tags.keys)
 466                 for tagged in retrotags.values do assert states.has_all(tagged)
 467         end
 468
 469         # Produce a graphviz string from the automaton
 470         #
 471         # Set `merge_transitions = false` to generate one edge by transition (default true).
 472         fun to_dot(merge_transitions: nullable Bool): Writable
 473         do
 474                 var names = new HashMap[State, String] # node names: the rank of the state in `states`
 475                 var ni = 0
 476                 for s in states do
 477                         names[s] = ni.to_s
 478                         ni += 1
 479                 end
 480
 481                 var f = new Buffer
 482                 f.append("digraph g \{\n")
 483                 f.append("rankdir=LR;")
 484
 485                 var state_nb = 0 # label of the states without tag
 486                 for s in states do
 487                         f.append("s{names[s]}[shape=circle")
 488                         #f.write("label=\"\",")
 489                         if accept.has(s) then
 490                                 f.append(",shape=doublecircle")
 491                         end
 492                         if tags.has_key(s) then
 493                                 f.append(",label=\"")
 494                                 for token in tags[s] do
 495                                         f.append("{token.name.escape_to_dot}\\n")
 496                                 end
 497                                 f.append("\"")
 498                         else
 499                                 f.append(",label=\"{state_nb}\"")
 500                         end
 501                         f.append("];\n")
 502                         var outs = new HashMap[State, Array[nullable TSymbol]] # symbols of the transitions, by destination
 503                         for t in s.outs do
 504                                 var a
 505                                 var s2 = t.to
 506                                 var c = t.symbol
 507                                 if outs.has_key(s2) then
 508                                         a = outs[s2]
 509                                 else
 510                                         a = new Array[nullable TSymbol]
 511                                         outs[s2] = a
 512                                 end
 513                                 a.add(c)
 514                         end
 515                         for s2, a in outs do
 516                                 var labe = ""
 517                                 for c in a do
 518                                         if merge_transitions == false then labe = "" # one label by edge
 519                                         if not labe.is_empty then labe += "\n"
 520                                         if c == null then
 521                                                 labe += "ε"
 522                                         else
 523                                                 labe += c.to_s
 524                                         end
 525                                         if merge_transitions == false then
 526                                                 f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_dot}\"];\n")
 527                                         end
 528                                 end
 529                                 if merge_transitions == null or merge_transitions == true then
 530                                         f.append("s{names[s]}->s{names[s2]} [label=\"{labe.escape_to_c}\"];\n") # NOTE(review): `escape_to_c` here but `escape_to_dot` above -- confirm it is intended
 531                                 end
 532                         end
 533                         state_nb += 1
 534                 end
 535                 f.append("empty->s{names[start]}; empty[label=\"\",shape=none];\n") # invisible node that points to the start state
 536                 f.append("\}\n")
 537                 return f
 538         end
 539
 540         # Transform a NFA to a DFA: each state of the result stands for a set of states of `self`.
 541         # note: the DFA is not minimized, and `self` is trimmed (see `trim`).
 542         fun to_dfa: Automaton
 543         do
 544                 assert_valid
 545
 546                 trim
 547
 548                 var dfa = new Automaton.empty
 549                 var n2d = new ArrayMap[Set[State], State] # set of NFA states -> DFA state
 550                 var seen = new ArraySet[Set[State]]
 551                 var alphabet = new HashSet[Int]
 552                 var st = eclosure([start])
 553                 var todo = [st]
 554                 n2d[st] = dfa.start
 555                 seen.add(st)
 556                 while not todo.is_empty do
 557                         var nfa_states = todo.pop
 558                         #print "* work on {nfa_states.inspect}={nfa_states} (remains {todo.length}/{seen.length})"
 559                         var dfa_state = n2d[nfa_states]
 560                         alphabet.clear
 561                         for s in nfa_states do
 562                                 # Collect important values to build the alphabet
 563                                 for t in s.outs do
 564                                         var sym = t.symbol
 565                                         if sym == null then continue
 566                                         alphabet.add(sym.first)
 567                                         var l = sym.last
 568                                         if l != null then alphabet.add(l)
 569                                 end
 570
 571                                 # Mark accept and tags
 572                                 if accept.has(s) then
 573                                         if tags.has_key(s) then
 574                                                 for t in tags[s] do
 575                                                         dfa.add_tag(dfa_state, t)
 576                                                 end
 577                                         end
 578                                         dfa.accept.add(dfa_state)
 579                                 end
 580                         end
 581
 582                         # From the important values, build a sequence of TSymbols
 583                         var a = alphabet.to_a
 584                         default_comparator.sort(a)
 585                         var tsyms = new Array[TSymbol]
 586                         var last = 0 # first value not yet covered; 0 also means that nothing is covered yet
 587                         for i in a do
 588                                 if last > 0 and last <= i-1 then
 589                                         tsyms.add(new TSymbol(last,i-1)) # gap between two important values
 590                                 end
 591                                 tsyms.add(new TSymbol(i,i))
 592                                 last = i+1
 593                         end
 594                         if last > 0 then
 595                                 tsyms.add(new TSymbol(last,null)) # everything after the last important value
 596                         end
 597                         #print "Alphabet: {tsyms.join(", ")}"
 598
 599                         var lastst: nullable Transition = null # previous transition, extended when the next interval has the same destination
 600                         for sym in tsyms do
 601                                 var nfa_dest = eclosure(trans(nfa_states, sym.first))
 602                                 if nfa_dest.is_empty then
 603                                         lastst = null
 604                                         continue
 605                                 end
 606                                 #print "{nfa_states} -> {sym} -> {nfa_dest}"
 607                                 var dfa_dest
 608                                 if seen.has(nfa_dest) then
 609                                         #print "* reuse {nfa_dest.inspect}={nfa_dest}"
 610                                         dfa_dest = n2d[nfa_dest]
 611                                 else
 612                                         #print "* new {nfa_dest.inspect}={nfa_dest}"
 613                                         dfa_dest = new State
 614                                         dfa.states.add(dfa_dest)
 615                                         n2d[nfa_dest] = dfa_dest
 616                                         todo.add(nfa_dest)
 617                                         seen.add(nfa_dest)
 618                                 end
 619                                 if lastst != null and lastst.to == dfa_dest then
 620                                         lastst.symbol.as(not null).last = sym.last
 621                                 else
 622                                         lastst = dfa_state.add_trans(dfa_dest, sym)
 623                                 end
 624                         end
 625                 end
 626                 return dfa
 627         end
 628
 629         # Epsilon-closure of a set of states: `states` plus what they reach by epsilon transitions.
 630         # Used by `to_dfa`.
 631         private fun eclosure(states: Collection[State]): Set[State]
 632         do
 633                 var closure = new ArraySet[State]
 634                 closure.add_all(states)
 635                 # States whose epsilon transitions remain to be followed
 636                 var pending = states.to_a
 637                 while not pending.is_empty do
 638                         var current = pending.pop
 639                         for t in current.outs do
 640                                 # Only follow the epsilon transitions to states not yet reached
 641                                 if t.symbol != null or closure.has(t.to) then continue
 642                                 closure.add(t.to)
 643                                 pending.add(t.to)
 644                         end
 645                 end
 646                 return closure
 647         end
 648
 649         # States reached from `states` by a single transition that accepts `symbol`.
 650         # Epsilon transitions are not followed. Used by `to_dfa`.
 651         fun trans(states: Collection[State], symbol: Int): Set[State]
 652         do
 653                 var reached = new ArraySet[State]
 654                 for state in states do
 655                         for t in state.outs do
 656                                 var sym = t.symbol
 657                                 if sym == null or symbol < sym.first then continue
 658                                 # Without upper bound (`null`), any symbol from `first` is accepted
 659                                 var upper = sym.last
 660                                 if upper != null and symbol > upper then continue
 661                                 # Here `symbol` belongs to the interval of `t`
 662                                 var dest = t.to
 663                                 if not reached.has(dest) then reached.add(dest)
 664                         end
 665                 end
 666                 return reached
 667         end
 668
 669         # Write the Nit source code of the lexer of `self` in a file.
 670         # `filepath` is the path of the file to write.
 671         # `name` is the name of the grammar (used to name the generated module and lexer class).
 672         # `parser` is the optional name of the parser module to import in the generated code.
 673         fun gen_to_nit(filepath: String, name: String, parser: nullable String) do
 674                 var generator = new DFAGenerator(filepath, name, self, parser)
 675                 generator.gen_to_nit
 676         end
 677 end
 678
 679 # Generate the Nit source code of the lexer
 680 private class DFAGenerator
 681         var filepath: String
 682         var name: String
 683         var automaton: Automaton
 684         var parser: nullable String
 685
 686         var out: Writer is noinit
 687
 688         init do
 689                 self.out = new FileWriter.open(filepath)
 690         end
 691
 692         fun add(s: String) do out.write(s)
 693
 694         fun gen_to_nit
 695         do
 696                 var names = new HashMap[State, String]
 697                 var i = 0
 698                 for s in automaton.states do
 699                         names[s] = i.to_s
 700                         i += 1
 701                 end
 702
 703                 add "# Lexer generated by nitcc for the grammar {name}\n"
 704                 add "module {name}_lexer is generated, no_warning \"missing-doc\"\n"
 705                 add("import nitcc_runtime\n")
 706
 707                 var p = parser
 708                 if p != null then add("import {p}\n")
 709
 710                 add("class Lexer_{name}\n")
 711                 add("\tsuper Lexer\n")
 712                 add("\tredef fun start_state do return dfastate_{names[automaton.start]}\n")
 713                 add("end\n")
 714
 715                 for s in automaton.states do
 716                         var n = names[s]
 717                         add("private fun dfastate_{n}: DFAState{n} do return once new DFAState{n}\n")
 718                 end
 719
 720                 add("class MyNToken\n")
 721                 add("\tsuper NToken\n")
 722                 add("end\n")
 723
 724                 for s in automaton.states do
 725                         var  n = names[s]
 726                         add("private class DFAState{n}\n")
 727                         add("\tsuper DFAState\n")
 728                         if automaton.accept.has(s) then
 729                                 var token
 730                                 if automaton.tags.has_key(s) then
 731                                         token = automaton.tags[s].first
 732                                 else
 733                                         token = null
 734                                 end
 735                                 add("\tredef fun is_accept do return true\n")
 736                                 var is_ignored = false
 737                                 if token != null and token.name == "Ignored" then
 738                                         is_ignored = true
 739                                         add("\tredef fun is_ignored do return true\n")
 740                                 end
 741                                 add("\tredef fun make_token(position, source) do\n")
 742                                 if is_ignored then
 743                                         add("\t\treturn null\n")
 744                                 else
 745                                         if token == null then
 746                                                 add("\t\tvar t = new MyNToken\n")
 747                                                 add("\t\tt.text = position.extract(source)\n")
 748                                         else
 749                                                 add("\t\tvar t = new {token.cname}\n")
 750                                                 var ttext = token.text
 751                                                 if ttext == null then
 752                                                         add("\t\tt.text = position.extract(source)\n")
 753                                                 else
 754                                                         add("\t\tt.text = \"{ttext.escape_to_nit}\"\n")
 755                                                 end
 756                                         end
 757                                         add("\t\tt.position = position\n")
 758                                         add("\t\treturn t\n")
 759                                 end
 760                                 add("\tend\n")
 761                         end
 762                         var trans = new ArrayMap[TSymbol, State]
 763                         for t in s.outs do
 764                                 var sym = t.symbol
 765                                 assert sym != null
 766                                 trans[sym] = t.to
 767                         end
 768                         if trans.is_empty then
 769                                 # Do nothing, inherit the trans
 770                         else
 771                                 add("\tredef fun trans(char) do\n")
 772
 773                                 # Collect the sequence of tests in the dispatch sequence
 774                                 # The point here is that for each transition, there is a first and a last
 775                                 # So holes have to be identified
 776                                 var dispatch = new HashMap[Int, nullable State]
 777                                 var haslast: nullable State = null
 778
 779                                 var last = -1
 780                                 for sym, next in trans do
 781                                         assert haslast == null
 782                                         assert sym.first > last
 783                                         if sym.first > last + 1 then
 784                                                 dispatch[sym.first-1] = null
 785                                         end
 786                                         var l = sym.last
 787                                         if l == null then
 788                                                 haslast = next
 789                                         else
 790                                                 dispatch[l] = next
 791                                                 last = l
 792                                         end
 793                                 end
 794
 795                                 if dispatch.is_empty and haslast != null then
 796                                         # Only one transition that accepts everything (quite rare)
 797                                 else
 798                                         # We need to check
 799                                         add("\t\tvar c = char.code_point\n")
 800                                 end
 801
 802                                 # Generate a sequence of `if` for the dispatch
 803                                 if haslast != null and last >= 0 then
 804                                         # Special case: handle up-bound first if not an error
 805                                         add("\t\tif c > {last} then return dfastate_{names[haslast]}\n")
 806                                         # previous become the new last case
 807                                         haslast = dispatch[last]
 808                                         dispatch.keys.remove(last)
 809                                 end
 810                                 for c, next in dispatch do
 811                                         if next == null then
 812                                                 add("\t\tif c <= {c} then return null\n")
 813                                         else
 814                                                 add("\t\tif c <= {c} then return dfastate_{names[next]}\n")
 815                                         end
 816                                 end
 817                                 if haslast == null then
 818                                         add("\t\treturn null\n")
 819                                 else
 820                                         add("\t\treturn dfastate_{names[haslast]}\n")
 821                                 end
 822
 823                                 add("\tend\n")
 824                         end
 825                         add("end\n")
 826                 end
 827
 828                 self.out.close
 829         end
 830 end
 831
redef class Token
        # The associated text (if any, i.e. defined in the parser part)
        #
        # When `null`, the generated lexer extracts the text of the token
        # from the source instead of using a fixed string.
        var text: nullable String is noautoinit, writable
end
 836
 837 # A state in a finite automaton
 838 class State
 839         # Outgoing transitions
 840         var outs = new Array[Transition]
 841
 842         # Ingoing transitions
 843         var ins = new Array[Transition]
 844
 845         # Add a transitions to `to` on `symbol` (null means epsilon)
 846         fun add_trans(to: State, symbol: nullable TSymbol): Transition
 847         do
 848                 var t = new Transition(self, to, symbol)
 849                 outs.add(t)
 850                 to.ins.add(t)
 851                 return t
 852         end
 853
 854         # Get the first state following the transition `i`.
 855         # Null if no transition for `i`.
 856         fun trans(i: Int): nullable State
 857         do
 858                 for t in outs do
 859                         var sym = t.symbol
 860                         assert sym != null
 861                         var f = sym.first
 862                         var l = sym.last
 863                         if i < f then continue
 864                         if l != null and i > l then continue
 865                         return t.to
 866                 end
 867                 return null
 868         end
 869 end
 870
 871 # A range of symbols on a transition
 872 class TSymbol
 873         # The first symbol in the range
 874         var first: Int
 875
 876         # The last symbol if any.
 877         #
 878         # `null` means infinity.
 879         var last: nullable Int
 880
 881         redef fun to_s
 882         do
 883                 var res
 884                 var f = first
 885                 if f <= 32 then
 886                         res = "#{f}"
 887                 else
 888                         res = f.code_point.to_s
 889                 end
 890                 var l = last
 891                 if f == l then return res
 892                 res += " .. "
 893                 if l == null then return res
 894                 if l <= 32 or l >= 127 then return res + "#{l}"
 895                 return res + l.code_point.to_s
 896         end
 897 end
 898
 899 # A transition in a finite automaton
 900 class Transition
 901         # The source state
 902         var from: State
 903         # The destination state
 904         var to: State
 905         # The symbol on the transition (null means epsilon)
 906         var symbol: nullable TSymbol
 907
 908         # Remove the transition from the automaton.
 909         # Detach from `from` and `to`.
 910         fun delete
 911         do
 912                 from.outs.remove(self)
 913                 to.ins.remove(self)
 914         end
 915 end