contrib/nitcc/src/nitcc_lexer0.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 #     http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Ad-hoc hand-written lexer for nitcc
  16 # This avoids having to commit (and rely on) a generated lexer
  17 module nitcc_lexer0
  18
  19 # Required for the tokens definitions
  20 import nitcc_parser
  21
  22 # Hand-written lexer of nitcc.
  23 # Used only for the bootstrap of the tool.
  24 class Lexer_nitcc
  25         # The text to tokenize
  26         var text: String
  27
  28         # The iterator on text
  29         private var iter: Iterator[Char] is noinit
  30
  31         # The number of characters of `text` consumed so far.
  32         # Reported in the lexical error messages.
  33         var pos = 0
  34
  35         # The tokens currently produced
  36         private var tokens = new Array[NToken]
  37
  38         # Tokenize `text` and return the tokens, terminated by a `NEof`.
  39         #
  40         # Blanks (characters `<= ' '`) and `//` line comments are skipped.
  41         # An unexpected character is reported by `error`, which aborts.
  42         # The tokens are appended to an array that is never reset.
  43         fun lex: Array[NToken]
  44         do
  45                 iter = text.chars.iterator
  46                 while iter.is_ok do
  47                         trim
  48                         if not iter.is_ok then break
  49                         var c = iter.item
  50                         advance
  51                         if c == '*' then
  52                                 tokens.add new Nstar
  53                         else if c == '?' then
  54                                 tokens.add new Nques
  55                         else if c == '+' then
  56                                 tokens.add new Nplus
  57                         else if c == '-' then
  58                                 # `-` can be the last character of the text,
  59                                 # so check the iterator before peeking at `>`.
  60                                 if iter.is_ok and iter.item == '>' then
  61                                         advance
  62                                         tokens.add new Narrow
  63                                 else
  64                                         tokens.add new Nminus
  65                                 end
  66                         else if c == '(' then
  67                                 tokens.add new Nopar
  68                         else if c == ')' then
  69                                 tokens.add new Ncpar
  70                         else if c == '{' then
  71                                 tokens.add new Nocur
  72                         else if c == '}' then
  73                                 tokens.add new Nccur
  74                         else if c == '|' then
  75                                 tokens.add new Npipe
  76                         else if c == ',' then
  77                                 tokens.add new Ncomma
  78                         else if c == ':' then
  79                                 tokens.add new Ncolo
  80                         else if c == ';' then
  81                                 tokens.add new Nsemi
  82                         else if c == '.' then
  83                                 tokens.add new Ndot
  84                         else if c == '=' then
  85                                 tokens.add new Neq
  86                         else if c == '\'' then
  87                                 str
  88                         else if c >= 'a' and c <= 'z' then
  89                                 id(c)
  90                         else if c >= 'A' and c <= 'Z' then
  91                                 kw(c)
  92                         else if c == '/' and iter.is_ok and iter.item == '/' then
  93                                 # Line comment: skip up to the end of the line
  94                                 while iter.is_ok and iter.item != '\n' do advance
  95                         else
  96                                 error(c)
  97                         end
  98                 end
  99                 tokens.add new NEof
 100                 return tokens
 101         end
 102
 103         # Consume the current character and keep `pos` up to date
 104         private fun advance
 105         do
 106                 iter.next
 107                 pos += 1
 108         end
 109
 110         # Print a lexical error on `c` at the current position, then abort
 111         private fun error(c: Char)
 112         do
 113                 print "pos {pos}: Lexer error on '{c}'."
 114                 abort
 115         end
 116
 117         # Read a quoted string whose opening `'` is already consumed.
 118         #
 119         # The text of the produced `Nstr` keeps both quotes and
 120         # the backslash escapes verbatim.
 121         # Reaching the end of the text before the closing `'` is an error.
 122         private fun str
 123         do
 124                 var b = new FlatBuffer
 125                 b.add('\'')
 126                 while iter.is_ok do
 127                         var c = iter.item
 128                         advance
 129                         if c == '\\' then
 130                                 # A backslash must be followed by a character
 131                                 if not iter.is_ok then
 132                                         error(c)
 133                                 end
 134                                 b.add(c)
 135                                 c = iter.item
 136                                 advance
 137                         else if c == '\'' then
 138                                 b.add(c)
 139                                 var token = new Nstr
 140                                 token.text = b.to_s
 141                                 tokens.add token
 142                                 return
 143                         end
 144                         b.add c
 145                 end
 146                 # Unterminated string
 147                 error('\n')
 148         end
 149
 150         # Read a word whose first character `first` is already consumed.
 151         #
 152         # The word is extended with the following lowercase ASCII letters,
 153         # digits and underscores; the first other character is left unread.
 154         private fun word(first: Char): String
 155         do
 156                 var b = new FlatBuffer
 157                 b.add first
 158                 while iter.is_ok do
 159                         var c = iter.item
 160                         var in_word = c == '_' or (c >= 'a' and c <= 'z') or (c >= '0' and c <= '9')
 161                         if not in_word then break
 162                         b.add c
 163                         advance
 164                 end
 165                 return b.to_s
 166         end
 167
 168         # Produce an `Nid` for the identifier starting with `c`
 169         private fun id(c: Char)
 170         do
 171                 var token = new Nid
 172                 token.text = word(c)
 173                 tokens.add token
 174         end
 175
 176         # Produce an `Nkw` for the keyword starting with `c`
 177         private fun kw(c: Char)
 178         do
 179                 var token = new Nkw
 180                 token.text = word(c)
 181                 tokens.add token
 182         end
 183
 184         # Skip the blank characters (those `<= ' '`)
 185         private fun trim
 186         do
 187                 while iter.is_ok and iter.item <= ' ' do advance
 188         end
 189 end