nitcc: introduce nitcc
[nit.git] / contrib / nitcc / nitcc_lexer0.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Ad-hoc hand-written lexer for nitcc
16 # This avoids committing (and relying on) a generated lexer
17 #
18 module nitcc_lexer0
19
20 # Required for the tokens definitions
21 import nitcc_parser
22
23 # Hand-written lexer of nitcc
24 # Used only for the bootstrap of the tool.
class MyLexer
	# The source text to tokenize
	var text: String

	# Cursor over the characters of `text`; set by `lex`
	var iter: Iterator[Char] = "".iterator

	# Number of characters consumed so far; reported in error messages
	var pos = 0

	# Tokens produced so far, in source order
	var tokens = new Array[NToken]

	# Tokenize `text`, append the tokens to `tokens` and return them.
	# The last token added is always a `NEof`.
	# Aborts through `error` on an unexpected character.
	fun lex: Array[NToken]
	do
		iter = text.iterator
		while iter.is_ok do
			trim
			if not iter.is_ok then break
			var c = iter.item
			advance
			if c == '*' then
				tokens.add new Nstar
			else if c == '?' then
				tokens.add new Nques
			else if c == '+' then
				tokens.add new Nplus
			else if c == '-' then
				# Guard the lookahead: '-' may be the very last character
				if iter.is_ok and iter.item == '>' then
					advance
					tokens.add new Narrow
				else
					tokens.add new Nminus
				end
			else if c == '(' then
				tokens.add new Nopar
			else if c == ')' then
				tokens.add new Ncpar
			else if c == '{' then
				tokens.add new Nocur
			else if c == '}' then
				tokens.add new Nccur
			else if c == '|' then
				tokens.add new Npipe
			else if c == ':' then
				tokens.add new Ncolo
			else if c == ';' then
				tokens.add new Nsemi
			else if c == '.' then
				tokens.add new Ndot
			else if c == '=' then
				tokens.add new Neq
			else if c == '\'' then
				str
			else if c >= 'a' and c <= 'z' then
				id(c)
			else if c >= 'A' and c <= 'Z' then
				kw(c)
			else if c == '/' and iter.is_ok and iter.item == '/' then
				# Line comment: skip up to (but not including) the end of line
				while iter.is_ok and iter.item != '\n' do advance
			else
				error(c)
			end
		end
		tokens.add new NEof
		return tokens
	end

	# Print a lexer error about `c` with the current `pos`, then abort
	fun error(c: Char)
	do
		print "pos {pos}: Lexer error on '{c}'."
		abort
	end

	# Read a quoted string whose opening quote was already consumed.
	# Adds a `Nstr` whose text includes both quotes and keeps
	# backslash escapes verbatim.
	# Aborts through `error` if the text ends before the closing quote.
	fun str
	do
		var b = new Buffer
		b.add('\'')
		while iter.is_ok do
			var c = iter.item
			advance
			if c == '\\' then
				# A backslash must be followed by another character
				if not iter.is_ok then error(c)
				b.add(c)
				# The escaped character is appended below without being
				# interpreted, so an escaped quote does not end the string
				c = iter.item
				advance
			else if c == '\'' then
				b.add(c)
				var token = new Nstr
				token.text = b.to_s
				tokens.add token
				return
			end
			b.add c
		end
		# End of text reached inside the string
		error('\n')
	end

	# Read an identifier whose first character `c` was already consumed
	# and add it as a `Nid`
	fun id(c: Char)
	do
		var token = new Nid
		token.text = word(c)
		tokens.add token
	end

	# Read a keyword whose first character `c` was already consumed
	# and add it as a `Nkw`
	fun kw(c: Char)
	do
		var token = new Nkw
		token.text = word(c)
		tokens.add token
	end

	# Skip the characters that are lower than or equal to a space
	fun trim
	do
		while iter.is_ok and iter.item <= ' ' do advance
	end

	# Consume the current character and keep `pos` in sync
	private fun advance
	do
		iter.next
		pos += 1
	end

	# Return `first` followed by the longest run of `_`, lowercase
	# letters and digits found at the cursor; the run is consumed
	private fun word(first: Char): String
	do
		var b = new Buffer
		b.add first
		while iter.is_ok do
			var c = iter.item
			if c != '_' and (c < 'a' or c > 'z') and (c < '0' or c > '9') then break
			b.add c
			advance
		end
		return b.to_s
	end
end