parser: remove warning about useless cast
[nit.git] / src / parser / xss / lexer.xss
1 $ // This file is part of NIT ( http://www.nitlanguage.org ).
2 $ //
3 $ // Copyright 2008 Jean Privat <jean@pryen.org>
4 $ // Based on algorithms developed for ( http://www.sablecc.org/ ).
5 $ //
6 $ // Licensed under the Apache License, Version 2.0 (the "License");
7 $ // you may not use this file except in compliance with the License.
8 $ // You may obtain a copy of the License at
9 $ //
10 $ //     http://www.apache.org/licenses/LICENSE-2.0
11 $ //
12 $ // Unless required by applicable law or agreed to in writing, software
13 $ // distributed under the License is distributed on an "AS IS" BASIS,
14 $ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 $ // See the License for the specific language governing permissions and
16 $ // limitations under the License.
17
18 $ template make_lexer()
19
20 # The lexer extracts NIT tokens from an input stream.
21 # It is best used with the Parser.
22 class Lexer
23 special TablesCapable
24         # Last peeked token (null when no token is pending)
25         var _token: nullable Token
26
27         # Lexer current state (one of the state_* values below)
28         var _state: Int = 0
29
30         # Name of the stream (as given to the Location of tokens)
31         readable var _filename: String
32
33         # Input stream where characters are read
34         var _stream: IStream
35
36         # Pushback buffer used as a stack to store unread characters
37         var _stream_buf: Buffer
38
39         # Index of the last character stored in the pushback buffer (-1 when it is empty)
40         var _stream_pos: Int
41
42         # Current line number in the input stream (0-based, locations add 1)
43         var _line: Int = 0
44
45         # Current column in the input stream
46         var _pos: Int = 0
47
48         # Was the last character a carriage-return?
49         var _cr: Bool = false
50
51         # Is the end of the stream reached?
52         var _eof: Bool = false
53
54         # Current working text read from the input stream
55         var _text: Buffer
56
57 $ foreach {lexer_data/state}
58         # Constant state value
59         private fun state_${translate(@name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: Int do return @id end
60 $ end foreach
61
62         # Create a new lexer for a stream (and a name); the pushback buffer starts empty
63         init(stream: IStream, fname: String)
64         do
65                 _filename = fname
66                 _text = new Buffer
67                 _stream = stream
68                 _stream_pos = -1
69                 _stream_buf = new Buffer
70         end
71
72         # Give the next token (but do not consume it); the token stays cached until `next` is called
73         fun peek: Token
74         do
75                 while _token == null do
76                         _token = get_token
77                 end
78                 return _token.as(not null)
79         end
80
81         # Give and consume the next token (the peeked one if any, else a newly read one)
82         fun next: Token
83         do
84                 var result = _token
85                 while result == null do
86                         result = get_token
87                 end
88                 _token = null
89                 return result
90         end
91
92         # Get a token, or null if it is discarded.
93         private fun get_token: nullable Token
94         do
95                 var dfa_state = 0
96
97                 var start_pos = _pos
98                 var start_line = _line
99
100                 var accept_state = -1 # stays -1 until an accepting state is reached
101                 var accept_token = -1
102                 var accept_length = -1
103                 var accept_pos = -1
104                 var accept_line = -1
105
106                 var text = _text
107                 text.clear
108
109                 loop
110                         var c = get_char
111
112                         if c != -1 then
113                                 var cr = _cr
114                                 var line = _line
115                                 var pos = _pos
116                                 if c == 10 then # line feed
117                                         if cr then # the line was already counted on the carriage-return
118                                                 cr = false
119                                         else
120                                                 line = line + 1
121                                                 pos = 0
122                                         end
123                                 else if c == 13 then # carriage-return
124                                         line = line + 1
125                                         pos = 0
126                                         cr = true
127                                 else
128                                         pos = pos + 1
129                                         cr = false
130                                 end
131
132                                 text.add(c.ascii)
133
134                                 loop
135                                         var old_state = dfa_state
136                                         if dfa_state < -1 then # a value below -1 is decoded as another state to look up
137                                                 old_state = -2 - dfa_state
138                                         end
139
140                                         dfa_state = -1
141
142                                         var low = 0
143                                         var high = lexer_goto(old_state, 0) - 1
144
145                                         if high >= 0 then
146                                                 while low <= high do # binary search among the triples of the row
147                                                         var middle = (low + high) / 2
148                                                         var offset = middle * 3 + 1 # +1 because length is at 0
149
150                                                         if c < lexer_goto(old_state, offset) then
151                                                                 high = middle - 1
152                                                         else if c > lexer_goto(old_state, offset+1) then
153                                                                 low = middle + 1
154                                                         else
155                                                                 dfa_state = lexer_goto(old_state, offset+2)
156                                                                 break
157                                                         end
158                                                 end
159                                         end
160                                         if dfa_state > -2 then break
161                                 end
162
163                                 _cr = cr
164                                 _line = line
165                                 _pos = pos
166                         else
167                                 dfa_state = -1 # end of stream: no transition is possible
168                         end
169
170                         if dfa_state >= 0 then
171                                 var tok = lexer_accept(dfa_state)
172                                 if tok != -1 then # remember the last accepting state and where it ends
173                                         accept_state = dfa_state
174                                         accept_token = tok
175                                         accept_length = text.length
176                                         accept_pos = _pos
177                                         accept_line = _line
178                                 end
179                         else
180                                 if accept_state != -1 then
181                                         var location = new Location(_filename, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
182                                         _pos = accept_pos # go back to the end of the accepted text
183                                         _line = accept_line
184                                         push_back(accept_length) # unread what was read after the accepted text
185 $ foreach {//token}
186                                         if accept_token == ${position()-1} then
187 $    if {count(transition[@from!=@to])!=0}
188                                                 var state_id = _state
189 $        foreach transition in {transition[@from!=@to]}
190                                                 if state_id == ${/parser/lexer_data/state[@name=$transition/@from]/@id} then
191                                                         _state = state_${translate(@to,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
192                                                 end
193 $        end
194 $    end if
195 $    if {@parser_index}
196 $        if {not(@text)}
197                                                 var token_text = text.substring(0, accept_length)
198                                                 return new @ename.init_tk(token_text, location)
199 $        else
200                                                 return new @ename.init_tk(location)
201 $        end
202 $    else
203                                                 return null
204 $    end
205                                         end
206 $ end foreach
207                                 else
208                                         var location = new Location(_filename, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
209                                         if text.length > 0 then # some characters were read but nothing was accepted
210                                                 var token = new PError.init_error("Syntax error: unknown token {text}.", location)
211                                                 return token
212                                         else # nothing was read: the end of the stream is reached
213                                                 var token = new EOF(location)
214                                                 return token
215                                         end
216                                 end
217                         end
218                 end
219         end
220
221         # Read the next character, or return -1 once the end of the stream is reached.
222         # The character is read from the stream or from the pushback buffer.
223         private fun get_char: Int
224         do
225                 if _eof then
226                         return -1
227                 end
228
229                 var result: Int
230
231                 var sp = _stream_pos
232                 if sp >= 0 then # the pushback buffer is not empty: take its last character
233                         var res = _stream_buf[_stream_pos]
234                         _stream_pos = sp - 1
235                         result = res.ascii
236                 else
237                         result = _stream.read_char
238                 end
239
240                 if result == -1 then
241                         _eof = true
242                 end
243
244                 return result
245         end
246
247         # Unread the characters of the working text located from index `accept_length` to its end.
248         # Unread characters are stored in the pushback buffer.
249         private fun push_back(accept_length: Int)
250         do
251                 var length = _text.length
252                 var i = length - 1
253                 while i >= accept_length do # last character first, so get_char gives them back in reading order
254                         _eof = false # there is something to read again
255                         _stream_pos = _stream_pos + 1
256                         _stream_buf[_stream_pos] = _text[i]
257                         i = i - 1
258                 end
259         end
260 end
261
262 $ end template
263
264
265
266 $ template make_lexer_table()
267 $ foreach {lexer_data/goto_table/state}
268 $     foreach {row}
269 $         if {count(goto)!=0}
270 static const int lexer_goto_row${position()}[] = { /* number of gotos, then one (low, high, state) triple per goto */
271         ${count(goto)},
272 $             foreach {goto}
273         @low, @high, @state[-sep ','-]
274 $             end foreach
275 };
276 $         end
277 $     end foreach
278 static const int lexer_goto_row_null[] = {0}; /* shared row for the rows without any goto: its length is 0 */
279 const int* const lexer_goto_table[] = { /* one pointer per row, in row order */
280 $     foreach {row}
281 $         if {count(goto)!=0}
282         lexer_goto_row${position()}[-sep ','-]
283 $         else
284         lexer_goto_row_null[-sep ','-]
285 $         end
286 $     end foreach
287 };
288 $ end foreach
289
290 $ foreach {lexer_data/accept_table/state}
291 const int lexer_accept_table[] = { /* one value per i element of the accept table; presumably what lexer_accept reads - to confirm */
292         [-foreach {i}-]${.}[-sep ','-][-end foreach-]
293 };
294 $ end foreach
295
296 $ end template