parser: display colored lines with error messages
[nit.git] / src / parser / xss / lexer.xss
1 $ // This file is part of NIT ( http://www.nitlanguage.org ).
2 $ //
3 $ // Copyright 2008 Jean Privat <jean@pryen.org>
4 $ // Based on algorithms developed for ( http://www.sablecc.org/ ).
5 $ //
6 $ // Licensed under the Apache License, Version 2.0 (the "License");
7 $ // you may not use this file except in compliance with the License.
8 $ // You may obtain a copy of the License at
9 $ //
10 $ //     http://www.apache.org/licenses/LICENSE-2.0
11 $ //
12 $ // Unless required by applicable law or agreed to in writing, software
13 $ // distributed under the License is distributed on an "AS IS" BASIS,
14 $ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 $ // See the License for the specific language governing permissions and
16 $ // limitations under the License.
17
18 $ template make_lexer()
19
20 # The lexer extracts NIT tokens from the text of a source file.
21 # It is better used with the Parser.
22 class Lexer
23         super TablesCapable
24         # Last peeked token (null when no token is buffered)
25         var _token: nullable Token
26
27         # Current lexer state (compared with the state ids when a token is accepted)
28         var _state: Int = 0
29
30         # The source file whose string is tokenized
31         readable var _file: SourceFile
32
33         # Index of the current character in the source string
34         var _stream_pos: Int = 0
35
36         # Current line number in the input stream (0-based; locations add 1)
37         var _line: Int = 0
38
39         # Current column in the input stream (reset to 0 at each line break)
40         var _pos: Int = 0
41
42         # Was the last character a carriage-return?
43         var _cr: Bool = false
44
45 $ foreach {lexer_data/state}
46         # Constant state values
47         private fun state_${translate(@name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: Int do return @id end
48 $ end foreach
49
50         # Create a new lexer reading from the given source file
51         init(file: SourceFile)
52         do
53                 _file = file
54         end
55
56         # Give the next token (but do not consume it): it stays buffered in _token
57         fun peek: Token
58         do
59                 while _token == null do # get_token returns null for discarded tokens
60                         _token = get_token
61                 end
62                 return _token.as(not null)
63         end
64
65         # Give and consume the next token (uses the peeked token when there is one)
66         fun next: Token
67         do
68                 var result = _token
69                 while result == null do # skip discarded tokens
70                         result = get_token
71                 end
72                 _token = null
73                 return result
74         end
75
76         # Get a token, or null if it is discarded.
77         private fun get_token: nullable Token
78         do
79                 var dfa_state = 0 # every token starts from DFA state 0
80
81                 var sp = _stream_pos
82                 var start_stream_pos = sp
83                 var start_pos = _pos
84                 var start_line = _line
85                 var string = _file.string
86                 var string_len = string.length
87
88                 var accept_state = -1 # -1 means no accepting state was seen yet
89                 var accept_token = -1
90                 var accept_length = -1
91                 var accept_pos = -1
92                 var accept_line = -1
93
94                 loop
95                         if sp >= string_len then # end of input: take the no-transition path
96                                 dfa_state = -1
97                         else
98                                 var c = string[sp].ascii
99                                 sp += 1
100
101                                 var cr = _cr
102                                 var line = _line
103                                 var pos = _pos
104                                 if c == 10 then # line feed
105                                         if cr then # LF right after CR: same line break, only move the line start
106                                                 cr = false
107                                                 _file.line_starts[line] = sp
108                                         else
109                                                 line = line + 1
110                                                 pos = 0
111                                                 _file.line_starts[line] = sp
112                                         end
113                                 else if c == 13 then # carriage return
114                                         line = line + 1
115                                         pos = 0
116                                         cr = true
117                                         _file.line_starts[line] = sp
118                                 else
119                                         pos = pos + 1
120                                         cr = false
121                                 end
122
123                                 loop
124                                         var old_state = dfa_state
125                                         if dfa_state < -1 then # a value below -1 asks to retry from state -2 - dfa_state
126                                                 old_state = -2 - dfa_state
127                                         end
128
129                                         dfa_state = -1 # default: no transition found
130
131                                         var low = 0
132                                         var high = lexer_goto(old_state, 0) - 1 # entry 0 of a row is its number of ranges
133
134                                         if high >= 0 then
135                                                 while low <= high do # binary search of the range containing c
136                                                         var middle = (low + high) / 2
137                                                         var offset = middle * 3 + 1 # +1 because length is at 0; each range takes 3 entries
138
139                                                         if c < lexer_goto(old_state, offset) then
140                                                                 high = middle - 1
141                                                         else if c > lexer_goto(old_state, offset+1) then
142                                                                 low = middle + 1
143                                                         else
144                                                                 dfa_state = lexer_goto(old_state, offset+2)
145                                                                 break
146                                                         end
147                                                 end
148                                         end
149                                         if dfa_state > -2 then break
150                                 end
151
152                                 _cr = cr
153                                 _line = line
154                                 _pos = pos
155                         end
156
157                         if dfa_state >= 0 then
158                                 var tok = lexer_accept(dfa_state)
159                                 if tok != -1 then # accepting state: remember it, a later one overwrites it
160                                         accept_state = dfa_state
161                                         accept_token = tok
162                                         accept_length = sp - start_stream_pos
163                                         accept_pos = _pos
164                                         accept_line = _line
165                                 end
166                         else
167                                 if accept_state != -1 then # no transition left: emit the last accepted token
168                                         var location = new Location(_file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
169                                         _pos = accept_pos # rewind the position to the end of the accepted text
170                                         _line = accept_line
171                                         _stream_pos = start_stream_pos + accept_length
172 $ foreach {//token}
173                                         if accept_token == ${position()-1} then
174 $    if {count(transition[@from!=@to])!=0}
175                                                 var state_id = _state
176 $        foreach transition in {transition[@from!=@to]}
177                                                 if state_id == ${/parser/lexer_data/state[@name=$transition/@from]/@id} then
178                                                         _state = state_${translate(@to,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
179                                                 end
180 $        end
181 $    end if
182 $    if {@parser_index}
183 $        if {not(@text)}
184                                                 var token_text = string.substring(start_stream_pos, accept_length)
185                                                 return new @ename.init_tk(token_text, location)
186 $        else
187                                                 return new @ename.init_tk(location)
188 $        end
189 $    else
190                                                 return null # no parser_index on this token: it is discarded
191 $    end
192                                         end
193 $ end foreach
194                                 else
195                                         _stream_pos = sp
196                                         var location = new Location(_file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
197                                         if sp > start_stream_pos then # characters were consumed but nothing was accepted
198                                                 var text = string.substring(start_stream_pos, sp-start_stream_pos)
199                                                 var token = new PError.init_error("Syntax error: unknown token {text}.", location)
200                                                 return token
201                                         else
202                                                 var token = new EOF(location) # nothing consumed: end of input
203                                                 return token
204                                         end
205                                 end
206                         end
207                 end
208         end
209 end
210
211 $ end template
212
213 $ // make_lexer_table: emits the C tables. Each goto row is a count followed by (low, high, state) triples;
214 $ // rows without any goto share lexer_goto_row_null. lexer_accept_table lists the value of each i element.
215 $ template make_lexer_table()
216 $ foreach {lexer_data/goto_table/state}
217 $     foreach {row}
218 $         if {count(goto)!=0}
219 static const int lexer_goto_row${position()}[] = {
220         ${count(goto)},
221 $             foreach {goto}
222         @low, @high, @state[-sep ','-]
223 $             end foreach
224 };
225 $         end
226 $     end foreach
227 static const int lexer_goto_row_null[] = {0};
228 const int* const lexer_goto_table[] = {
229 $     foreach {row}
230 $         if {count(goto)!=0}
231         lexer_goto_row${position()}[-sep ','-]
232 $         else
233         lexer_goto_row_null[-sep ','-]
234 $         end
235 $     end foreach
236 };
237 $ end foreach
238
239 $ foreach {lexer_data/accept_table/state}
240 const int lexer_accept_table[] = {
241         [-foreach {i}-]${.}[-sep ','-][-end foreach-]
242 };
243 $ end foreach
244
245 $ end template