6a4d1d08abbab32ee2b8a1ae9b61987f1934f561
[nit.git] / contrib / pep8analysis / src / parser / xss / lexer.xss
1 $ // This file is part of NIT ( http://www.nitlanguage.org ).
2 $ //
3 $ // Copyright 2008 Jean Privat <jean@pryen.org>
4 $ // Based on algorithms developed for ( http://www.sablecc.org/ ).
5 $ //
6 $ // Licensed under the Apache License, Version 2.0 (the "License");
7 $ // you may not use this file except in compliance with the License.
8 $ // You may obtain a copy of the License at
9 $ //
10 $ //     http://www.apache.org/licenses/LICENSE-2.0
11 $ //
12 $ // Unless required by applicable law or agreed to in writing, software
13 $ // distributed under the License is distributed on an "AS IS" BASIS,
14 $ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 $ // See the License for the specific language governing permissions and
16 $ // limitations under the License.
17
18 $ template make_lexer()
19
20 # The lexer extracts NIT tokens from an input stream.
21 # It is best used together with the Parser.
22 class Lexer
23         super TablesCapable
24         # Token returned by the last `peek` and not yet consumed by `next` (null if none)
25         var _token: nullable Token
26
27         # Current lexer state (one of the `state_*` constants)
28         var _state: Int = 0
29
30         # The source file being tokenized
31         readable var _file: SourceFile
32
33         # Index in `_file.string` of the next character to read
34         var _stream_pos: Int = 0
35
36         # Current line number in the input stream (0-based; locations add 1)
37         var _line: Int = 0
38
39         # Current column in the input stream (characters read so far on the current line)
40         var _pos: Int = 0
41
42         # Was the last character a carriage-return?
43         var _cr: Bool = false
44
45 $ foreach {lexer_data/state}
46         # Constant state value
47         private fun state_${translate(@name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}: Int do return @id end
48 $ end foreach
49
50         # Create a new lexer that reads its characters from `file`
51         init(file: SourceFile)
52         do
53                 _file = file
54         end
55
56         # The most recently produced token, kept so consecutive tokens can be chained
57         private var last_token: nullable Token = null
58
59         # Give the next token (but do not consume it)
60         fun peek: Token
61         do
62                 var t = _token
63                 if t != null then return t
64
65                 t = get_token
66                 while t == null do t = get_token # skip discarded tokens
67
68                 var l = last_token
69                 if l != null then
70                         l.next_token = t
71                         t.prev_token = l
72                 end
73
74                 last_token = t
75                 _token = t
76                 return t
77         end
78
79         # Give and consume the next token
80         fun next: Token
81         do
82                 var result = peek
83                 _token = null
84                 return result
85         end
86
87         # Primitive method that scans one token; returns null when the token is discarded
88         # It is used to implement `peek` and `next`
89         protected fun get_token: nullable Token
90         do
91                 var dfa_state = 0
92
93                 var sp = _stream_pos
94                 var start_stream_pos = sp
95                 var start_pos = _pos
96                 var start_line = _line
97                 var string = _file.string
98                 var string_len = string.length
99
100                 var accept_state = -1 # -1 means that no accepting state was reached yet
101                 var accept_token = -1
102                 var accept_length = -1
103                 var accept_pos = -1
104                 var accept_line = -1
105
106                 loop
107                         if sp >= string_len then
108                                 dfa_state = -1 # end of input: no further transition
109                         else
110                                 var c = string[sp].ascii
111                                 sp += 1
112
113                                 var cr = _cr
114                                 var line = _line
115                                 var pos = _pos
116                                 if c == 10 then # line feed
117                                         if cr then # LF right after CR: line already counted, only move its start
118                                                 cr = false
119                                                 _file.line_starts[line] = sp
120                                         else
121                                                 line = line + 1
122                                                 pos = 0
123                                                 _file.line_starts[line] = sp
124                                         end
125                                 else if c == 13 then # carriage return
126                                         line = line + 1
127                                         pos = 0
128                                         cr = true
129                                         _file.line_starts[line] = sp
130                                 else
131                                         pos = pos + 1
132                                         cr = false
133                                 end
134
135                                 loop # look up the transition on `c`, repeating while the result is below -1
136                                         var old_state = dfa_state
137                                         if dfa_state < -1 then
138                                                 old_state = -2 - dfa_state # a value below -1 encodes the row to search instead
139                                         end
140
141                                         dfa_state = -1
142
143                                         var low = 0
144                                         var high = lexer_goto(old_state, 0) - 1 # entry 0 of a row is its number of ranges
145
146                                         if high >= 0 then
147                                                 while low <= high do # binary search of the range that contains `c`
148                                                         var middle = (low + high) / 2
149                                                         var offset = middle * 3 + 1 # +1 because length is at 0
150
151                                                         if c < lexer_goto(old_state, offset) then
152                                                                 high = middle - 1
153                                                         else if c > lexer_goto(old_state, offset+1) then
154                                                                 low = middle + 1
155                                                         else
156                                                                 dfa_state = lexer_goto(old_state, offset+2)
157                                                                 break
158                                                         end
159                                                 end
160                                         end
161                                         if dfa_state > -2 then break # a plain state, or -1 when no range matched
162                                 end
163
164                                 _cr = cr
165                                 _line = line
166                                 _pos = pos
167                         end
168
169                         if dfa_state >= 0 then
170                                 var tok = lexer_accept(dfa_state)
171                                 if tok != -1 then # accepting state: remember the longest match so far
172                                         accept_state = dfa_state
173                                         accept_token = tok
174                                         accept_length = sp - start_stream_pos
175                                         accept_pos = _pos
176                                         accept_line = _line
177                                 end
178                         else
179                                 if accept_state != -1 then
180                                         var location = new Location(_file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
181                                         _pos = accept_pos # rewind to the end of the accepted token
182                                         _line = accept_line
183                                         _stream_pos = start_stream_pos + accept_length
184 $ foreach {//token}
185                                         if accept_token == ${position()-1} then
186 $    if {count(transition[@from!=@to])!=0}
187                                                 var state_id = _state
188 $        foreach transition in {transition[@from!=@to]}
189                                                 if state_id == ${/parser/lexer_data/state[@name=$transition/@from]/@id} then
190                                                         _state = state_${translate(@to,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","abcdefghijklmnopqrstuvwxyz")}
191                                                 end
192 $        end
193 $    end if
194 $    if {@parser_index}
195                                                 return new @ename.init_tk(location)
196 $    else
197                                                 return null
198 $    end
199                                         end
200 $ end foreach
201                                 else
202                                         _stream_pos = sp
203                                         var location = new Location(_file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
204                                         if sp > start_stream_pos then # characters were read but no token matched
205                                                 var text = string.substring(start_stream_pos, sp-start_stream_pos)
206                                                 var token = new PLexerError.init_lexer_error("Syntax error: unknown token {text}.", location, text)
207                                                 return token
208                                         else # nothing was read: end of input
209                                                 var token = new EOF.init_tk(location)
210                                                 return token
211                                         end
212                                 end
213                         end
214                 end
215         end
216 end
217
218 $ end template
219
220
221
222 $ template make_lexer_table()
$ // For each row that has transitions, emit a C array holding the number of
$ // ranges followed by one (low, high, state) triple per range.
223 $ foreach {lexer_data/goto_table/state}
224 $     foreach {row}
225 $         if {count(goto)!=0}
226 static const int lexer_goto_row${position()}[] = {
227         ${count(goto)},
228 $             foreach {goto}
229         @low, @high, @state[-sep ','-]
230 $             end foreach
231 };
232 $         end
233 $     end foreach
234 static const int lexer_goto_row_null[] = {0};
235 const int* const lexer_goto_table[] = {
$ // Rows without any transition all point to the shared lexer_goto_row_null.
236 $     foreach {row}
237 $         if {count(goto)!=0}
238         lexer_goto_row${position()}[-sep ','-]
239 $         else
240         lexer_goto_row_null[-sep ','-]
241 $         end
242 $     end foreach
243 };
244 $ end foreach
245
$ // Accept table: one comma-separated entry per `i` element of the accept data.
246 $ foreach {lexer_data/accept_table/state}
247 const int lexer_accept_table[] = {
248         [-foreach {i}-]${.}[-sep ','-][-end foreach-]
249 };
250 $ end foreach
251
252 $ end template