1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit lexer
18 intrude import parser_nodes
# Optional cached text (null when nothing has been cached).
22 private var cached_text
: nullable String
# Return the cached text right away when it is available.
26 var res
= _cached_text
27 if res
!= null then return res
# Integer index associated with this token.
# NOTE(review): presumably an index into the generated parser tables -- confirm.
# Abstract: concrete classes must provide it.
38 fun parser_index
: Int is abstract
# Named constructor taking the source location `loc`.
# The lexer uses it to create the end-of-file token (`new EOF.init_tk(location)`).
42 init init_tk
(loc
: Location)
# Named constructor shared by error nodes: records `message` on the receiver.
# `loc` is the location of the error.
# NOTE(review): how `loc` is stored is not established by the lines below -- confirm.
52 init init_error
(message
: String, loc
: Location)
# Keep the description of the error.
55 self.message
= message
59 redef class ALexerError
# Build a lexer error from a `message`, its location `loc` and the related text `string`.
# The lexer calls it with the unrecognized input text when no token matches.
62 init init_lexer_error
(message
: String, loc
: Location, string
: String)
# Delegate the common part (message and location) to `init_error`.
64 init_error
(message
, loc
)
69 redef class AParserError
# Build a parser error from a `message`, its location `loc` and a `token`.
# NOTE(review): `token` is presumably the token on which parsing failed -- confirm.
72 init init_parser_error
(message
: String, loc
: Location, token
: Token)
# Delegate the common part (message and location) to `init_error`.
74 init_error
(message
, loc
)
79 # The lexer extracts NIT tokens from an input stream.
80 # It is better used with the Parser
# Token slot of the lexer, null by default.
# NOTE(review): presumably holds the token returned by `peek` until it is consumed -- confirm.
85 var token
: nullable Token = null
# Current lexer state, initially 0 (the value returned by `state_initial`).
88 private var state
: Int = 0
93 # Current character in the stream
# (an offset into the source text; `get_token` moves it past each accepted token)
94 var stream_pos
: Int = 0
96 # Current line number in the input stream
99 # Current column in the input stream
102 # Was the last character a carriage-return?
# Constant state values:
# identifier of the initial lexer state, which is always 0.
private fun state_initial: Int
do
	return 0
end
108 # The last peeked token to chain them
# Starts as null, i.e. no token has been recorded yet.
109 private var last_token
: nullable Token = null
111 # Give the next token (but do not consume it)
# Return `t` directly when it is already set.
115 if t
!= null then return t
# `get_token` yields null for discarded tokens: keep asking until a real one comes.
118 while t
== null do t
= get_token
# Only proceed when the location attribute of the token has been set.
120 if isset t
._location
then
135 # Give and consume the next token
143 # Primitive method to return a token, or return null if it is discarded.
144 # It is used to implement `peek` and `next`
# Scan the source text from the current position, driving the DFA described by
# `lexer_goto` / `lexer_accept`, and build the resulting token.
# Produces a regular token through `make_token`, an `ALexerError` when some input
# was consumed without reaching an accepting state, or an `EOF` token.
145 protected fun get_token
: nullable Token
# Remember where the token starts: used for its length, its location and to
# reposition the stream after the match.
150 var start_stream_pos
= sp
152 var start_line
= _line
# Local aliases of the source text and its length for the scanning loop.
154 var string
= file
.string
155 var string_len
= string
.length
# Best match found so far; -1 means that nothing has been accepted yet.
157 var accept_state
= -1
158 var accept_token
= -1
159 var accept_length
= -1
# The whole source text has been consumed.
164 if sp
>= string_len
then
# Code of the current character; it is compared with the bounds stored in the goto table.
167 var c
= string
[sp
].ascii
# Register `sp` in the line index of the source file for line `line`.
# NOTE(review): three such updates exist, presumably one per kind of line ending -- confirm.
176 file
.line_starts
[line
] = sp
180 file
.line_starts
[line
] = sp
186 file
.line_starts
[line
] = sp
# States below -1 encode another state to look up: decode it as `-2 - dfa_state`.
193 var old_state
= dfa_state
194 if dfa_state
< -1 then
195 old_state
= -2 - dfa_state
# Bisection over the transitions of `old_state`.
# Entry 0 of the row is the number of transitions; each transition then takes
# three entries: lower character bound, upper character bound, target state.
201 var high
= lexer_goto
(old_state
, 0) - 1
205 var middle
= (low
+ high
) / 2
206 var offset
= middle
* 3 + 1 # +1 because length is at 0
# `c` is below the lower bound of this transition.
208 if c
< lexer_goto
(old_state
, offset
) then
# `c` is above the upper bound of this transition.
210 else if c
> lexer_goto
(old_state
, offset
+1) then
# `c` is inside the range: take the target state of this transition.
213 dfa_state
= lexer_goto
(old_state
, offset
+2)
# Values of -2 or less go through the `-2 - dfa_state` decoding again;
# anything greater ends the lookup.
218 if dfa_state
> -2 then break
# A non-negative value is a DFA state: query the accept table for it.
# NOTE(review): the test on `tok` guarding the updates below is not visible here -- confirm.
226 if dfa_state
>= 0 then
227 var tok
= lexer_accept
(dfa_state
)
229 accept_state
= dfa_state
# Length of the accepted text, counted from the start of the token.
231 accept_length
= sp
- start_stream_pos
# Something was accepted: build its location and move the stream just after the accepted text.
236 if accept_state
!= -1 then
# NOTE(review): the `+ 1` presumably converts zero-based counters to one-based positions -- confirm.
237 var location
= new Location(file
, start_line
+ 1, accept_line
+ 1, start_pos
+ 1, accept_pos
)
240 _stream_pos
= start_stream_pos
+ accept_length
# NOTE(review): token kind 0 presumably marks a discarded token (null result) -- confirm.
241 if accept_token
== 0 then
244 return make_token
(accept_token
, location
)
# Nothing was accepted: the location collapses to the starting position.
247 var location
= new Location(file
, start_line
+ 1, start_line
+ 1, start_pos
+ 1, start_pos
+ 1)
# Some characters were read without reaching an accepting state: report them as a lexer error.
248 if sp
> start_stream_pos
then
249 var text
= string
.substring
(start_stream_pos
, sp-start_stream_pos
)
250 var token
= new ALexerError.init_lexer_error
("Syntax error: unknown token {text}.", location
, text
)
# Record the error token as the last token of the source file.
251 file
.last_token
= token
# End-of-file token, also recorded as the last token of the source file.
254 var token
= new EOF.init_tk
(location
)
255 file
.last_token
= token
263 # Allocate the right Token object for a given identifier
# `accept_token` is the token kind accepted by `get_token`; `location` is the
# location computed for the matched text.
# Abstract: must be provided by a concrete implementation.
264 protected fun make_token
(accept_token
: Int, location
: Location): Token is abstract