1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit lexer
18 intrude import parser_nodes
22 var _text
: nullable String
27 if res
!= null then return res
# Abstract accessor returning an `Int`; a `redef fun parser_index` supplies the value further down.
# NOTE(review): presumably the index identifying this token kind to the parser — confirm against the parser tables.
38 fun parser_index
: Int is abstract
42 redef fun parser_index
: Int
47 init init_tk
(loc
: Location)
57 init init_error
(message
: String, loc
: Location)
60 self.message
= message
64 redef class ALexerError
67 init init_lexer_error
(message
: String, loc
: Location, string
: String)
69 init_error
(message
, loc
)
74 redef class AParserError
77 init init_parser_error
(message
: String, loc
: Location, token
: Token)
79 init_error
(message
, loc
)
84 # The lexer extracts NIT tokens from an input stream.
85 # It is better used with the Parser
89 var _token
: nullable Token
97 # Position of the current character in the stream
98 var _stream_pos
: Int = 0
100 # Current line number in the input stream
103 # Current column in the input stream
106 # Was the last character a carriage-return?
107 var _cr
: Bool = false
109 # Constant state values
# `state_initial` is the identifier of the lexer's initial state; it always returns 0.
110 private fun state_initial
: Int do return 0 end
112 # Create a new lexer for a stream (and a name)
113 init(file
: SourceFile)
118 # The last peeked token to chain them
119 private var last_token
: nullable Token = null
121 # Give the next token (but do not consume it)
125 if t
!= null then return t
128 while t
== null do t
= get_token
130 if t
._location
!= null then
145 # Give and consume the next token
153 # Primitive method to return a token, or return null if it is discarded
154 # Is used to implement `peek` and `next`
155 protected fun get_token
: nullable Token
160 var start_stream_pos
= sp
162 var start_line
= _line
164 var string
= file
.string
165 var string_len
= string
.length
167 var accept_state
= -1
168 var accept_token
= -1
169 var accept_length
= -1
174 if sp
>= string_len
then
177 var c
= string
.chars
[sp
].ascii
186 file
.line_starts
[line
] = sp
190 file
.line_starts
[line
] = sp
196 file
.line_starts
[line
] = sp
203 var old_state
= dfa_state
204 if dfa_state
< -1 then
205 old_state
= -2 - dfa_state
211 var high
= lexer_goto
(old_state
, 0) - 1
215 var middle
= (low
+ high
) / 2
216 var offset
= middle
* 3 + 1 # +1 because length is at 0
218 if c
< lexer_goto
(old_state
, offset
) then
220 else if c
> lexer_goto
(old_state
, offset
+1) then
223 dfa_state
= lexer_goto
(old_state
, offset
+2)
228 if dfa_state
> -2 then break
236 if dfa_state
>= 0 then
237 var tok
= lexer_accept
(dfa_state
)
239 accept_state
= dfa_state
241 accept_length
= sp
- start_stream_pos
246 if accept_state
!= -1 then
247 var location
= new Location(file
, start_line
+ 1, accept_line
+ 1, start_pos
+ 1, accept_pos
)
250 _stream_pos
= start_stream_pos
+ accept_length
251 if accept_token
== 0 then
254 return make_token
(accept_token
, location
)
257 var location
= new Location(file
, start_line
+ 1, start_line
+ 1, start_pos
+ 1, start_pos
+ 1)
258 if sp
> start_stream_pos
then
259 var text
= string
.substring
(start_stream_pos
, sp-start_stream_pos
)
260 var token
= new ALexerError.init_lexer_error
("Syntax error: unknown token {text}.", location
, text
)
261 file
.last_token
= token
264 var token
= new EOF.init_tk
(location
)
265 file
.last_token
= token
273 # Allocate the right Token object for a given identifier
# `accept_token` is the integer identifier of the accepted token and `location` is the
# source location to attach to it. `get_token` calls this once a token has been accepted.
# Abstract: the concrete implementation is not in this file.
# NOTE(review): presumably provided by the generated lexer tables module — confirm.
274 protected fun make_token
(accept_token
: Int, location
: Location): Token is abstract