1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit lexer
18 intrude import parser_nodes
22 var _text
: nullable String
27 if res
!= null then return res
38 fun parser_index
: Int is abstract
42 redef fun parser_index
: Int
47 init init_tk
(loc
: Location)
55 readable var _message
: String
57 init init_error
(message
: String, loc
: Location)
64 redef class ALexerError
65 readable var _string
: String
67 init init_lexer_error
(message
: String, loc
: Location, string
: String)
69 init_error
(message
, loc
)
74 redef class AParserError
75 readable var _token
: Token
77 init init_parser_error
(message
: String, loc
: Location, token
: Token)
79 init_error
(message
, loc
)
84 # The lexer extracts NIT tokens from an input stream.
85 # It is best used together with the Parser.
89 var _token
: nullable Token
95 readable var _file
: SourceFile
97 # Position of the current character in the stream
98 var _stream_pos
: Int = 0
100 # Current line number in the input stream
103 # Current column in the input stream
106 # Was the last character a carriage-return?
107 var _cr
: Bool = false
109 # Constant state values: `state_initial` always returns 0.
110 private fun state_initial
: Int do return 0 end
112 # Create a new lexer for the given source file
113 init(file
: SourceFile)
118 # The last peeked token to chain them
119 private var last_token
: nullable Token = null
121 # Give the next token (but do not consume it)
125 if t
!= null then return t
128 while t
== null do t
= get_token
130 if t
._location
!= null then
136 _file
.first_token
= t
145 # Give and consume the next token
153 # Primitive method to return a token, or return null if it is discarded
154 # Is used to implement `peek` and `next`
155 protected fun get_token
: nullable Token
160 var start_stream_pos
= sp
162 var start_line
= _line
163 var string
= _file
.string
164 var string_len
= string
.length
166 var accept_state
= -1
167 var accept_token
= -1
168 var accept_length
= -1
173 if sp
>= string_len
then
176 var c
= string
.chars
[sp
].ascii
185 _file
.line_starts
[line
] = sp
189 _file
.line_starts
[line
] = sp
195 _file
.line_starts
[line
] = sp
202 var old_state
= dfa_state
203 if dfa_state
< -1 then
204 old_state
= -2 - dfa_state
210 var high
= lexer_goto
(old_state
, 0) - 1
214 var middle
= (low
+ high
) / 2
215 var offset
= middle
* 3 + 1 # +1 because length is at 0
217 if c
< lexer_goto
(old_state
, offset
) then
219 else if c
> lexer_goto
(old_state
, offset
+1) then
222 dfa_state
= lexer_goto
(old_state
, offset
+2)
227 if dfa_state
> -2 then break
235 if dfa_state
>= 0 then
236 var tok
= lexer_accept
(dfa_state
)
238 accept_state
= dfa_state
240 accept_length
= sp
- start_stream_pos
245 if accept_state
!= -1 then
246 var location
= new Location(_file
, start_line
+ 1, accept_line
+ 1, start_pos
+ 1, accept_pos
)
249 _stream_pos
= start_stream_pos
+ accept_length
250 if accept_token
== 0 then
253 return make_token
(accept_token
, location
)
256 var location
= new Location(_file
, start_line
+ 1, start_line
+ 1, start_pos
+ 1, start_pos
+ 1)
257 if sp
> start_stream_pos
then
258 var text
= string
.substring
(start_stream_pos
, sp-start_stream_pos
)
259 var token
= new ALexerError.init_lexer_error
("Syntax error: unknown token {text}.", location
, text
)
260 _file
.last_token
= token
263 var token
= new EOF.init_tk
(location
)
264 _file
.last_token
= token
272 # Allocate the right Token object for the token identifier `accept_token` at `location` (abstract: must be implemented by a subclass)
273 protected fun make_token
(accept_token
: Int, location
: Location): Token is abstract