d8c287ae532c7e827372837072018e56ca1f115b
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit lexer
18 intrude import parser_nodes
22 private var cached_text
: nullable String
26 var res
= _cached_text
27 if res
!= null then return res
38 fun parser_index
: Int is abstract
42 init init_tk
(loc
: Location)
52 init init_error
(message
: String, loc
: Location)
55 self.message
= message
59 redef class ALexerError
62 init init_lexer_error
(message
: String, loc
: Location, string
: String)
64 init_error
(message
, loc
)
69 redef class AParserError
72 init init_parser_error
(message
: String, loc
: Location, token
: Token)
74 init_error
(message
, loc
)
79 # The lexer extracts NIT tokens from an input stream.
80 # It is better used with the Parser
84 var token
: nullable Token
87 private var state
: Int = 0
92 # Current character in the stream
93 var stream_pos
: Int = 0
95 # Current line number in the input stream
98 # Current column in the input stream
101 # Was the last character a carriage-return?
104 # Constant state values
105 private fun state_initial
: Int do return 0 end
107 # Create a new lexer for a stream (and a name)
108 init(file
: SourceFile)
113 # The last peeked token to chain them
114 private var last_token
: nullable Token = null
116 # Give the next token (but do not consume it)
120 if t
!= null then return t
123 while t
== null do t
= get_token
125 if isset t
._location
then
140 # Give and consume the next token
148 # Primitive method to return a token, or return null if it is discarded
149 # Is used to implement `peek` and `next`
150 protected fun get_token
: nullable Token
155 var start_stream_pos
= sp
157 var start_line
= _line
159 var string
= file
.string
160 var string_len
= string
.length
162 var accept_state
= -1
163 var accept_token
= -1
164 var accept_length
= -1
169 if sp
>= string_len
then
172 var c
= string
[sp
].ascii
181 file
.line_starts
[line
] = sp
185 file
.line_starts
[line
] = sp
191 file
.line_starts
[line
] = sp
198 var old_state
= dfa_state
199 if dfa_state
< -1 then
200 old_state
= -2 - dfa_state
206 var high
= lexer_goto
(old_state
, 0) - 1
210 var middle
= (low
+ high
) / 2
211 var offset
= middle
* 3 + 1 # +1 because length is at 0
213 if c
< lexer_goto
(old_state
, offset
) then
215 else if c
> lexer_goto
(old_state
, offset
+1) then
218 dfa_state
= lexer_goto
(old_state
, offset
+2)
223 if dfa_state
> -2 then break
231 if dfa_state
>= 0 then
232 var tok
= lexer_accept
(dfa_state
)
234 accept_state
= dfa_state
236 accept_length
= sp
- start_stream_pos
241 if accept_state
!= -1 then
242 var location
= new Location(file
, start_line
+ 1, accept_line
+ 1, start_pos
+ 1, accept_pos
)
245 _stream_pos
= start_stream_pos
+ accept_length
246 if accept_token
== 0 then
249 return make_token
(accept_token
, location
)
252 var location
= new Location(file
, start_line
+ 1, start_line
+ 1, start_pos
+ 1, start_pos
+ 1)
253 if sp
> start_stream_pos
then
254 var text
= string
.substring
(start_stream_pos
, sp-start_stream_pos
)
255 var token
= new ALexerError.init_lexer_error
("Syntax error: unknown token {text}.", location
, text
)
256 file
.last_token
= token
259 var token
= new EOF.init_tk
(location
)
260 file
.last_token
= token
268 # Allocate the right Token object for a given identifier
269 protected fun make_token
(accept_token
: Int, location
: Location): Token is abstract