4a0820e394e7ac5b5c9a18cc4402457011bc7836
1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Internal algorithm and data structures for the Nit lexer
18 intrude import parser_nodes
22 var _text
: nullable String
27 if res
!= null then return res
38 fun parser_index
: Int is abstract
42 redef fun parser_index
: Int
47 init init_tk
(loc
: Location)
55 readable var _message
: String
57 init init_error
(message
: String, loc
: Location)
64 redef class ALexerError
65 readable var _string
: String
67 init init_lexer_error
(message
: String, loc
: Location, string
: String)
69 init_error
(message
, loc
)
74 redef class AParserError
75 readable var _token
: Token
77 init init_parser_error
(message
: String, loc
: Location, token
: Token)
79 init_error
(message
, loc
)
84 # The lexer extracts NIT tokens from an input stream.
85 # It is better used with the Parser
89 var _token
: nullable Token
95 readable var _file
: SourceFile
97 # Current character in the stream
98 var _stream_pos
: Int = 0
100 # Current line number in the input stream
103 # Current column in the input stream
106 # Was the last character a carriage-return?
107 var _cr
: Bool = false
109 # Constant state values
110 private fun state_initial
: Int do return 0 end
112 # Create a new lexer for a stream (and a name)
113 init(file
: SourceFile)
118 # The last peeked token to chain them
119 private var last_token
: nullable Token = null
121 # Give the next token (but do not consume it)
125 if t
!= null then return t
128 while t
== null do t
= get_token
135 _file
.first_token
= t
143 # Give and consume the next token
151 # Primitive method to return a token, or return null if it is discarded
152 # Is used to implement `peek` and `next`
153 protected fun get_token
: nullable Token
158 var start_stream_pos
= sp
160 var start_line
= _line
161 var string
= _file
.string
162 var string_len
= string
.length
164 var accept_state
= -1
165 var accept_token
= -1
166 var accept_length
= -1
171 if sp
>= string_len
then
174 var c
= string
.chars
[sp
].ascii
183 _file
.line_starts
[line
] = sp
187 _file
.line_starts
[line
] = sp
193 _file
.line_starts
[line
] = sp
200 var old_state
= dfa_state
201 if dfa_state
< -1 then
202 old_state
= -2 - dfa_state
208 var high
= lexer_goto
(old_state
, 0) - 1
212 var middle
= (low
+ high
) / 2
213 var offset
= middle
* 3 + 1 # +1 because length is at 0
215 if c
< lexer_goto
(old_state
, offset
) then
217 else if c
> lexer_goto
(old_state
, offset
+1) then
220 dfa_state
= lexer_goto
(old_state
, offset
+2)
225 if dfa_state
> -2 then break
233 if dfa_state
>= 0 then
234 var tok
= lexer_accept
(dfa_state
)
236 accept_state
= dfa_state
238 accept_length
= sp
- start_stream_pos
243 if accept_state
!= -1 then
244 var location
= new Location(_file
, start_line
+ 1, accept_line
+ 1, start_pos
+ 1, accept_pos
)
247 _stream_pos
= start_stream_pos
+ accept_length
248 if accept_token
== 0 then
251 return make_token
(accept_token
, location
)
254 var location
= new Location(_file
, start_line
+ 1, start_line
+ 1, start_pos
+ 1, start_pos
+ 1)
255 if sp
> start_stream_pos
then
256 var text
= string
.substring
(start_stream_pos
, sp-start_stream_pos
)
257 var token
= new ALexerError.init_lexer_error
("Syntax error: unknown token {text}.", location
, text
)
258 _file
.last_token
= token
261 var token
= new EOF.init_tk
(location
)
262 _file
.last_token
= token
270 # Allocate the right Token object for a given identifier
271 protected fun make_token
(accept_token
: Int, location
: Location): Token is abstract