# It is better user with the Parser
class Lexer
super TablesCapable
+
# Last peeked token
- var token: nullable Token
+ var token: nullable Token = null
# Lexer current state
private var state: Int = 0
# Constante state values
private fun state_initial: Int do return 0 end
- # Create a new lexer for a stream (and a name)
- init(file: SourceFile)
- do
- self.file = file
- end
-
# The last peeked token to chain them
private var last_token: nullable Token = null
if sp >= string_len then
dfa_state = -1
else
- var c = string[sp].ascii
+ # Workaround for the way SableCC generates its tables: they are sized
+ # for Java's 16-bit `char` (maximum 0xFFFF), so reading a Nit character
+ # with a code point above 65535 would crash the generated lexer.
+ #
+ # Therefore, a char with a code point <= 255 (ISO 8859-1 range) is left
+ # as is, and any other char is replaced by 255 before indexing the tables.
+ # This does not corrupt the lexer and works correctly on any character.
+ var c = string[sp].code_point
+ if c >= 256 then c = 255
sp += 1
var cr = _cr
end
else
if accept_state != -1 then
- var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
_pos = accept_pos
_line = accept_line
_stream_pos = start_stream_pos + accept_length
if accept_token == 0 then
+ # Ignored token (whitespace)
return null
end
+ var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
return make_token(accept_token, location)
else
_stream_pos = sp
var location = new Location(file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
if sp > start_stream_pos then
var text = string.substring(start_stream_pos, sp-start_stream_pos)
- var token = new ALexerError.init_lexer_error("Syntax error: unknown token {text}.", location, text)
+ var token = new ALexerError.init_lexer_error("Syntax Error: unknown token `{text}`.", location, text)
file.last_token = token
return token
else