private import tables
redef class Token
- var _text: nullable String
+ # Cached text of the token, computed lazily from `location` on first access.
+ private var cached_text: nullable String
redef fun text
do
- var res = _text
+ var res = _cached_text
if res != null then return res
res = location.text
- _text = res
+ _cached_text = res
return res
end
redef fun text=(text)
do
- _text = text
+ _cached_text = text
end
+
+ # Index of this token class in the generated parser tables.
+ # Each concrete token subclass provides its own constant value.
fun parser_index: Int is abstract
end
redef class EOF
- redef fun parser_index: Int
- do
- return 97
- end
-
+ # Initialize an end-of-file token at `loc` with an empty text.
init init_tk(loc: Location)
do
- _text = ""
+ _cached_text = ""
_location = loc
end
end
redef class AError
- readable var _message: String
+ # The error message associated with this error token.
+ var message: String
+
+ # Initialize the error token with its `message` at location `loc`.
init init_error(message: String, loc: Location)
do
init_tk(loc)
- _message = message
+ self.message = message
end
end
redef class ALexerError
- readable var _string: String
+ # The raw input text that the lexer could not tokenize.
+ var string: String
+
+ # Initialize the lexer error with its `message`, location `loc` and offending `string`.
init init_lexer_error(message: String, loc: Location, string: String)
do
init_error(message, loc)
- _string = string
+ self.string = string
end
end
redef class AParserError
- readable var _token: Token
+ # The unexpected token on which the parser error occurred.
+ var token: Token
+
+ # Initialize the parser error with its `message`, location `loc` and offending `token`.
init init_parser_error(message: String, loc: Location, token: Token)
do
init_error(message, loc)
- _token = token
+ self.token = token
end
end
# It is better to use it with the Parser
class Lexer
super TablesCapable
+
# Last peeked token
- var _token: nullable Token
+ var token: nullable Token = null
# Lexer current state
- var _state: Int = 0
+ private var state: Int = 0
# The source file
- readable var _file: SourceFile
+ var file: SourceFile
# Current character in the stream
- var _stream_pos: Int = 0
+ var stream_pos: Int = 0
# Current line number in the input stream
- var _line: Int = 0
+ var line: Int = 0
# Current column in the input stream
- var _pos: Int = 0
+ var pos: Int = 0
- # Was the last character a cariage-return?
- var _cr: Bool = false
+ # Was the last character a carriage-return?
+ var cr: Bool = false
# Constant state values
private fun state_initial: Int do return 0 end
- # Create a new lexer for a stream (and a name)
- init(file: SourceFile)
- do
- _file = file
- end
-
# The last peeked token to chain them
private var last_token: nullable Token = null
t = get_token
while t == null do t = get_token
- var l = last_token
- if l != null then
- l.next_token = t
- t.prev_token = l
- else
- _file.first_token = t
+ if isset t._location then
+ var l = last_token
+ if l != null then
+ l.next_token = t
+ t.prev_token = l
+ else
+ file.first_token = t
+ end
+ last_token = t
end
- last_token = t
_token = t
return t
end
var start_stream_pos = sp
var start_pos = _pos
var start_line = _line
- var string = _file.string
+ var file = self.file
+ var string = file.string
var string_len = string.length
var accept_state = -1
if sp >= string_len then
dfa_state = -1
else
- var c = string.chars[sp].ascii
+ # Very ugly hack, this is because of the way SableCC generates its tables.
+ # Due to the 0xFFFF limit of a Java char, when a big Nit char is read (i.e.
+ # code point > 65535), it crashes.
+ #
+ # Hence, if a char has a code point <= 255 (ISO8859 range), it is left as is.
+ # Else, it is replaced by 255.
+ # This does not corrupt the lexer and works perfectly on any character.
+ #
+ # TL;DR: workaround for the 16-bit char limitation inherited from the
+ # Java-oriented SableCC tables; see the explanation above.
+ var c = string[sp].code_point
+ if c >= 256 then c = 255
sp += 1
var cr = _cr
if c == 10 then
if cr then
cr = false
- _file.line_starts[line] = sp
+ file.line_starts[line] = sp
else
line = line + 1
pos = 0
- _file.line_starts[line] = sp
+ file.line_starts[line] = sp
end
else if c == 13 then
line = line + 1
pos = 0
cr = true
- _file.line_starts[line] = sp
+ file.line_starts[line] = sp
else
pos = pos + 1
cr = false
end
else
if accept_state != -1 then
- var location = new Location(_file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
_pos = accept_pos
_line = accept_line
_stream_pos = start_stream_pos + accept_length
if accept_token == 0 then
+ # Ignored token (whitespaces)
return null
end
+ var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos)
return make_token(accept_token, location)
else
_stream_pos = sp
- var location = new Location(_file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
+ var location = new Location(file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
if sp > start_stream_pos then
var text = string.substring(start_stream_pos, sp-start_stream_pos)
- var token = new ALexerError.init_lexer_error("Syntax error: unknown token {text}.", location, text)
- _file.last_token = token
+ var token = new ALexerError.init_lexer_error("Syntax Error: unknown token `{text}`.", location, text)
+ file.last_token = token
return token
else
var token = new EOF.init_tk(location)
- _file.last_token = token
+ file.last_token = token
return token
end
end