X-Git-Url: http://nitlanguage.org diff --git a/src/parser/lexer_work.nit b/src/parser/lexer_work.nit index e7102b6..e061613 100644 --- a/src/parser/lexer_work.nit +++ b/src/parser/lexer_work.nit @@ -39,11 +39,6 @@ redef class Token end redef class EOF - redef fun parser_index: Int - do - return 97 - end - init init_tk(loc: Location) do _cached_text = "" @@ -85,8 +80,9 @@ end # It is better user with the Parser class Lexer super TablesCapable + # Last peeked token - var token: nullable Token + var token: nullable Token = null # Lexer current state private var state: Int = 0 @@ -103,18 +99,12 @@ class Lexer # Current column in the input stream var pos: Int = 0 - # Was the last character a cariage-return? + # Was the last character a carriage-return? var cr: Bool = false # Constante state values private fun state_initial: Int do return 0 end - # Create a new lexer for a stream (and a name) - init(file: SourceFile) - do - self.file = file - end - # The last peeked token to chain them private var last_token: nullable Token = null @@ -174,7 +164,17 @@ class Lexer if sp >= string_len then dfa_state = -1 else - var c = string[sp].ascii + # Very ugly hack, this is because of the way SableCC generates its tables. + # Due to the 0xFFFF limit of a Java char, when a big Nit char is read (i.e. + # code point > 65535), it crashes. + # + # Hence, if a char has a code point <= 255 (ISO8859 range), it is left as is. + # Else, it is replaced by 255. + # This does not corrupt the lexer and works perfectly on any character. 
+ # + # TL;DR: this works around the 16-bit Java char limit baked into SableCC's generated tables. var c = string[sp].code_point + if c >= 256 then c = 255 sp += 1 var cr = _cr @@ -244,20 +244,21 @@ end else if accept_state != -1 then - var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) _pos = accept_pos _line = accept_line _stream_pos = start_stream_pos + accept_length if accept_token == 0 then + # Ignored token (whitespaces) return null end + var location = new Location(file, start_line + 1, accept_line + 1, start_pos + 1, accept_pos) return make_token(accept_token, location) else _stream_pos = sp var location = new Location(file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1) if sp > start_stream_pos then var text = string.substring(start_stream_pos, sp-start_stream_pos) - var token = new ALexerError.init_lexer_error("Syntax error: unknown token {text}.", location, text) + var token = new ALexerError.init_lexer_error("Syntax Error: unknown token `{text}`.", location, text) file.last_token = token return token else