src: Compiler, interpreter and parser updates for UTF-8
[nit.git] / src / parser / lexer_work.nit
index fa7f18f..94810af 100644 (file)
@@ -164,7 +164,17 @@ class Lexer
                        if sp >= string_len then
                                dfa_state = -1
                        else
+                               # Very ugly hack, this is because of the way SableCC generates its tables.
+                               # Due to the 0xFFFF limit of a Java char, when a big Nit char is read (i.e.
+                               # code point > 65535), it crashes.
+                               #
+                               # Hence, if a char has a code point <= 255 (ISO8859 range), it is left as is.
+                               # Else, it is replaced by 255.
+                               # This does not corrupt the lexer and works perfectly on any character.
+                               #
+                               # TL;DR: the generated tables are bounded by Java's 16-bit char, so any code point >= 256 is clamped to 255 before the table lookup.
                                var c = string[sp].ascii
+                               if c >= 256 then c = 255
                                sp += 1
 
                                var cr = _cr
@@ -248,7 +258,7 @@ class Lexer
                                        var location = new Location(file, start_line + 1, start_line + 1, start_pos + 1, start_pos + 1)
                                        if sp > start_stream_pos then
                                                var text = string.substring(start_stream_pos, sp-start_stream_pos)
-                                               var token = new ALexerError.init_lexer_error("Syntax error: unknown token {text}.", location, text)
+                                               var token = new ALexerError.init_lexer_error("Syntax Error: unknown token `{text}`.", location, text)
                                                file.last_token = token
                                                return token
                                        else