From: Lucas Bajolet Date: Fri, 10 Jul 2015 20:15:52 +0000 (-0400) Subject: src: Compiler, interpreter and parser updates for UTF-8 X-Git-Tag: v0.7.7~10^2~1 X-Git-Url: http://nitlanguage.org src: Compiler, interpreter and parser updates for UTF-8 Signed-off-by: Lucas Bajolet --- diff --git a/src/compiler/abstract_compiler.nit b/src/compiler/abstract_compiler.nit index 43686ae..3dcfab1 100644 --- a/src/compiler/abstract_compiler.nit +++ b/src/compiler/abstract_compiler.nit @@ -1494,8 +1494,12 @@ abstract class AbstractCompilerVisitor fun char_instance(value: Char): RuntimeVariable do var t = mmodule.char_type - var res = new RuntimeVariable("'{value.to_s.escape_to_c}'", t, t) - return res + + if value.ascii < 128 then + return new RuntimeVariable("'{value.to_s.escape_to_c}'", t, t) + else + return new RuntimeVariable("{value.ascii}", t, t) + end end # Generate a float value @@ -1537,7 +1541,7 @@ abstract class AbstractCompilerVisitor var native_mtype = mmodule.native_string_type var nat = self.new_var(native_mtype) self.add("{nat} = \"{string.escape_to_c}\";") - var length = self.int_instance(string.length) + var length = self.int_instance(string.bytelen) self.add("{res} = {self.send(self.get_property("to_s_with_length", native_mtype), [nat, length]).as(not null)};") self.add("{name} = {res};") self.add("\}") @@ -2157,10 +2161,7 @@ redef class AMethPropdef return true end else if cname == "Char" then - if pname == "output" then - v.add("printf(\"%c\", ((unsigned char){arguments.first}));") - return true - else if pname == "object_id" then + if pname == "object_id" then v.ret(v.new_expr("(long){arguments.first}", ret.as(not null))) return true else if pname == "successor" then diff --git a/src/interpreter/naive_interpreter.nit b/src/interpreter/naive_interpreter.nit index 78bd4e2..61889c7 100644 --- a/src/interpreter/naive_interpreter.nit +++ b/src/interpreter/naive_interpreter.nit @@ -275,10 +275,10 @@ class NaiveInterpreter # Return a new native string initialized with `txt` fun native_string_instance(txt: String): Instance do - var instance = native_string_instance_len(txt.length+1) + var instance = native_string_instance_len(txt.bytelen+1) var val = instance.val - val[txt.length] = 0u8 - txt.to_cstring.copy_to(val, txt.length, 0, 0) + val[txt.bytelen] = 0u8 + txt.to_cstring.copy_to(val, txt.bytelen, 0, 0) return instance end @@ -298,7 +298,7 @@ class NaiveInterpreter fun string_instance(txt: String): Instance do var nat = native_string_instance(txt) - var res = self.send(self.force_get_primitive_method("to_s_with_length", nat.mtype), [nat, self.int_instance(txt.length)]) + var res = self.send(self.force_get_primitive_method("to_s_with_length", nat.mtype), [nat, self.int_instance(txt.bytelen)]) assert res != null return res end diff --git a/src/parser/lexer_work.nit b/src/parser/lexer_work.nit index 0f06bce..94810af 100644 --- a/src/parser/lexer_work.nit +++ b/src/parser/lexer_work.nit @@ -164,7 +164,17 @@ class Lexer if sp >= string_len then dfa_state = -1 else + # Very ugly hack, this is because of the way SableCC generates its tables. + # Due to the 0xFFFF limit of a Java char, when a big Nit char is read (i.e. + # code point > 65535), it crashes. + # + # Hence, if a char has a code point <= 255 (ISO8859 range), it is left as is. + # Else, it is replaced by 255. + # This does not corrupt the lexer and works perfectly on any character. + # + # TL;DR: Java fucked up, need retarded solution to cope for retarded decision var c = string[sp].ascii + if c >= 256 then c = 255 sp += 1 var cr = _cr