Merge: lib/core: provide default codec-aware read_char
author Jean Privat <jean@pryen.org>
Thu, 10 May 2018 22:46:06 +0000 (18:46 -0400)
committer Jean Privat <jean@pryen.org>
Thu, 10 May 2018 22:46:06 +0000 (18:46 -0400)
Previous implementations of read_char were unaware of codec issues, and
used to read a byte and convert it to a code point.

For ASCII characters this was enough, but once Unicode characters were
read on a char-by-char basis, wrong characters would appear, since each
byte of a multibyte character was converted to a code point on its own.

This commit fixes this issue by using the Codec API to read a character
intelligently, which properly supports multibyte encodings.

Signed-off-by: Lucas Bajolet <lucas.bajolet@gmail.com>

Pull-Request: #2648
Reviewed-by: Jean Privat <jean@pryen.org>

1  2 
src/interpreter/naive_interpreter.nit

@@@ -21,6 -21,7 +21,6 @@@ import litera
  import semantize
  private import parser::tables
  import mixin
 -import primitive_types
  private import model::serialize_model
  private import frontend::explain_assert_api
  
@@@ -341,6 -342,18 +341,18 @@@ class NaiveInterprete
                return instance
        end
  
+       # Return a new C string instance wrapping `txt.fast_cstring(from)`, i.e. sharing the same data space as `txt` (no copy through `to_s`)
+       fun c_string_instance_fast_cstr(txt: CString, from: Int): Instance
+       do
+               var ncstr = txt.fast_cstring(from)
+               var t = mainmodule.c_string_type
+               var instance = new PrimitiveInstance[CString](t, ncstr)
+               init_instance_primitive(instance)
+               return instance
+       end
        # Return a new C string initialized of `length`
        fun c_string_instance_len(length: Int): PrimitiveInstance[CString]
        do
@@@ -1193,8 -1206,7 +1205,7 @@@ redef class AMethPropde
                        else if pname == "atoi" then
                                return v.int_instance(recvval.atoi)
                        else if pname == "fast_cstring" then
-                               var ns = recvval.fast_cstring(args[1].to_i)
-                               return v.c_string_instance(ns.to_s)
+                               return v.c_string_instance_fast_cstr(args[0].val.as(CString), args[1].to_i)
                        else if pname == "fetch_4_chars" then
                                return v.uint32_instance(args[0].val.as(CString).fetch_4_chars(args[1].to_i))
                        else if pname == "fetch_4_hchars" then