text: use UInt32 to manipulate chars

author Alexis Laferrière <alexis.laf@xymus.net>

Wed, 8 Feb 2017 06:51:21 +0000 (01:51 -0500)

committer Alexis Laferrière <alexis.laf@xymus.net>

Thu, 9 Feb 2017 18:25:37 +0000 (13:25 -0500)
author Alexis Laferrière <alexis.laf@xymus.net>
Wed, 8 Feb 2017 06:51:21 +0000 (01:51 -0500)
committer Alexis Laferrière <alexis.laf@xymus.net>
Thu, 9 Feb 2017 18:25:37 +0000 (13:25 -0500)
diff --git a/lib/core/text/abstract_text.nit b/lib/core/text/abstract_text.nit

index ea438de..a09843d 100644 (file)
--- a/lib/core/text/abstract_text.nit
+++ b/lib/core/text/abstract_text.nit
@@ -789,17 +789,17 @@ abstract class Text
                 if pos == null then pos = 0
                 if ln == null then ln = length - pos
                 if ln < 6 then return 0xFFFD.code_point
-               var cp = from_utf16_digit(pos + 2)
-               if cp < 0xD800 then return cp.code_point
-               if cp > 0xDFFF then return cp.code_point
-               if cp > 0xDBFF then return 0xFFFD.code_point
+               var cp = from_utf16_digit(pos + 2).to_u32
+               if cp < 0xD800u32 then return cp.code_point
+               if cp > 0xDFFFu32 then return cp.code_point
+               if cp > 0xDBFFu32 then return 0xFFFD.code_point
                 if ln == 6 then return 0xFFFD.code_point
                 if ln < 12 then return 0xFFFD.code_point
                 cp <<= 16
-               cp += from_utf16_digit(pos + 8)
-               var cplo = cp & 0xFFFF
-               if cplo < 0xDC00 then return 0xFFFD.code_point
-               if cplo > 0xDFFF then return 0xFFFD.code_point
+               cp += from_utf16_digit(pos + 8).to_u32
+               var cplo = cp & 0xFFFFu32
+               if cplo < 0xDC00u32 then return 0xFFFD.code_point
+               if cplo > 0xDFFFu32 then return 0xFFFD.code_point
                 return cp.from_utf16_surr.code_point
         end
  
diff --git a/lib/core/text/flat.nit b/lib/core/text/flat.nit

index a28a264..de69148 100644 (file)
--- a/lib/core/text/flat.nit
+++ b/lib/core/text/flat.nit
@@ -1359,7 +1359,7 @@ redef class CString
                 while rem > 0 do
                         while rem >= 4 do
                                 var i = fetch_4_chars(pos)
-                               if i & 0x80808080 != 0 then break
+                               if i & 0x80808080u32 != 0u32 then break
                                 pos += 4
                                 chr_ln += 4
                                 rem -= 4
diff --git a/lib/core/text/native.nit b/lib/core/text/native.nit

index 0ff3787..0469a7d 100644 (file)
--- a/lib/core/text/native.nit
+++ b/lib/core/text/native.nit
@@ -13,6 +13,7 @@ module native
  
  import kernel
  import math
+import fixed_ints
  
  in "C" `{
  #ifdef __linux__
@@ -68,19 +69,26 @@ redef class Byte
         end
  end
  
-redef class Int
+redef class UInt32
         # Returns the code_point from a utf16 surrogate pair
         #
-       #     assert 0xD83DDE02.from_utf16_surr == 0x1F602
-       fun from_utf16_surr: Int do
-               var hi = (self & 0xFFFF0000) >> 16
-               var lo = self & 0xFFFF
-               var cp = 0
-               cp += (hi - 0xD800) << 10
-               cp += lo - 0xDC00
-               cp += 0x10000
+       #     assert 0xD83DDE02u32.from_utf16_surr == 0x1F602u32
+       fun from_utf16_surr: UInt32 do
+               var hi = (self & 0xFFFF0000u32) >> 16
+               var lo = self & 0xFFFFu32
+               var cp = 0u32
+               cp += (hi - 0xD800u32) << 10
+               cp += lo - 0xDC00u32
+               cp += 0x10000u32
                 return cp
         end
+
+       # The character whose code point (Unicode-wise) is `self`
+       #
+       #     assert 65u32.code_point == 'A'
+       #     assert 10u32.code_point == '\n'
+       #     assert 0x220Bu32.code_point == '∋'
+       fun code_point: Char `{ return self; `}
  end
  
  # C string `char *`
@@ -141,26 +149,26 @@ extern class CString `{ char* `}
                 var c = self[pos]
                 if c & 0x80u8 == 0u8 then return c.ascii
                 var b = fetch_4_hchars(pos)
-               var ret = 0
-               if b & 0xC00000 != 0x800000 then return 0xFFFD.code_point
-               if b & 0xE0000000 == 0xC0000000 then
-                       ret |= (b & 0x1F000000) >> 18
-                       ret |= (b & 0x3F0000) >> 16
+               var ret = 0u32
+               if b & 0xC00000u32 != 0x800000u32 then return 0xFFFD.code_point
+               if b & 0xE0000000u32 == 0xC0000000u32 then
+                       ret |= (b & 0x1F000000u32) >> 18
+                       ret |= (b & 0x3F0000u32) >> 16
                         return ret.code_point
                 end
-               if not b & 0xC000 == 0x8000 then return 0xFFFD.code_point
-               if b & 0xF0000000 == 0xE0000000 then
-                       ret |= (b & 0xF000000) >> 12
-                       ret |= (b & 0x3F0000) >> 10
-                       ret |= (b & 0x3F00) >> 8
+               if not b & 0xC000u32 == 0x8000u32 then return 0xFFFD.code_point
+               if b & 0xF0000000u32 == 0xE0000000u32 then
+                       ret |= (b & 0xF000000u32) >> 12
+                       ret |= (b & 0x3F0000u32) >> 10
+                       ret |= (b & 0x3F00u32) >> 8
                         return ret.code_point
                 end
-               if not b & 0xC0 == 0x80 then return 0xFFFD.code_point
-               if b & 0xF8000000 == 0xF0000000 then
-                       ret |= (b.to_i & 0x7000000) >> 6
-                       ret |= (b.to_i & 0x3F0000) >> 4
-                       ret |= (b.to_i & 0x3F00) >> 2
-                       ret |= b.to_i & 0x3F
+               if not b & 0xC0u32 == 0x80u32 then return 0xFFFD.code_point
+               if b & 0xF8000000u32 == 0xF0000000u32 then
+                       ret |= (b & 0x7000000u32) >> 6
+                       ret |= (b & 0x3F0000u32) >> 4
+                       ret |= (b & 0x3F00u32) >> 2
+                       ret |= b & 0x3Fu32
                         return ret.code_point
                 end
                 return 0xFFFD.code_point
@@ -200,7 +208,7 @@ extern class CString `{ char* `}
                 while dist > 0 do
                         while dist >= 4 do
                                 var i = fetch_4_chars(ns_i)
-                               if i & 0x80808080 != 0 then break
+                               if i & 0x80808080u32 != 0u32 then break
                                 ns_i += 4
                                 my_i += 4
                                 dist -= 4
@@ -214,7 +222,7 @@ extern class CString `{ char* `}
                 while dist < 0 do
                         while dist <= -4 do
                                 var i = fetch_4_chars(ns_i - 4)
-                               if i & 0x80808080 != 0 then break
+                               if i & 0x80808080u32 != 0u32 then break
                                 ns_i -= 4
                                 my_i -= 4
                                 dist += 4
@@ -256,8 +264,8 @@ extern class CString `{ char* `}
         # If the char is invalid UTF-8, `pos` is returned as-is
         #
         # ~~~raw
-       #       assert "abc".items.find_beginning_of_char_at(2) == 2
-       #       assert "か".items.find_beginning_of_char_at(1) == 0
+       #       assert "abc".items.find_beginning_of_char_at(2) == 2
+       #       assert "か".items.find_beginning_of_char_at(1) == 0
         #       assert [0x41u8, 233u8].to_s.items.find_beginning_of_char_at(1) == 1
         # ~~~
         fun find_beginning_of_char_at(pos: Int): Int do
@@ -280,7 +288,7 @@ extern class CString `{ char* `}
                 while byte_length > 0 do
                         while byte_length >= 4 do
                                 var i = fetch_4_chars(st)
-                               if i & 0x80808080 != 0 then break
+                               if i & 0x80808080u32 != 0u32 then break
                                 byte_length -= 4
                                 st += 4
                                 ln += 4
@@ -295,11 +303,10 @@ extern class CString `{ char* `}
         end
  
         # Fetch 4 chars in `self` at `pos`
-       fun fetch_4_chars(pos: Int): Int is intern `{ return (long)*((uint32_t*)(self+pos)); `}
+       fun fetch_4_chars(pos: Int): UInt32 is intern `{ return *((uint32_t*)(self+pos)); `}
  
         # Fetch 4 chars in `self` at `pos`
-       fun fetch_4_hchars(pos: Int): Int is intern `{ return (long)be32toh(*((uint32_t*)(self+pos))); `}
-
+       fun fetch_4_hchars(pos: Int): UInt32 is intern `{ return (uint32_t)be32toh(*((uint32_t*)(self+pos))); `}
  
         # Right shifts `len` bytes of `self` from `sh` bytes starting at position `pos`
         fun rshift(sh, len, pos: Int) do
diff --git a/lib/json/static.nit b/lib/json/static.nit

index f3107cc..f5eef1d 100644 (file)
--- a/lib/json/static.nit
+++ b/lib/json/static.nit
@@ -81,7 +81,7 @@ redef class Text
                                                 if self[i + 5] == '\\' and self[i + 6] == 'u' then
                                                         u16_esc <<= 16
                                                         u16_esc += from_utf16_digit(i + 7)
-                                                       char = u16_esc.from_utf16_surr.code_point
+                                                       char = u16_esc.to_u32.from_utf16_surr.code_point
                                                         i += 6
                                                 else
                                                         char = 0xFFFD.code_point
diff --git a/src/compiler/abstract_compiler.nit b/src/compiler/abstract_compiler.nit

index 47f813d..f0bb0bd 100644 (file)
--- a/src/compiler/abstract_compiler.nit
+++ b/src/compiler/abstract_compiler.nit
@@ -2612,10 +2612,10 @@ redef class AMethPropdef
                                 v.ret(v.new_expr("(char*){alloc}", ret.as(not null)))
                                 return true
                         else if pname == "fetch_4_chars" then
-                               v.ret(v.new_expr("(long)*((uint32_t*)({arguments[0]} + {arguments[1]}))", ret.as(not null)))
+                               v.ret(v.new_expr("*((uint32_t*)({arguments[0]} + {arguments[1]}))", ret.as(not null)))
                                 return true
                         else if pname == "fetch_4_hchars" then
-                               v.ret(v.new_expr("(long)be32toh(*((uint32_t*)({arguments[0]} + {arguments[1]})))", ret.as(not null)))
+                               v.ret(v.new_expr("(uint32_t)be32toh(*((uint32_t*)({arguments[0]} + {arguments[1]})))", ret.as(not null)))
                                 return true
                         end
                 else if cname == "NativeArray" then
diff --git a/src/interpreter/naive_interpreter.nit b/src/interpreter/naive_interpreter.nit

index 34530b7..233bdce 100644 (file)
--- a/src/interpreter/naive_interpreter.nit
+++ b/src/interpreter/naive_interpreter.nit
@@ -1174,9 +1174,9 @@ redef class AMethPropdef
                                 var ns = recvval.fast_cstring(args[1].to_i)
                                 return v.c_string_instance(ns.to_s)
                         else if pname == "fetch_4_chars" then
-                               return v.int_instance(args[0].val.as(CString).fetch_4_chars(args[1].to_i))
+                               return v.uint32_instance(args[0].val.as(CString).fetch_4_chars(args[1].to_i))
                         else if pname == "fetch_4_hchars" then
-                               return v.int_instance(args[0].val.as(CString).fetch_4_hchars(args[1].to_i))
+                               return v.uint32_instance(args[0].val.as(CString).fetch_4_hchars(args[1].to_i))
                         else if pname == "utf8_length" then
                                 return v.int_instance(args[0].val.as(CString).utf8_length(args[1].to_i, args[2].to_i))
                         end
author	Alexis Laferrière <alexis.laf@xymus.net>
	Wed, 8 Feb 2017 06:51:21 +0000 (01:51 -0500)
committer	Alexis Laferrière <alexis.laf@xymus.net>
	Thu, 9 Feb 2017 18:25:37 +0000 (13:25 -0500)
lib/core/text/abstract_text.nit		patch \| blob \| history
lib/core/text/flat.nit		patch \| blob \| history
lib/core/text/native.nit		patch \| blob \| history
lib/json/static.nit		patch \| blob \| history
src/compiler/abstract_compiler.nit		patch \| blob \| history
src/interpreter/naive_interpreter.nit		patch \| blob \| history