lib/string_exp/utf8: Added routine to compute code point on a UTF-8 character.
author Lucas Bajolet <r4pass@hotmail.com>
Mon, 21 Jul 2014 14:37:11 +0000 (10:37 -0400)
committer Lucas Bajolet <r4pass@hotmail.com>
Tue, 29 Jul 2014 16:00:11 +0000 (12:00 -0400)
Signed-off-by: Lucas Bajolet <r4pass@hotmail.com>

lib/string_experimentations/utf8.nit

index 781284b..95cd4d4 100644 (file)
@@ -77,6 +77,27 @@ extern class UnicodeChar `{ UTF8Char* `}
                return recv->ns;
        `}
 
+       # Returns the Unicode code point represented by this character
+       #
+       # The value is decoded from the UTF-8 byte sequence of `len` bytes that
+       # starts at `recv->pos` in `recv->ns`: the payload bits of the lead byte
+       # and of each continuation byte are concatenated, most significant first.
+       #
+       # Returns -1 if `len` reports a length outside 1..4.
+       #
+       # Note: a Unicode character might not be a visible glyph, but it will be used to determine canonical equivalence
+       fun code_point: Int import UnicodeChar.len `{
+               switch(UnicodeChar_len(recv)){
+                       case 1:
+                               /* 0xxxxxxx : 7 payload bits */
+                               return (long)(0x7F & (unsigned char)recv->ns[recv->pos]);
+                       case 2:
+                               /* 110xxxxx 10xxxxxx : 5 + 6 payload bits */
+                               return (long)(((0x1F & (unsigned char)recv->ns[recv->pos]) << 6) |
+                               (0x3F & (unsigned char)recv->ns[recv->pos+1]));
+                       case 3:
+                               /* 1110xxxx 10xxxxxx 10xxxxxx : 4 + 6 + 6 payload bits */
+                               return (long)(((0x0F & (unsigned char)recv->ns[recv->pos]) << 12) |
+                               ((0x3F & (unsigned char)recv->ns[recv->pos+1]) << 6) |
+                               (0x3F & (unsigned char)recv->ns[recv->pos+2]));
+                       case 4:
+                               /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx : 3 + 6 + 6 + 6 payload bits */
+                               return (long)(((0x07 & (unsigned char)recv->ns[recv->pos]) << 18) |
+                               ((0x3F & (unsigned char)recv->ns[recv->pos+1]) << 12) |
+                               ((0x3F & (unsigned char)recv->ns[recv->pos+2]) << 6) |
+                               (0x3F & (unsigned char)recv->ns[recv->pos+3]));
+                       default:
+                               /* Unexpected length: return a sentinel instead of falling off
+                                * the end of a non-void function, which is undefined behaviour in C. */
+                               return -1L;
+               }
+       `}
+
        redef fun to_s import NativeString.to_s_with_length `{
                int len = utf8___UnicodeChar_len___impl(recv);
                char* r = malloc(len + 1);