X-Git-Url: http://nitlanguage.org diff --git a/lib/string_experimentations/utf8.nit b/lib/string_experimentations/utf8.nit index 5feef35..ec66af6 100644 --- a/lib/string_experimentations/utf8.nit +++ b/lib/string_experimentations/utf8.nit @@ -34,7 +34,7 @@ typedef struct { # UTF-8 char as defined in RFC-3629, e.g. 1-4 Bytes # # A UTF-8 char has its bytes stored in a NativeString (char*) -extern class UnicodeChar `{ UTF8Char* `} +extern class UTF8Char `{ UTF8Char* `} new(pos: Int, ns: NativeString) `{ UTF8Char* u = malloc(sizeof(UTF8Char)); @@ -47,6 +47,7 @@ extern class UnicodeChar `{ UTF8Char* `} # # As per the specification : # + # ~~~raw # Length | UTF-8 octet sequence # | (binary) # ---------+------------------------------------------------- @@ -54,6 +55,7 @@ extern class UnicodeChar `{ UTF8Char* `} # 2 | 110xxxxx 10xxxxxx # 3 | 1110xxxx 10xxxxxx 10xxxxxx # 4 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + # ~~~ private fun len: Int `{ char* ns = recv->ns; int pos = recv->pos; @@ -81,8 +83,8 @@ extern class UnicodeChar `{ UTF8Char* `} # Returns the Unicode code point representing the character # # Note : A unicode character might not be a visible glyph, but it will be used to determine canonical equivalence - fun code_point: Int import UnicodeChar.len `{ - switch(UnicodeChar_len(recv)){ + fun code_point: Int import UTF8Char.len `{ + switch(UTF8Char_len(recv)){ case 1: return (long)(0x7F & (unsigned char)recv->ns[recv->pos]); case 2: @@ -103,8 +105,8 @@ extern class UnicodeChar `{ UTF8Char* `} # # NOTE : Works only on ASCII chars # TODO : Support unicode for to_upper - fun to_upper: UnicodeChar import UnicodeChar.code_point `{ - int cp = UnicodeChar_code_point(recv); + fun to_upper: UTF8Char import UTF8Char.code_point `{ + int cp = UTF8Char_code_point(recv); if(cp < 97 || cp > 122){ return recv; } char* ns = malloc(2); ns[1] = '\0'; @@ -120,8 +122,8 @@ extern class UnicodeChar `{ UTF8Char* `} # # NOTE : Works only on ASCII chars # TODO : Support unicode for to_upper - fun to_lower: UnicodeChar import UnicodeChar.code_point `{ - int cp = UnicodeChar_code_point(recv); + fun to_lower: UTF8Char import UTF8Char.code_point `{ + int cp = UTF8Char_code_point(recv); if(cp < 65 || cp > 90){ return recv; } char* ns = malloc(2); ns[1] = '\0'; @@ -138,15 +140,15 @@ extern class UnicodeChar `{ UTF8Char* `} if o isa Char then if len != 1 then return false if code_point == o.ascii then return true - else if o isa UnicodeChar then + else if o isa UTF8Char then if len != o.len then return false if code_point == o.code_point then return true end return false end - redef fun output import UnicodeChar.code_point `{ - switch(UnicodeChar_len(recv)){ + redef fun output import UTF8Char.code_point `{ + switch(UTF8Char_len(recv)){ case 1: printf("%c", recv->ns[recv->pos]); break; @@ -163,7 +165,7 @@ extern class UnicodeChar `{ UTF8Char* `} `} redef fun to_s import NativeString.to_s_with_length `{ - int len = utf8___UnicodeChar_len___impl(recv); + int len = utf8___UTF8Char_len___impl(recv); char* r = malloc(len + 1); r[len] = '\0'; char* src = (recv->ns + recv->pos); @@ -180,10 +182,10 @@ private extern class StringIndex `{ UTF8Char* `} new(size: Int) `{ return malloc(size*sizeof(UTF8Char)); `} # Sets the character at `index` as `item` - fun []=(index: Int, item: UnicodeChar) `{ recv[index] = *item; `} + fun []=(index: Int, item: UTF8Char) `{ recv[index] = *item; `} # Gets the character at position `id` - fun [](id: Int): UnicodeChar `{ return &recv[id]; `} + fun [](id: Int): UTF8Char `{ return &recv[id]; `} # Copies a part of self starting at index `my_from` of length `length` into `other`, starting at `its_from` fun copy_to(other: StringIndex, my_from: Int, its_from: Int, length: Int)`{ @@ -214,7 +216,7 @@ redef class FlatString redef fun to_cstring do if real_items != null then return real_items.as(not null) - var new_items = calloc_string(bytelen + 1) + var new_items = new NativeString(bytelen + 1) self.items.copy_to(new_items, bytelen, index[index_from].pos, 0) new_items[bytelen] = '\0' self.real_items = new_items @@ -245,7 +247,7 @@ redef class FlatString redef fun reversed do - var native = calloc_string(self.bytelen + 1) + var native = new NativeString(self.bytelen + 1) var length = self.length var index = self.index var pos = 0 @@ -257,7 +259,7 @@ redef class FlatString var uchar = index[i] var uchar_len = uchar.len ipos -= uchar_len - new_index[pos_index] = new UnicodeChar(ipos, native) + new_index[pos_index] = new UTF8Char(ipos, native) pos_index -= 1 items.copy_to(native, uchar_len, pos, ipos) pos += uchar_len @@ -278,7 +280,7 @@ redef class FlatString var my_real_len = length var my_real_fin_len = my_real_len * i - var target_string = calloc_string((finlen) + 1) + var target_string = new NativeString((finlen) + 1) var my_index = index var new_index = new StringIndex(my_real_fin_len) @@ -300,13 +302,12 @@ redef class FlatString redef fun to_upper do - var outstr = calloc_string(self.bytelen + 1) + var outstr = new NativeString(self.bytelen + 1) var out_index = 0 var index = self.index var ipos = 0 var max = length - var items = self.items while ipos < max do var u = index[ipos].to_upper @@ -322,13 +323,12 @@ redef class FlatString redef fun to_lower do - var outstr = calloc_string(self.bytelen + 1) + var outstr = new NativeString(self.bytelen + 1) var out_index = 0 var index = self.index var ipos = 0 var max = length - var items = self.items while ipos < max do var u = index[ipos].to_lower @@ -373,7 +373,7 @@ redef class NativeString # Creates the index for said NativeString # `length` is the size of the CString (in bytes, up to the first \0) # real_len is just a way to store the length (UTF-8 characters) - private fun make_index(length: Int, real_len: Container[Int]): StringIndex import Container[Int].item=, UnicodeChar.len `{ + private fun make_index(length: Int, real_len: Container[Int]): StringIndex import Container[Int].item=, UTF8Char.len `{ int pos = 0; int index_pos = 0; UTF8Char* index = malloc(length*sizeof(UTF8Char)); @@ -381,7 +381,7 @@ redef class NativeString UTF8Char* curr = &index[index_pos]; curr->pos = pos; curr->ns = recv; - pos += UnicodeChar_len(curr); + pos += UTF8Char_len(curr); index_pos ++; } Container_of_Int_item__assign(real_len, index_pos); @@ -406,13 +406,13 @@ redef class NativeString var real_len = new Container[Int](0) var length = cstring_length var x = make_index(length, real_len) - var new_self = calloc_string(length + 1) + var new_self = new NativeString(length + 1) copy_to(new_self, length, 0, 0) return new FlatString.with_infos_index(new_self, real_len.item, 0, real_len.item - 1, x, length) end end -redef class OFStream +redef class FileWriter redef fun write(s) do assert is_writable