X-Git-Url: http://nitlanguage.org
diff --git a/lib/string_experimentations/utf8_noindex.nit b/lib/string_experimentations/utf8_noindex.nit
index cef60f7..db51f0a 100644
--- a/lib/string_experimentations/utf8_noindex.nit
+++ b/lib/string_experimentations/utf8_noindex.nit
@@ -73,7 +73,7 @@ extern class UnicodeChar `{ uint32_t* `}
 	# Returns the Unicode code point representing the character
 	#
 	# Note : A unicode character might not be a visible glyph, but it will be used to determine canonical equivalence
-	fun code_point: Int `{
+	fun code_point: Int import UnicodeChar.len `{
 		uint32_t val = *recv;
 		uint32_t ret = 0;
 		switch(UnicodeChar_len(recv)){
@@ -171,6 +171,14 @@ extern class UnicodeChar `{ uint32_t* `}
 	`}
 end
 
+# Used to keep track of the last accessed char in a String
+class CharCache
+	# The index (counted in chars) of the cached char in a String
+	var position: Int
+	# The byte position of that char in the NativeString underlying the String
+	var bytepos: Int
+end
+
 class FlatStringReviter
 	super IndexedIterator[UnicodeChar]
 
@@ -263,6 +271,9 @@ redef class FlatString
 	# Length in bytes of the string (e.g. the length of the C string)
 	redef var bytelen: Int
 
+	# Cache for the last accessed character in the string (-1 until a first access)
+	var cache = new CharCache(-1,-1)
+
 	redef var length = length_l is lazy
 
 	private init full(items: NativeString, from, to, bytelen, len: Int)
@@ -324,22 +335,57 @@ redef class FlatString
 	private fun byte_index(index: Int): Int do
 		assert index >= 0
 		assert index < length
-		var ns_i = index_from
-		var my_i = 0
-		while my_i != index do
-			if items[ns_i].ascii.bin_and(0x80) == 0 then
+
+		# Scan from the closest known position: string start, string end or cached char
+		var delta_begin = index
+		var delta_end = (length - 1) - index
+		var delta_cache = (cache.position - index).abs
+		var min = delta_begin
+
+		if delta_cache < min then min = delta_cache
+		if delta_end < min then min = delta_end
+
+		var ns_i: Int
+		var my_i: Int
+		var myits = items
+		# Both scans keep `ns_i` on the first byte of char `my_i` (one past `index_to` for `length`)
+		if min == delta_begin then
+			ns_i = index_from
+			my_i = 0
+		else if min == delta_cache then
+			ns_i = cache.bytepos
+			my_i = cache.position
+		else
+			ns_i = index_to + 1
+			my_i = length
+		end
+		# Forward scan: the leading byte of a char gives the number of bytes to skip
+		while my_i < index do
+			if myits[ns_i].ascii.bin_and(0x80) == 0 then
 				ns_i += 1
-			else if items[ns_i].ascii.bin_and(0xE0) == 0xC0 then
+			else if myits[ns_i].ascii.bin_and(0xE0) == 0xC0 then
 				ns_i += 2
-			else if items[ns_i].ascii.bin_and(0xF0) == 0xE0 then
+			else if myits[ns_i].ascii.bin_and(0xF0) == 0xE0 then
 				ns_i += 3
-			else if items[ns_i].ascii.bin_and(0xF7) == 0xF0 then
+			else if myits[ns_i].ascii.bin_and(0xF8) == 0xF0 then
 				ns_i += 4
 			else
 				ns_i += 1
 			end
 			my_i += 1
 		end
+
+		# Backward scan: step back byte by byte, every byte that is not a
+		# continuation byte (10xxxxxx) is the start of the previous char
+		while my_i > index do
+			ns_i -= 1
+			if myits[ns_i].ascii.bin_and(0xC0) != 0x80 then my_i -= 1
+		end
+
+		# Remember the result, nearby accesses will start from here
+		cache.position = index
+		cache.bytepos = ns_i
+
 		return ns_i
 	end
 
@@ -355,7 +401,7 @@ redef class FlatString
 	end
 
 	redef fun reversed do
-		var new_str = calloc_string(bytelen)
+		var new_str = new NativeString(bytelen)
 		var s_pos = bytelen
 		var my_pos = index_from
 		var its = items
@@ -369,7 +415,7 @@ redef class FlatString
 	end
 
 	redef fun to_upper do
-		var ns = calloc_string(bytelen)
+		var ns = new NativeString(bytelen)
 		var offset = 0
 
 		for i in [0 .. length[ do
@@ -381,7 +427,7 @@ redef class FlatString
 	end
 
 	redef fun to_lower do
-		var ns = calloc_string(bytelen)
+		var ns = new NativeString(bytelen)
 		var offset = 0
 
 		for i in [0 .. length[ do
@@ -395,15 +441,15 @@ redef class FlatString
 	redef fun +(o) do
 		if o isa Buffer then o = o.to_s
 		if o isa FlatString then
-			var new_str = calloc_string(bytelen + o.bytelen + 1)
+			var new_str = new NativeString(bytelen + o.bytelen + 1)
 			var new_bytelen = bytelen + o.bytelen
 			new_str[new_bytelen] = '\0'
 			var newlen = length + o.length
 			items.copy_to(new_str, bytelen, index_from, 0)
 			o.items.copy_to(new_str, o.bytelen, o.index_from, bytelen)
 			return new FlatString.full(new_str, 0, new_bytelen - 1, new_bytelen, newlen)
-		else if o isa RopeString then
-			return new RopeString.from(self) + o
+		else if o isa Concat then
+			return new Concat(self, o)
 		else
 			# If it goes to this point, that means another String implementation was concerned, therefore you need to support the + operation for this variant
 			abort
@@ -415,7 +461,7 @@ redef class FlatString
 		var new_bytelen = mybtlen * i
 		var mylen = length
 		var newlen = mylen * i
-		var ns = calloc_string(new_bytelen + 1)
+		var ns = new NativeString(new_bytelen + 1)
 		ns[new_bytelen] = '\0'
 		var offset = 0
 		while i > 0 do
@@ -453,7 +499,7 @@ redef class FlatString
 
 	redef fun to_cstring do
 		if real_items != null then return real_items.as(not null)
-		var new_items = calloc_string(bytelen + 1)
+		var new_items = new NativeString(bytelen + 1)
 		self.items.copy_to(new_items, bytelen, index_from, 0)
 		new_items[bytelen] = '\0'
 		self.real_items = new_items
@@ -473,11 +519,11 @@ redef class FlatBuffer
 	redef var bytelen: Int
 
 	redef init from(s) do
-		if s isa RopeString then
+		if s isa Concat then
 			with_capacity(50)
 			for i in s.substrings do self.append(i)
 		end
-		items = calloc_string(s.bytelen)
+		items = new NativeString(s.bytelen)
 		if s isa FlatString then
 			s.items.copy_to(items, s.bytelen, s.index_from, 0)
 		else
@@ -565,14 +611,14 @@ redef class FlatBuffer
 		var c = capacity
 		if cap <= c then return
 		while c <= cap do c = c * 2 + 2
-		var a = calloc_string(c+1)
+		var a = new NativeString(c+1)
 		if bytelen > 0 then items.copy_to(a, bytelen, 0, 0)
 		items = a
 		capacity = c
 	end
 
 	redef fun append(s) do
-		if s isa RopeString then
+		if s isa Concat then
 			for i in s.substrings do append i
 		end
 		var i = s.as(FlatString)
@@ -589,7 +635,7 @@ redef class FlatBuffer
 
 	redef fun reverse
 	do
-		var nns = calloc_string(bytelen)
+		var nns = new NativeString(bytelen)
 		var ns = items
 		var btlen = bytelen
 		var myp = 0
@@ -655,7 +701,7 @@ redef class FlatBuffer
 	end
 
 	redef fun to_cstring do
-		var ns = calloc_string(bytelen)
+		var ns = new NativeString(bytelen)
 		items.copy_to(ns, bytelen, 0, 0)
 		return ns
 	end
@@ -677,7 +723,7 @@ redef class NativeString
 
 	redef fun to_s_with_copy do
 		var length = cstring_length
-		var new_self = calloc_string(length + 1)
+		var new_self = new NativeString(length + 1)
 		copy_to(new_self, length, 0, 0)
 		return new FlatString.with_bytelen(new_self, 0, length - 1, length)
 	end
@@ -686,7 +732,7 @@ end
 redef class OFStream
 
 	redef fun write(s) do
-		assert _writable
+		assert is_writable
 		if s isa FlatText then
 			write_native(s.to_cstring, s.bytelen)
 		else for i in s.substrings do write_native(i.to_cstring, i.length)