X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string.nit b/lib/standard/string.nit index 3b8f02e..aa3cef5 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -38,6 +38,8 @@ abstract class Text # Type of self (used for factorization of several methods, ex : substring_from, empty...) type SELFTYPE: Text + var hash_cache: nullable Int = null + # Gets a view on the chars of the Text object fun chars: SELFVIEW is abstract @@ -121,6 +123,9 @@ abstract class Text return last_index_of_from(c, length - 1) end + # Return a null terminated char * + fun to_cstring: NativeString do return flatten.to_cstring + # The index of the last occurrence of an element starting from pos (in reverse order). # Example : # assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13 @@ -169,10 +174,14 @@ abstract class Text # As with substring, a `from` index < 0 will be replaced by 0 fun substring_from(from: Int): SELFTYPE do - assert from < length + if from > self.length then return empty + if from < 0 then from = 0 return substring(from, length - from) end + # Returns a reversed version of self + fun reversed: SELFTYPE is abstract + # Does self have a substring `str` starting from position `pos`? # # assert "abcd".has_substring("bc",1) == true @@ -452,6 +461,26 @@ abstract class Text return self.chars < o.chars end + # Flat representation of self + fun flatten: FlatText is abstract + + redef fun hash + do + if hash_cache == null then + # djb2 hash algorithm + var h = 5381 + var i = length - 1 + + for char in self.chars do + h = (h * 32) + h + char.ascii + i -= 1 + end + + hash_cache = h + end + return hash_cache.as(not null) + end + end # All kinds of array-based text representations. @@ -460,6 +489,9 @@ abstract class FlatText private var items: NativeString + # Real items, used as cache for to_cstring is called + private var real_items: nullable NativeString = null + redef var length: Int init do end @@ -472,6 +504,8 @@ abstract class FlatText i += 1 end end + + redef fun flatten do return self end # Abstract class for the SequenceRead compatible @@ -583,11 +617,21 @@ abstract class BufferCharView end +abstract class String + super Text + + redef type SELFTYPE: String + + redef fun to_s do return self + +end + # Immutable strings of characters. -class String +class FlatString super FlatText + super String - redef type SELFTYPE: String + redef type SELFTYPE: FlatString redef type SELFVIEW: FlatStringCharView # Index in _items of the start of the string @@ -610,6 +654,19 @@ class String return items[index + index_from] end + redef fun reversed + do + var native = calloc_string(self.length + 1) + var reviter = chars.reverse_iterator + var pos = 0 + while reviter.is_ok do + native[pos] = reviter.item + pos += 1 + reviter.next + end + return native.to_s_with_length(self.length) + end + redef fun substring(from, count) do assert count >= 0 @@ -622,16 +679,16 @@ class String var realFrom = index_from + from - if (realFrom + count) > index_to then return new String.with_infos(items, index_to - realFrom + 1, realFrom, index_to) + if (realFrom + count) > index_to then return new FlatString.with_infos(items, index_to - realFrom + 1, realFrom, index_to) - if count == 0 then return "" + if count == 0 then return empty var to = realFrom + count - 1 - return new String.with_infos(items, to - realFrom + 1, realFrom, to) + return new FlatString.with_infos(items, to - realFrom + 1, realFrom, to) end - redef fun empty do return "".as(String) + redef fun empty do return "".as(FlatString) redef fun to_upper do @@ -696,12 +753,14 @@ class String end # Return a null terminated char * - fun to_cstring: NativeString + redef fun to_cstring: NativeString do + if real_items != null then return real_items.as(not null) if index_from > 0 or index_to != items.cstring_length - 1 then var newItems = calloc_string(length + 1) self.items.copy_to(newItems, length, index_from, 0) newItems[length] = '\0' + self.real_items = newItems return newItems end return items @@ -709,7 +768,7 @@ class String redef fun ==(other) do - if not other isa String then return super + if not other isa FlatString then return super if self.object_id == other.object_id then return true @@ -739,7 +798,7 @@ class String # assert ("aa" < "b") == true redef fun <(other) do - if not other isa String then return super + if not other isa FlatString then return super if self.object_id == other.object_id then return false @@ -786,7 +845,7 @@ class String var target_string = calloc_string(my_length + its_length + 1) self.items.copy_to(target_string, my_length, index_from, 0) - if s isa String then + if s isa FlatString then s.items.copy_to(target_string, its_length, s.index_from, my_length) else if s isa FlatBuffer then s.items.copy_to(target_string, its_length, 0, my_length) @@ -830,38 +889,40 @@ class String return target_string.to_s_with_length(final_length) end - redef fun to_s do return self - redef fun hash do - # djb2 hash algorythm - var h = 5381 - var i = length - 1 + if hash_cache == null then + # djb2 hash algorythm + var h = 5381 + var i = length - 1 + + var myitems = items + var strStart = index_from - var myitems = items - var strStart = index_from + i += strStart - i += strStart + while i >= strStart do + h = (h * 32) + h + self.items[i].ascii + i -= 1 + end - while i >= strStart do - h = (h * 32) + h + self.items[i].ascii - i -= 1 + hash_cache = h end - return h + return hash_cache.as(not null) end end private class FlatStringReverseIterator super IndexedIterator[Char] - var target: String + var target: FlatString var target_items: NativeString var curr_pos: Int - init with_pos(tgt: String, pos: Int) + init with_pos(tgt: FlatString, pos: Int) do target = tgt target_items = tgt.items @@ -881,13 +942,13 @@ end private class FlatStringIterator super IndexedIterator[Char] - var target: String + var target: FlatString var target_items: NativeString var curr_pos: Int - init with_pos(tgt: String, pos: Int) + init with_pos(tgt: FlatString, pos: Int) do target = tgt target_items = tgt.items @@ -907,7 +968,7 @@ end private class FlatStringCharView super StringCharView - redef type SELFTYPE: String + redef type SELFTYPE: FlatString redef fun [](index) do @@ -930,6 +991,8 @@ abstract class Buffer redef type SELFVIEW: BufferCharView redef type SELFTYPE: Buffer + var is_dirty = true + # Modifies the char contained at pos `index` # # DEPRECATED : Use self.chars.[]= instead @@ -949,6 +1012,12 @@ abstract class Buffer # Adds the content of text `s` at the end of self fun append(s: Text) is abstract + redef fun hash + do + if is_dirty then hash_cache = null + return super + end + end # Mutable strings of characters. @@ -965,6 +1034,7 @@ class FlatBuffer redef fun []=(index, item) do + is_dirty = true if index == length then add(item) return @@ -975,17 +1045,22 @@ class FlatBuffer redef fun add(c) do + is_dirty = true if capacity <= length then enlarge(length + 5) items[length] = c length += 1 end - redef fun clear do length = 0 + redef fun clear do + is_dirty = true + length = 0 + end redef fun empty do return new FlatBuffer redef fun enlarge(cap) do + is_dirty = true var c = capacity if cap <= c then return while c <= cap do c = c * 2 + 2 @@ -998,28 +1073,40 @@ class FlatBuffer redef fun to_s: String do - var l = length - var a = calloc_string(l+1) - items.copy_to(a, l, 0, 0) - - # Ensure the afterlast byte is '\0' to nul-terminated char * - a[length] = '\0' - - return a.to_s_with_length(length) + return to_cstring.to_s_with_length(length) end - # Create a new empty string. - init + redef fun to_cstring do - with_capacity(5) + if is_dirty then + var new_native = calloc_string(length + 1) + new_native[length] = '\0' + items.copy_to(new_native, length, 0, 0) + real_items = new_native + is_dirty = false + end + return real_items.as(not null) end - init from(s: String) + # Create a new empty string. + init do with_capacity(5) + + init from(s: Text) do capacity = s.length + 1 length = s.length items = calloc_string(capacity) - s.items.copy_to(items, length, s.index_from, 0) + if s isa FlatString then + s.items.copy_to(items, length, s.index_from, 0) + else if s isa FlatBuffer then + s.items.copy_to(items, length, 0, 0) + else + var curr_pos = 0 + for i in s.chars do + items[curr_pos] = i + curr_pos += 1 + end + end end # Create a new empty string with a given capacity. @@ -1034,9 +1121,10 @@ class FlatBuffer redef fun append(s) do + is_dirty = true var sl = s.length if capacity < length + sl then enlarge(length + sl) - if s isa String then + if s isa FlatString then s.items.copy_to(items, sl, s.index_from, length) else if s isa FlatBuffer then s.items.copy_to(items, sl, 0, length) @@ -1078,6 +1166,17 @@ class FlatBuffer end end + redef fun reversed + do + var new_buf = new FlatBuffer.with_capacity(self.length) + var reviter = self.chars.reverse_iterator + while reviter.is_ok do + new_buf.add(reviter.item) + reviter.next + end + return new_buf + end + redef fun +(other) do var new_buf = new FlatBuffer.with_capacity(self.length + other.length) @@ -1505,18 +1604,18 @@ class NativeString return to_s_with_length(cstring_length) end - fun to_s_with_length(length: Int): String + fun to_s_with_length(length: Int): FlatString do assert length >= 0 - return new String.with_infos(self, length, 0, length - 1) + return new FlatString.with_infos(self, length, 0, length - 1) end - fun to_s_with_copy: String + fun to_s_with_copy: FlatString do var length = cstring_length var new_self = calloc_string(length + 1) copy_to(new_self, length, 0, 0) - return new String.with_infos(new_self, length, 0, length - 1) + return new FlatString.with_infos(new_self, length, 0, length - 1) end end