X-Git-Url: http://nitlanguage.org diff --git a/lib/standard/string.nit b/lib/standard/string.nit index 1176e04..aa3cef5 100644 --- a/lib/standard/string.nit +++ b/lib/standard/string.nit @@ -28,6 +28,7 @@ intrude import collection # FIXME should be collection::array # High-level abstraction for all text representations abstract class Text super Comparable + super StringCapable redef type OTHER: Text @@ -37,6 +38,8 @@ abstract class Text # Type of self (used for factorization of several methods, ex : substring_from, empty...) type SELFTYPE: Text + var hash_cache: nullable Int = null + # Gets a view on the chars of the Text object fun chars: SELFVIEW is abstract @@ -55,6 +58,12 @@ abstract class Text # In this case, `from += count` and `count -= from`. fun substring(from: Int, count: Int): SELFTYPE is abstract + # Concatenates `o` to `self` + fun +(o: Text): SELFTYPE is abstract + + # Auto-concatenates self `i` times + fun *(i: Int): SELFTYPE is abstract + # Is the current Text empty (== "") # assert "".is_empty # assert not "foo".is_empty @@ -114,6 +123,9 @@ abstract class Text return last_index_of_from(c, length - 1) end + # Return a null terminated char * + fun to_cstring: NativeString do return flatten.to_cstring + # The index of the last occurrence of an element starting from pos (in reverse order). # Example : # assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13 @@ -162,10 +174,14 @@ abstract class Text # As with substring, a `from` index < 0 will be replaced by 0 fun substring_from(from: Int): SELFTYPE do - assert from < length + if from > self.length then return empty + if from < 0 then from = 0 return substring(from, length - from) end + # Returns a reversed version of self + fun reversed: SELFTYPE is abstract + # Does self have a substring `str` starting from position `pos`? # # assert "abcd".has_substring("bc",1) == true @@ -445,6 +461,26 @@ abstract class Text return self.chars < o.chars end + # Flat representation of self + fun flatten: FlatText is abstract + + redef fun hash + do + if hash_cache == null then + # djb2 hash algorithm + var h = 5381 + var i = length - 1 + + for char in self.chars do + h = (h * 32) + h + char.ascii + i -= 1 + end + + hash_cache = h + end + return hash_cache.as(not null) + end + end # All kinds of array-based text representations. @@ -453,6 +489,9 @@ abstract class FlatText private var items: NativeString + # Real items, used as cache for to_cstring is called + private var real_items: nullable NativeString = null + redef var length: Int init do end @@ -465,6 +504,8 @@ abstract class FlatText i += 1 end end + + redef fun flatten do return self end # Abstract class for the SequenceRead compatible @@ -576,12 +617,21 @@ abstract class BufferCharView end +abstract class String + super Text + + redef type SELFTYPE: String + + redef fun to_s do return self + +end + # Immutable strings of characters. -class String +class FlatString super FlatText - super StringCapable + super String - redef type SELFTYPE: String + redef type SELFTYPE: FlatString redef type SELFVIEW: FlatStringCharView # Index in _items of the start of the string @@ -604,6 +654,19 @@ class String return items[index + index_from] end + redef fun reversed + do + var native = calloc_string(self.length + 1) + var reviter = chars.reverse_iterator + var pos = 0 + while reviter.is_ok do + native[pos] = reviter.item + pos += 1 + reviter.next + end + return native.to_s_with_length(self.length) + end + redef fun substring(from, count) do assert count >= 0 @@ -616,16 +679,16 @@ class String var realFrom = index_from + from - if (realFrom + count) > index_to then return new String.with_infos(items, index_to - realFrom + 1, realFrom, index_to) + if (realFrom + count) > index_to then return new FlatString.with_infos(items, index_to - realFrom + 1, realFrom, index_to) - if count == 0 then return "" + if count == 0 then return empty var to = realFrom + count - 1 - return new String.with_infos(items, to - realFrom + 1, realFrom, to) + return new FlatString.with_infos(items, to - realFrom + 1, realFrom, to) end - redef fun empty do return "".as(String) + redef fun empty do return "".as(FlatString) redef fun to_upper do @@ -690,12 +753,14 @@ class String end # Return a null terminated char * - fun to_cstring: NativeString + redef fun to_cstring: NativeString do + if real_items != null then return real_items.as(not null) if index_from > 0 or index_to != items.cstring_length - 1 then var newItems = calloc_string(length + 1) self.items.copy_to(newItems, length, index_from, 0) newItems[length] = '\0' + self.real_items = newItems return newItems end return items @@ -703,7 +768,7 @@ class String redef fun ==(other) do - if not other isa String then return super + if not other isa FlatString then return super if self.object_id == other.object_id then return true @@ -733,7 +798,7 @@ class String # assert ("aa" < "b") == true redef fun <(other) do - if not other isa String then return super + if not other isa FlatString then return super if self.object_id == other.object_id then return false @@ -770,7 +835,7 @@ class String # The concatenation of `self` with `s` # # assert "hello " + "world!" == "hello world!" - fun +(s: String): String + redef fun +(s) do var my_length = self.length var its_length = s.length @@ -780,19 +845,27 @@ class String var target_string = calloc_string(my_length + its_length + 1) self.items.copy_to(target_string, my_length, index_from, 0) - s.items.copy_to(target_string, its_length, s.index_from, my_length) + if s isa FlatString then + s.items.copy_to(target_string, its_length, s.index_from, my_length) + else if s isa FlatBuffer then + s.items.copy_to(target_string, its_length, 0, my_length) + else + var curr_pos = my_length + for i in s.chars do + target_string[curr_pos] = i + curr_pos += 1 + end + end target_string[total_length] = '\0' return target_string.to_s_with_length(total_length) end - # `i` repetitions of `self` - # # assert "abc"*3 == "abcabcabc" # assert "abc"*1 == "abc" # assert "abc"*0 == "" - fun *(i: Int): String + redef fun *(i) do assert i >= 0 @@ -816,38 +889,40 @@ class String return target_string.to_s_with_length(final_length) end - redef fun to_s do return self - redef fun hash do - # djb2 hash algorythm - var h = 5381 - var i = length - 1 + if hash_cache == null then + # djb2 hash algorythm + var h = 5381 + var i = length - 1 - var myitems = items - var strStart = index_from + var myitems = items + var strStart = index_from - i += strStart + i += strStart - while i >= strStart do - h = (h * 32) + h + self.items[i].ascii - i -= 1 + while i >= strStart do + h = (h * 32) + h + self.items[i].ascii + i -= 1 + end + + hash_cache = h end - return h + return hash_cache.as(not null) end end private class FlatStringReverseIterator super IndexedIterator[Char] - var target: String + var target: FlatString var target_items: NativeString var curr_pos: Int - init with_pos(tgt: String, pos: Int) + init with_pos(tgt: FlatString, pos: Int) do target = tgt target_items = tgt.items @@ -867,13 +942,13 @@ end private class FlatStringIterator super IndexedIterator[Char] - var target: String + var target: FlatString var target_items: NativeString var curr_pos: Int - init with_pos(tgt: String, pos: Int) + init with_pos(tgt: FlatString, pos: Int) do target = tgt target_items = tgt.items @@ -893,7 +968,7 @@ end private class FlatStringCharView super StringCharView - redef type SELFTYPE: String + redef type SELFTYPE: FlatString redef fun [](index) do @@ -916,6 +991,8 @@ abstract class Buffer redef type SELFVIEW: BufferCharView redef type SELFTYPE: Buffer + var is_dirty = true + # Modifies the char contained at pos `index` # # DEPRECATED : Use self.chars.[]= instead @@ -932,15 +1009,20 @@ abstract class Buffer # Enlarges the subsequent array containing the chars of self fun enlarge(cap: Int) is abstract - # Adds the content of string `s` at the end of self - fun append(s: String) is abstract + # Adds the content of text `s` at the end of self + fun append(s: Text) is abstract + + redef fun hash + do + if is_dirty then hash_cache = null + return super + end end # Mutable strings of characters. class FlatBuffer super FlatText - super StringCapable super Buffer redef type SELFVIEW: FlatBufferCharView @@ -952,6 +1034,7 @@ class FlatBuffer redef fun []=(index, item) do + is_dirty = true if index == length then add(item) return @@ -962,17 +1045,22 @@ class FlatBuffer redef fun add(c) do + is_dirty = true if capacity <= length then enlarge(length + 5) items[length] = c length += 1 end - redef fun clear do length = 0 + redef fun clear do + is_dirty = true + length = 0 + end redef fun empty do return new FlatBuffer redef fun enlarge(cap) do + is_dirty = true var c = capacity if cap <= c then return while c <= cap do c = c * 2 + 2 @@ -985,28 +1073,40 @@ class FlatBuffer redef fun to_s: String do - var l = length - var a = calloc_string(l+1) - items.copy_to(a, l, 0, 0) - - # Ensure the afterlast byte is '\0' to nul-terminated char * - a[length] = '\0' - - return a.to_s_with_length(length) + return to_cstring.to_s_with_length(length) end - # Create a new empty string. - init + redef fun to_cstring do - with_capacity(5) + if is_dirty then + var new_native = calloc_string(length + 1) + new_native[length] = '\0' + items.copy_to(new_native, length, 0, 0) + real_items = new_native + is_dirty = false + end + return real_items.as(not null) end - init from(s: String) + # Create a new empty string. + init do with_capacity(5) + + init from(s: Text) do capacity = s.length + 1 length = s.length items = calloc_string(capacity) - s.items.copy_to(items, length, s.index_from, 0) + if s isa FlatString then + s.items.copy_to(items, length, s.index_from, 0) + else if s isa FlatBuffer then + s.items.copy_to(items, length, 0, 0) + else + var curr_pos = 0 + for i in s.chars do + items[curr_pos] = i + curr_pos += 1 + end + end end # Create a new empty string with a given capacity. @@ -1021,9 +1121,20 @@ class FlatBuffer redef fun append(s) do + is_dirty = true var sl = s.length if capacity < length + sl then enlarge(length + sl) - s.items.copy_to(items, sl, s.index_from, length) + if s isa FlatString then + s.items.copy_to(items, sl, s.index_from, length) + else if s isa FlatBuffer then + s.items.copy_to(items, sl, 0, length) + else + var curr_pos = self.length + for i in s.chars do + items[curr_pos] = i + curr_pos += 1 + end + end length += sl end @@ -1054,6 +1165,34 @@ class FlatBuffer return new FlatBuffer end end + + redef fun reversed + do + var new_buf = new FlatBuffer.with_capacity(self.length) + var reviter = self.chars.reverse_iterator + while reviter.is_ok do + new_buf.add(reviter.item) + reviter.next + end + return new_buf + end + + redef fun +(other) + do + var new_buf = new FlatBuffer.with_capacity(self.length + other.length) + new_buf.append(self) + new_buf.append(other) + return new_buf + end + + redef fun *(repeats) + do + var new_buf = new FlatBuffer.with_capacity(self.length * repeats) + for i in [0..repeats[ do + new_buf.append(self) + end + return new_buf + end end private class FlatBufferReverseIterator @@ -1465,18 +1604,18 @@ class NativeString return to_s_with_length(cstring_length) end - fun to_s_with_length(length: Int): String + fun to_s_with_length(length: Int): FlatString do assert length >= 0 - return new String.with_infos(self, length, 0, length - 1) + return new FlatString.with_infos(self, length, 0, length - 1) end - fun to_s_with_copy: String + fun to_s_with_copy: FlatString do var length = cstring_length var new_self = calloc_string(length + 1) copy_to(new_self, length, 0, 0) - return new String.with_infos(new_self, length, 0, length - 1) + return new FlatString.with_infos(new_self, length, 0, length - 1) end end