stdlib/strings: Repaired hash for Buffers.
[nit.git] / lib / standard / string.nit
index 1dcbb31..dd5ed1b 100644 (file)
@@ -27,9 +27,21 @@ intrude import collection # FIXME should be collection::array
 
 # High-level abstraction for all text representations
 abstract class Text
+       super Comparable
+       super StringCapable
+
+       redef type OTHER: Text
+
+       # Type of the view on self (.chars)
+       type SELFVIEW: StringCharView
+
+       # Type of self (used for factorization of several methods, ex : substring_from, empty...)
+       type SELFTYPE: Text
+
+       var hash_cache: nullable Int = null
 
        # Gets a view on the chars of the Text object
-       fun chars: StringCharView is abstract
+       fun chars: SELFVIEW is abstract
 
        # Number of characters contained in self.
        fun length: Int is abstract
@@ -44,13 +56,22 @@ abstract class Text
        # A `from` index < 0 will be replaced by 0.
        # Unless a `count` value is > 0 at the same time.
        # In this case, `from += count` and `count -= from`.
-       fun substring(from: Int, count: Int): String is abstract
+       fun substring(from: Int, count: Int): SELFTYPE is abstract
+
+       # Concatenates `o` to `self`
+       fun +(o: Text): SELFTYPE is abstract
+
+       # Auto-concatenates self `i` times
+       fun *(i: Int): SELFTYPE is abstract
 
        # Is the current Text empty (== "")
        #       assert "".is_empty
        #       assert not "foo".is_empty
        fun is_empty: Bool do return self.length == 0
 
+       # Returns an empty Text of the right type
+       fun empty: SELFTYPE is abstract
+
        # Gets the first char of the Text
        #
        # DEPRECATED : Use self.chars.first instead
@@ -102,6 +123,9 @@ abstract class Text
                return last_index_of_from(c, length - 1)
        end
 
+       # Return a null terminated char *
+       fun to_cstring: NativeString do return flatten.to_cstring
+
        # The index of the last occurrence of an element starting from pos (in reverse order).
        # Example :
        #               assert "/etc/bin/test/test.nit".last_index_of_from('/', length-1) == 13
@@ -148,12 +172,16 @@ abstract class Text
        #     assert "abcd".substring_from(2)    ==  "cd"
        #
        # As with substring, a `from` index < 0 will be replaced by 0
-       fun substring_from(from: Int): String
+       fun substring_from(from: Int): SELFTYPE
        do
-               assert from < length
+               if from > self.length then return empty
+               if from < 0 then from = 0
                return substring(from, length - from)
        end
 
+       # Returns a reversed version of self
+       fun reversed: SELFTYPE is abstract
+
        # Does self have a substring `str` starting from position `pos`?
        #
        #     assert "abcd".has_substring("bc",1)            ==  true
@@ -265,21 +293,35 @@ abstract class Text
        # A upper case version of `self`
        #
        #     assert "Hello World!".to_upper     == "HELLO WORLD!"
-       fun to_upper: String
-       do
-               var s = new FlatBuffer.with_capacity(length)
-               for i in self.chars do s.add(i.to_upper)
-               return s.to_s
-       end
+       fun to_upper: SELFTYPE is abstract
 
        # A lower case version of `self`
        #
        #     assert "Hello World!".to_lower     == "hello world!"
-       fun to_lower : String
+       fun to_lower : SELFTYPE is abstract
+
+       # Removes the whitespaces at the beginning of self
+       fun l_trim: SELFTYPE
        do
-               var s = new FlatBuffer.with_capacity(length)
-               for i in self.chars do s.add(i.to_lower)
-               return s.to_s
+               var iter = self.chars.iterator
+               while iter.is_ok do
+                       if iter.item.ascii > 32 then break
+                       iter.next
+               end
+               if iter.index == length then return self.empty
+               return self.substring_from(iter.index)
+       end
+
+       # Removes the whitespaces at the end of self
+       #
+       # Every character whose ascii value is <= 32 counts as a whitespace.
+       # Returns `empty` when self only contains such characters.
+       fun r_trim: SELFTYPE
+       do
+               var iter = self.chars.reverse_iterator
+               while iter.is_ok do
+                       if iter.item.ascii > 32 then break
+                       iter.next
+               end
+               # The loop stops either on the last non-whitespace character or
+               # because the iterator is exhausted. Exhaustion is tested directly:
+               # the index of a backward iteration is not expected to reach `length`.
+               if not iter.is_ok then return self.empty
+               return self.substring(0, iter.index + 1)
+       end
 
        # Trims trailing and preceding white spaces
@@ -287,23 +329,7 @@ abstract class Text
        #
        #     assert "  Hello  World !  ".trim   == "Hello  World !"
        #     assert "\na\nb\tc\t".trim          == "a\nb\tc"
-       fun trim: String
-       do
-               if self.length == 0 then return self.to_s
-               # find position of the first non white space char (ascii < 32) from the start of the string
-               var start_pos = 0
-               while self.chars[start_pos].ascii <= 32 do
-                       start_pos += 1
-                       if start_pos == length then return ""
-               end
-               # find position of the first non white space char from the end of the string
-               var end_pos = length - 1
-               while self.chars[end_pos].ascii <= 32 do
-                       end_pos -= 1
-                       if end_pos == start_pos then return self.chars[start_pos].to_s
-               end
-               return self.substring(start_pos, end_pos - start_pos + 1)
-       end
+       fun trim: SELFTYPE do return (self.l_trim).r_trim
 
        # Mangle a string to be a unique string only made of alphanumeric characters
        fun to_cmangle: String
@@ -421,6 +447,40 @@ abstract class Text
                return res.to_s
        end
 
+       # Is `o` a Text made of the same characters as self?
+       #
+       # The cheap answers come first: null or a non-Text object is never equal,
+       # the very same instance always is, and different lengths never are.
+       # Only then are the views on the characters compared.
+       redef fun ==(o)
+       do
+               if o == null then return false
+               if not o isa Text then return false
+               if self.is_same_instance(o) then return true
+               if self.length != o.length then return false
+               return self.chars == o.chars
+       end
+
+       # Compares self with `o` by comparing their views on the characters (`chars`).
+       redef fun <(o)
+       do
+               return self.chars < o.chars
+       end
+
+       # Flat representation of self
+       fun flatten: FlatText is abstract
+
+       # Hash of the characters of self (djb2 algorithm), memoized in `hash_cache`.
+       #
+       # The characters are folded from the last one to the first one, which is
+       # the order used by the specialised `hash` of FlatString: two Text objects
+       # that are `==` must share the same hash whatever their concrete class.
+       redef fun hash
+       do
+               if hash_cache == null then
+                       # djb2 hash algorithm
+                       var h = 5381
+                       var iter = self.chars.reverse_iterator
+
+                       while iter.is_ok do
+                               h = (h * 32) + h + iter.item.ascii
+                               iter.next
+                       end
+
+                       hash_cache = h
+               end
+               return hash_cache.as(not null)
+       end
+
 end
 
 # All kinds of array-based text representations.
@@ -429,6 +489,9 @@ abstract class FlatText
 
        private var items: NativeString
 
+       # Real items, used as a cache when to_cstring is called
+       private var real_items: nullable NativeString = null
+
        redef var length: Int
 
        init do end
@@ -441,15 +504,20 @@ abstract class FlatText
                        i += 1
                end
        end
+
+       redef fun flatten do return self
 end
 
 # Abstract class for the SequenceRead compatible
 # views on String and Buffer objects
 abstract class StringCharView
        super SequenceRead[Char]
+       super Comparable
 
        type SELFTYPE: Text
 
+       redef type OTHER: StringCharView
+
        private var target: SELFTYPE
 
        private init(tgt: SELFTYPE)
@@ -507,6 +575,36 @@ abstract class StringCharView
                return false
        end
 
+       # Two views are equal when they hold the same characters in the same order.
+       redef fun ==(other)
+       do
+               if other == null then return false
+               if not other isa StringCharView then return false
+               # Without this check a longer `other` starting with the same characters
+               # would compare equal, and a shorter one would be read past its end.
+               if self.length != other.length then return false
+               var other_chars = other.iterator
+               for i in self do
+                       if i != other_chars.item then return false
+                       other_chars.next
+               end
+               return true
+       end
+
+       # Is self strictly before `other` in lexicographic order?
+       #
+       # The characters are compared pairwise; the first difference decides.
+       # When one sequence is a prefix of the other, the shorter one comes first.
+       # Two identical sequences are not `<` each other.
+       redef fun <(other)
+       do
+               var self_chars = self.iterator
+               var other_chars = other.iterator
+
+               while self_chars.is_ok and other_chars.is_ok do
+                       if self_chars.item < other_chars.item then return true
+                       if self_chars.item > other_chars.item then return false
+                       self_chars.next
+                       other_chars.next
+               end
+
+               # No difference was found on the common part: self is smaller only
+               # if `other` still has characters left (self is a proper prefix).
+               return other_chars.is_ok
+       end
 end
 
 # View on Buffer objects, extends Sequence
@@ -519,13 +617,22 @@ abstract class BufferCharView
 
 end
 
+# Abstract Text whose `SELFTYPE` is narrowed to String.
+#
+# FlatString is the concrete subclass declared in this file.
+abstract class String
+       super Text
+
+       redef type SELFTYPE: String
+
+       # A String is already its own string representation.
+       redef fun to_s do return self
+
+end
+
 # Immutable strings of characters.
-class String
-       super Comparable
+class FlatString
        super FlatText
-       super StringCapable
+       super String
 
-       redef type OTHER: String
+       redef type SELFTYPE: FlatString
+       redef type SELFVIEW: FlatStringCharView
 
        # Index in _items of the start of the string
        private var index_from: Int
@@ -533,7 +640,7 @@ class String
        # Index in _items of the last item of the string
        private var index_to: Int
 
-       redef var chars: StringCharView = new FlatStringCharView(self)
+       redef var chars: SELFVIEW = new FlatStringCharView(self)
 
        ################################################
        #       AbstractString specific methods        #
@@ -547,7 +654,20 @@ class String
                return items[index + index_from]
        end
 
-       redef fun substring(from: Int, count: Int): String
+       # Returns a new string holding the characters of self in reverse order.
+       redef fun reversed
+       do
+               # One slot more than the number of characters.
+               # NOTE(review): the slot at index `length` is never written here;
+               # presumably calloc_string returns zeroed memory - confirm.
+               var native = calloc_string(self.length + 1)
+               var reviter = chars.reverse_iterator
+               var pos = 0
+               # Walk self backwards while filling `native` forwards
+               while reviter.is_ok do
+                       native[pos] = reviter.item
+                       pos += 1
+                       reviter.next
+               end
+               return native.to_s_with_length(self.length)
+       end
+
+       redef fun substring(from, count)
        do
                assert count >= 0
 
@@ -559,16 +679,18 @@ class String
 
                var realFrom = index_from + from
 
-               if (realFrom + count) > index_to then return new String.with_infos(items, index_to - realFrom + 1, realFrom, index_to)
+               if (realFrom + count) > index_to then return new FlatString.with_infos(items, index_to - realFrom + 1, realFrom, index_to)
 
-               if count == 0 then return ""
+               if count == 0 then return empty
 
                var to = realFrom + count - 1
 
-               return new String.with_infos(items, to - realFrom + 1, realFrom, to)
+               return new FlatString.with_infos(items, to - realFrom + 1, realFrom, to)
        end
 
-       redef fun to_upper: String
+       # The empty string literal, cast to FlatString
+       redef fun empty do return "".as(FlatString)
+
+       redef fun to_upper
        do
                var outstr = calloc_string(self.length + 1)
                var out_index = 0
@@ -588,7 +710,7 @@ class String
                return outstr.to_s_with_length(self.length)
        end
 
-       redef fun to_lower : String
+       redef fun to_lower
        do
                var outstr = calloc_string(self.length + 1)
                var out_index = 0
@@ -608,26 +730,6 @@ class String
                return outstr.to_s_with_length(self.length)
        end
 
-       redef fun trim: String
-       do
-               if self.length == 0 then return self
-               # find position of the first non white space char (ascii < 32) from the start of the string
-               var start_pos = self.index_from
-               while items[start_pos].ascii <= 32 do
-                       start_pos += 1
-                       if start_pos == index_to + 1 then return ""
-               end
-               # find position of the first non white space char from the end of the string
-               var end_pos = index_to
-               while items[end_pos].ascii <= 32 do
-                       end_pos -= 1
-                       if end_pos == start_pos then return items[start_pos].to_s
-               end
-               start_pos -= index_from
-               end_pos -= index_from
-               return self.substring(start_pos, end_pos - start_pos + 1)
-       end
-
        redef fun output
        do
                var i = self.index_from
@@ -651,12 +753,14 @@ class String
        end
 
        # Return a null terminated char *
-       fun to_cstring: NativeString
+       redef fun to_cstring: NativeString
        do
+               if real_items != null then return real_items.as(not null)
                if index_from > 0 or index_to != items.cstring_length - 1 then
                        var newItems = calloc_string(length + 1)
                        self.items.copy_to(newItems, length, index_from, 0)
                        newItems[length] = '\0'
+                       self.real_items = newItems
                        return newItems
                end
                return items
@@ -664,7 +768,7 @@ class String
 
        redef fun ==(other)
        do
-               if not other isa String then return false
+               if not other isa FlatString then return super
 
                if self.object_id == other.object_id then return true
 
@@ -694,6 +798,8 @@ class String
        #     assert ("aa" < "b")      ==  true
        redef fun <(other)
        do
+               if not other isa FlatString then return super
+
                if self.object_id == other.object_id then return false
 
                var my_curr_char : Char
@@ -729,7 +835,7 @@ class String
        # The concatenation of `self` with `s`
        #
        #     assert "hello " + "world!"         == "hello world!"
-       fun +(s: String): String
+       redef fun +(s)
        do
                var my_length = self.length
                var its_length = s.length
@@ -739,19 +845,27 @@ class String
                var target_string = calloc_string(my_length + its_length + 1)
 
                self.items.copy_to(target_string, my_length, index_from, 0)
-               s.items.copy_to(target_string, its_length, s.index_from, my_length)
+               if s isa FlatString then
+                       s.items.copy_to(target_string, its_length, s.index_from, my_length)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(target_string, its_length, 0, my_length)
+               else
+                       var curr_pos = my_length
+                       for i in s.chars do
+                               target_string[curr_pos] = i
+                               curr_pos += 1
+                       end
+               end
 
                target_string[total_length] = '\0'
 
                return target_string.to_s_with_length(total_length)
        end
 
-       # `i` repetitions of `self`
-       #
        #     assert "abc"*3           == "abcabcabc"
        #     assert "abc"*1           == "abc"
        #     assert "abc"*0           == ""
-       fun *(i: Int): String
+       redef fun *(i)
        do
                assert i >= 0
 
@@ -775,38 +889,40 @@ class String
                return target_string.to_s_with_length(final_length)
        end
 
-       redef fun to_s do return self
-
        redef fun hash
        do
-               # djb2 hash algorythm
-               var h = 5381
-               var i = length - 1
+               if hash_cache == null then
+                       # djb2 hash algorithm
+                       var h = 5381
+                       var i = length - 1
 
-               var myitems = items
-               var strStart = index_from
+                       var myitems = items
+                       var strStart = index_from
 
-               i += strStart
+                       i += strStart
 
-               while i >= strStart do
-                       h = (h * 32) + h + self.items[i].ascii
-                       i -= 1
+                       while i >= strStart do
+                               h = (h * 32) + h + self.items[i].ascii
+                               i -= 1
+                       end
+
+                       hash_cache = h
                end
 
-               return h
+               return hash_cache.as(not null)
        end
 end
 
 private class FlatStringReverseIterator
        super IndexedIterator[Char]
 
-       var target: String
+       var target: FlatString
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: String, pos: Int)
+       init with_pos(tgt: FlatString, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -826,13 +942,13 @@ end
 private class FlatStringIterator
        super IndexedIterator[Char]
 
-       var target: String
+       var target: FlatString
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: String, pos: Int)
+       init with_pos(tgt: FlatString, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -852,7 +968,7 @@ end
 private class FlatStringCharView
        super StringCharView
 
-       redef type SELFTYPE: String
+       redef type SELFTYPE: FlatString
 
        redef fun [](index)
        do
@@ -872,6 +988,11 @@ end
 abstract class Buffer
        super Text
 
+       redef type SELFVIEW: BufferCharView
+       redef type SELFTYPE: Buffer
+
+       var is_dirty = true
+
        # Modifies the char contained at pos `index`
        #
        # DEPRECATED : Use self.chars.[]= instead
@@ -888,27 +1009,32 @@ abstract class Buffer
        # Enlarges the subsequent array containing the chars of self
        fun enlarge(cap: Int) is abstract
 
-       # Adds the content of string `s` at the end of self
-       fun append(s: String) is abstract
+       # Adds the content of text `s` at the end of self
+       fun append(s: Text) is abstract
+
+       # Hash of the current content of the buffer.
+       #
+       # While `is_dirty` is set, the memoized value in `hash_cache` is discarded
+       # before delegating to the inherited implementation, which recomputes it.
+       # `is_dirty` itself is left untouched by this method.
+       redef fun hash
+       do
+               if is_dirty then hash_cache = null
+               return super
+       end
 
-       redef fun chars: BufferCharView is abstract
 end
 
 # Mutable strings of characters.
 class FlatBuffer
        super FlatText
-       super Comparable
-       super StringCapable
        super Buffer
 
-       redef type OTHER: String
+       redef type SELFVIEW: FlatBufferCharView
+       redef type SELFTYPE: FlatBuffer
 
-       redef var chars: FlatBufferCharView = new FlatBufferCharView(self)
+       redef var chars: SELFVIEW = new FlatBufferCharView(self)
 
        var capacity: Int
 
        redef fun []=(index, item)
        do
+               is_dirty = true
                if index == length then
                        add(item)
                        return
@@ -919,15 +1045,22 @@ class FlatBuffer
 
        redef fun add(c)
        do
+               is_dirty = true
                if capacity <= length then enlarge(length + 5)
                items[length] = c
                length += 1
        end
 
-       redef fun clear do length = 0
+       # Empties the buffer: the length drops to 0 and self is flagged as dirty.
+       redef fun clear
+       do
+               length = 0
+               is_dirty = true
+       end
+
+       # A freshly allocated FlatBuffer (a new instance on every call)
+       redef fun empty do return new FlatBuffer
 
        redef fun enlarge(cap)
        do
+               is_dirty = true
                var c = capacity
                if cap <= c then return
                while c <= cap do c = c * 2 + 2
@@ -940,36 +1073,19 @@ class FlatBuffer
 
        redef fun to_s: String
        do
-               var l = length
-               var a = calloc_string(l+1)
-               items.copy_to(a, l, 0, 0)
-
-               # Ensure the afterlast byte is '\0' to nul-terminated char *
-               a[length] = '\0'
-
-               return a.to_s_with_length(length)
+               return to_cstring.to_s_with_length(length)
        end
 
-       redef fun <(s)
+       redef fun to_cstring
        do
-               var i = 0
-               var l1 = length
-               var l2 = s.length
-               while i < l1 and i < l2 do
-                       var c1 = self.chars[i].ascii
-                       var c2 = s.chars[i].ascii
-                       if c1 < c2 then
-                               return true
-                       else if c2 < c1 then
-                               return false
-                       end
-                       i += 1
-               end
-               if l1 < l2 then
-                       return true
-               else
-                       return false
+               if is_dirty then
+                       var new_native = calloc_string(length + 1)
+                       new_native[length] = '\0'
+                       items.copy_to(new_native, length, 0, 0)
+                       real_items = new_native
+                       is_dirty = false
                end
+               return real_items.as(not null)
        end
 
        # Create a new empty string.
@@ -978,12 +1094,22 @@ class FlatBuffer
                with_capacity(5)
        end
 
-       init from(s: String)
+       init from(s: Text)
        do
                capacity = s.length + 1
                length = s.length
                items = calloc_string(capacity)
-               s.items.copy_to(items, length, s.index_from, 0)
+               if s isa FlatString then
+                       s.items.copy_to(items, length, s.index_from, 0)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(items, length, 0, 0)
+               else
+                       var curr_pos = 0
+                       for i in s.chars do
+                               items[curr_pos] = i
+                               curr_pos += 1
+                       end
+               end
        end
 
        # Create a new empty string with a given capacity.
@@ -998,25 +1124,21 @@ class FlatBuffer
 
        redef fun append(s)
        do
+               is_dirty = true
                var sl = s.length
                if capacity < length + sl then enlarge(length + sl)
-               s.items.copy_to(items, sl, s.index_from, length)
-               length += sl
-       end
-
-       redef fun ==(o)
-       do
-               if not o isa FlatBuffer then return false
-               var l = length
-               if o.length != l then return false
-               var i = 0
-               var it = items
-               var oit = o.items
-               while i < l do
-                       if it[i] != oit[i] then return false
-                       i += 1
+               if s isa FlatString then
+                       s.items.copy_to(items, sl, s.index_from, length)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(items, sl, 0, length)
+               else
+                       var curr_pos = self.length
+                       for i in s.chars do
+                               items[curr_pos] = i
+                               curr_pos += 1
+                       end
                end
-               return true
+               length += sl
        end
 
        # Copies the content of self in `dest`
@@ -1041,10 +1163,38 @@ class FlatBuffer
                                r.chars.push(items[from])
                                from += 1
                        end
-                       return r.to_s
+                       return r
                else
-                       return ""
+                       return new FlatBuffer
+               end
+       end
+
+       # A new FlatBuffer holding the characters of self from the last to the first.
+       redef fun reversed
+       do
+               var res = new FlatBuffer.with_capacity(length)
+               var it = chars.reverse_iterator
+               # Walk self backwards, appending each character to the result
+               while it.is_ok do
+                       res.add(it.item)
+                       it.next
+               end
+               return res
+       end
+
+       # A new FlatBuffer made of the content of self followed by the content of `other`.
+       # Neither self nor `other` is modified.
+       redef fun +(other)
+       do
+               var total = self.length + other.length
+               var res = new FlatBuffer.with_capacity(total)
+               res.append(self)
+               res.append(other)
+               return res
+       end
+
+       redef fun *(repeats)
+       do
+               var new_buf = new FlatBuffer.with_capacity(self.length * repeats)
+               for i in [0..repeats[ do
+                       new_buf.append(self)
                end
+               return new_buf
        end
 end
 
@@ -1457,18 +1607,18 @@ class NativeString
                return to_s_with_length(cstring_length)
        end
 
-       fun to_s_with_length(length: Int): String
+       fun to_s_with_length(length: Int): FlatString
        do
                assert length >= 0
-               return new String.with_infos(self, length, 0, length - 1)
+               return new FlatString.with_infos(self, length, 0, length - 1)
        end
 
-       fun to_s_with_copy: String
+       fun to_s_with_copy: FlatString
        do
                var length = cstring_length
                var new_self = calloc_string(length + 1)
                copy_to(new_self, length, 0, 0)
-               return new String.with_infos(new_self, length, 0, length - 1)
+               return new FlatString.with_infos(new_self, length, 0, length - 1)
        end
 
 end