stdlib/strings: Added flatten service.
[nit.git] / lib / standard / string.nit
index cc24265..e47399b 100644 (file)
@@ -27,9 +27,19 @@ intrude import collection # FIXME should be collection::array
 
 # High-level abstraction for all text representations
 abstract class Text
+       super Comparable
+       super StringCapable
+
+       redef type OTHER: Text
+
+       # Type of the view on self (.chars)
+       type SELFVIEW: StringCharView
+
+       # Type of self (used to factorize several methods, e.g. substring_from, empty...)
+       type SELFTYPE: Text
 
        # Gets a view on the chars of the Text object
-       fun chars: StringCharView is abstract
+       fun chars: SELFVIEW is abstract
 
        # Number of characters contained in self.
        fun length: Int is abstract
@@ -44,13 +54,22 @@ abstract class Text
        # A `from` index < 0 will be replaced by 0.
        # Unless a `count` value is > 0 at the same time.
        # In this case, `from += count` and `count -= from`.
-       fun substring(from: Int, count: Int): String is abstract
+       fun substring(from: Int, count: Int): SELFTYPE is abstract
+
+       # Concatenates `o` to `self`
+       fun +(o: Text): SELFTYPE is abstract
+
+       # Auto-concatenates self `i` times
+       fun *(i: Int): SELFTYPE is abstract
 
        # Is the current Text empty (== "")
        #       assert "".is_empty
        #       assert not "foo".is_empty
        fun is_empty: Bool do return self.length == 0
 
+       # Returns an empty Text of the right type
+       fun empty: SELFTYPE is abstract
+
        # Gets the first char of the Text
        #
        # DEPRECATED : Use self.chars.first instead
@@ -148,9 +167,10 @@ abstract class Text
        #     assert "abcd".substring_from(2)    ==  "cd"
        #
        # As with substring, a `from` index < 0 will be replaced by 0
-       fun substring_from(from: Int): String
+       fun substring_from(from: Int): SELFTYPE
        do
-               assert from < length
+               if from > self.length then return empty
+               if from < 0 then from = 0
                return substring(from, length - from)
        end
 
@@ -265,21 +285,35 @@ abstract class Text
        # A upper case version of `self`
        #
        #     assert "Hello World!".to_upper     == "HELLO WORLD!"
-       fun to_upper: String
-       do
-               var s = new Buffer.with_capacity(length)
-               for i in self.chars do s.add(i.to_upper)
-               return s.to_s
-       end
+       fun to_upper: SELFTYPE is abstract
 
        # A lower case version of `self`
        #
        #     assert "Hello World!".to_lower     == "hello world!"
-       fun to_lower : String
+       fun to_lower : SELFTYPE is abstract
+
+       # Removes the whitespaces at the beginning of self
+       #
+       # Every char whose ascii value is <= 32 is treated as a whitespace.
+       fun l_trim: SELFTYPE
        do
-               var s = new Buffer.with_capacity(length)
-               for i in self.chars do s.add(i.to_lower)
-               return s.to_s
+               var iter = self.chars.iterator
+               # Skip chars until the first one above the space char (ascii 32)
+               while iter.is_ok do
+                       if iter.item.ascii > 32 then break
+                       iter.next
+               end
+               # `index == length`: presumably the scan consumed every char,
+               # so nothing is left to keep
+               if iter.index == length then return self.empty
+               return self.substring_from(iter.index)
+       end
+
+       # Removes the whitespaces at the end of self
+       #
+       # Every char whose ascii value is <= 32 is treated as a whitespace.
+       # Returns an empty Text if self contains only whitespaces.
+       fun r_trim: SELFTYPE
+       do
+               var iter = self.chars.reverse_iterator
+               while iter.is_ok do
+                       if iter.item.ascii > 32 then break
+                       iter.next
+               end
+               # The iterator walks backwards, so the end of the iteration
+               # (and not `index == length`) is what signals that no char is kept
+               if not iter.is_ok then return self.empty
+               # `iter.index` is used as the position of the last kept char
+               return self.substring(0, iter.index + 1)
+       end
 
        # Trims trailing and preceding white spaces
@@ -287,28 +321,12 @@ abstract class Text
        #
        #     assert "  Hello  World !  ".trim   == "Hello  World !"
        #     assert "\na\nb\tc\t".trim          == "a\nb\tc"
-       fun trim: String
-       do
-               if self.length == 0 then return self.to_s
-               # find position of the first non white space char (ascii < 32) from the start of the string
-               var start_pos = 0
-               while self.chars[start_pos].ascii <= 32 do
-                       start_pos += 1
-                       if start_pos == length then return ""
-               end
-               # find position of the first non white space char from the end of the string
-               var end_pos = length - 1
-               while self.chars[end_pos].ascii <= 32 do
-                       end_pos -= 1
-                       if end_pos == start_pos then return self.chars[start_pos].to_s
-               end
-               return self.substring(start_pos, end_pos - start_pos + 1)
-       end
+       fun trim: SELFTYPE do return (self.l_trim).r_trim
 
        # Mangle a string to be a unique string only made of alphanumeric characters
        fun to_cmangle: String
        do
-               var res = new Buffer
+               var res = new FlatBuffer
                var underscore = false
                for c in self.chars do
                        if (c >= 'a' and c <= 'z') or (c >='A' and c <= 'Z') then
@@ -342,7 +360,7 @@ abstract class Text
        #     assert "\n\"'\\".escape_to_c         == "\\n\\\"\\'\\\\"
        fun escape_to_c: String
        do
-               var b = new Buffer
+               var b = new FlatBuffer
                for c in self.chars do
                        if c == '\n' then
                                b.append("\\n")
@@ -369,7 +387,7 @@ abstract class Text
        #     assert "ab|\{\}".escape_more_to_c("|\{\}") == "ab\\|\\\{\\\}"
        fun escape_more_to_c(chars: String): String
        do
-               var b = new Buffer
+               var b = new FlatBuffer
                for c in escape_to_c do
                        if chars.chars.has(c) then
                                b.add('\\')
@@ -394,7 +412,7 @@ abstract class Text
        #     assert u[0].ascii      ==  10 # (the ASCII value of the "new line" character)
        fun unescape_nit: String
        do
-               var res = new Buffer.with_capacity(self.length)
+               var res = new FlatBuffer.with_capacity(self.length)
                var was_slash = false
                for c in self do
                        if not was_slash then
@@ -421,10 +439,27 @@ abstract class Text
                return res.to_s
        end
 
+       # Equality between texts.
+       #
+       # Once the identity and length checks are passed, the decision is
+       # delegated to the comparison of the `chars` views.
+       redef fun ==(o)
+       do
+               if o == null then return false
+               if not o isa Text then return false
+               # Cheap checks first: same object, then differing lengths
+               if self.is_same_instance(o) then return true
+               if self.length != o.length then return false
+               return self.chars == o.chars
+       end
+
+       # Orders texts by comparing their `chars` views
+       redef fun <(o)
+       do
+               return self.chars < o.chars
+       end
+
+       # Flat representation of self
+       fun flatten: FlatText is abstract
+
 end
 
-# Common subclass for String and Buffer
-abstract class AbstractString
+# All kinds of array-based text representations.
+abstract class FlatText
        super Text
 
        private var items: NativeString
@@ -441,15 +476,20 @@ abstract class AbstractString
                        i += 1
                end
        end
+
+       redef fun flatten do return self
 end
 
 # Abstract class for the SequenceRead compatible
 # views on String and Buffer objects
 abstract class StringCharView
        super SequenceRead[Char]
+       super Comparable
 
        type SELFTYPE: Text
 
+       redef type OTHER: StringCharView
+
        private var target: SELFTYPE
 
        private init(tgt: SELFTYPE)
@@ -507,6 +547,36 @@ abstract class StringCharView
                return false
        end
 
+       # Two views are equal if they hold the same characters in the same order.
+       #
+       # Views of different lengths are never equal.
+       redef fun ==(other)
+       do
+               if other == null then return false
+               if not other isa StringCharView then return false
+               var other_chars = other.iterator
+               for i in self do
+                       # `other` ran out first: it is shorter than `self`
+                       if not other_chars.is_ok then return false
+                       if i != other_chars.item then return false
+                       other_chars.next
+               end
+               # `other` must be exhausted too, else `self` is only a prefix of it
+               return not other_chars.is_ok
+       end
+
+       # Strict lexicographic order on the characters of the views.
+       #
+       # Returns true if `self` sorts before `other`: either the first differing
+       # character of `self` is the smaller one, or `self` is a proper prefix of
+       # `other`. Two views holding the same characters are not `<` each other.
+       redef fun <(other)
+       do
+               var self_chars = self.iterator
+               var other_chars = other.iterator
+
+               # Walk both views in lockstep until a difference or an exhausted side
+               while self_chars.is_ok and other_chars.is_ok do
+                       if self_chars.item < other_chars.item then return true
+                       if self_chars.item > other_chars.item then return false
+                       self_chars.next
+                       other_chars.next
+               end
+
+               # Every compared character was equal and at least one side is exhausted:
+               # `self` is smaller only if `other` still has characters left.
+               return other_chars.is_ok
+       end
 end
 
 # View on Buffer objects, extends Sequence
@@ -519,13 +589,22 @@ abstract class BufferCharView
 
 end
 
+# Abstract string of characters.
+#
+# Narrows `SELFTYPE` to `String`; `FlatString` is a concrete subclass.
+abstract class String
+       super Text
+
+       redef type SELFTYPE: String
+
+       # A String is its own string representation
+       redef fun to_s do return self
+
+end
+
 # Immutable strings of characters.
-class String
-       super Comparable
-       super AbstractString
-       super StringCapable
+class FlatString
+       super FlatText
+       super String
 
-       redef type OTHER: String
+       redef type SELFTYPE: FlatString
+       redef type SELFVIEW: FlatStringCharView
 
        # Index in _items of the start of the string
        private var index_from: Int
@@ -533,7 +612,7 @@ class String
        # Index in _items of the last item of the string
        private var index_to: Int
 
-       redef var chars: StringCharView = new FlatStringCharView(self)
+       redef var chars: SELFVIEW = new FlatStringCharView(self)
 
        ################################################
        #       AbstractString specific methods        #
@@ -547,7 +626,7 @@ class String
                return items[index + index_from]
        end
 
-       redef fun substring(from: Int, count: Int): String
+       redef fun substring(from, count)
        do
                assert count >= 0
 
@@ -559,16 +638,18 @@ class String
 
                var realFrom = index_from + from
 
-               if (realFrom + count) > index_to then return new String.with_infos(items, index_to - realFrom + 1, realFrom, index_to)
+               if (realFrom + count) > index_to then return new FlatString.with_infos(items, index_to - realFrom + 1, realFrom, index_to)
 
-               if count == 0 then return ""
+               if count == 0 then return empty
 
                var to = realFrom + count - 1
 
-               return new String.with_infos(items, to - realFrom + 1, realFrom, to)
+               return new FlatString.with_infos(items, to - realFrom + 1, realFrom, to)
        end
 
-       redef fun to_upper: String
+       redef fun empty do return "".as(FlatString)
+
+       redef fun to_upper
        do
                var outstr = calloc_string(self.length + 1)
                var out_index = 0
@@ -588,7 +669,7 @@ class String
                return outstr.to_s_with_length(self.length)
        end
 
-       redef fun to_lower : String
+       redef fun to_lower
        do
                var outstr = calloc_string(self.length + 1)
                var out_index = 0
@@ -608,26 +689,6 @@ class String
                return outstr.to_s_with_length(self.length)
        end
 
-       redef fun trim: String
-       do
-               if self.length == 0 then return self
-               # find position of the first non white space char (ascii < 32) from the start of the string
-               var start_pos = self.index_from
-               while items[start_pos].ascii <= 32 do
-                       start_pos += 1
-                       if start_pos == index_to + 1 then return ""
-               end
-               # find position of the first non white space char from the end of the string
-               var end_pos = index_to
-               while items[end_pos].ascii <= 32 do
-                       end_pos -= 1
-                       if end_pos == start_pos then return items[start_pos].to_s
-               end
-               start_pos -= index_from
-               end_pos -= index_from
-               return self.substring(start_pos, end_pos - start_pos + 1)
-       end
-
        redef fun output
        do
                var i = self.index_from
@@ -664,7 +725,7 @@ class String
 
        redef fun ==(other)
        do
-               if not other isa String then return false
+               if not other isa FlatString then return super
 
                if self.object_id == other.object_id then return true
 
@@ -694,6 +755,8 @@ class String
        #     assert ("aa" < "b")      ==  true
        redef fun <(other)
        do
+               if not other isa FlatString then return super
+
                if self.object_id == other.object_id then return false
 
                var my_curr_char : Char
@@ -729,7 +792,7 @@ class String
        # The concatenation of `self` with `s`
        #
        #     assert "hello " + "world!"         == "hello world!"
-       fun +(s: String): String
+       redef fun +(s)
        do
                var my_length = self.length
                var its_length = s.length
@@ -739,19 +802,27 @@ class String
                var target_string = calloc_string(my_length + its_length + 1)
 
                self.items.copy_to(target_string, my_length, index_from, 0)
-               s.items.copy_to(target_string, its_length, s.index_from, my_length)
+               if s isa FlatString then
+                       s.items.copy_to(target_string, its_length, s.index_from, my_length)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(target_string, its_length, 0, my_length)
+               else
+                       var curr_pos = my_length
+                       for i in s.chars do
+                               target_string[curr_pos] = i
+                               curr_pos += 1
+                       end
+               end
 
                target_string[total_length] = '\0'
 
                return target_string.to_s_with_length(total_length)
        end
 
-       # `i` repetitions of `self`
-       #
        #     assert "abc"*3           == "abcabcabc"
        #     assert "abc"*1           == "abc"
        #     assert "abc"*0           == ""
-       fun *(i: Int): String
+       redef fun *(i)
        do
                assert i >= 0
 
@@ -775,8 +846,6 @@ class String
                return target_string.to_s_with_length(final_length)
        end
 
-       redef fun to_s do return self
-
        redef fun hash
        do
                # djb2 hash algorithm
@@ -800,13 +869,13 @@ end
 private class FlatStringReverseIterator
        super IndexedIterator[Char]
 
-       var target: String
+       var target: FlatString
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: String, pos: Int)
+       init with_pos(tgt: FlatString, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -826,13 +895,13 @@ end
 private class FlatStringIterator
        super IndexedIterator[Char]
 
-       var target: String
+       var target: FlatString
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: String, pos: Int)
+       init with_pos(tgt: FlatString, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -852,7 +921,7 @@ end
 private class FlatStringCharView
        super StringCharView
 
-       redef type SELFTYPE: String
+       redef type SELFTYPE: FlatString
 
        redef fun [](index)
        do
@@ -869,22 +938,46 @@ private class FlatStringCharView
 
 end
 
+# Abstract text whose content can be modified in place.
+#
+# Declares the modification services (`[]=`, `add`, `clear`, `enlarge`,
+# `append`); `FlatBuffer` is a concrete subclass.
+abstract class Buffer
+       super Text
+
+       redef type SELFVIEW: BufferCharView
+       redef type SELFTYPE: Buffer
+
+       # Modifies the char contained at pos `index`
+       #
+       # DEPRECATED : Use self.chars.[]= instead
+       fun []=(index: Int, item: Char) is abstract
+
+       # Adds a char `c` at the end of self
+       #
+       # DEPRECATED : Use self.chars.add instead
+       fun add(c: Char) is abstract
+
+       # Clears the buffer
+       fun clear is abstract
+
+       # Enlarges the underlying array containing the chars of self
+       fun enlarge(cap: Int) is abstract
+
+       # Adds the content of text `s` at the end of self
+       fun append(s: Text) is abstract
+
+end
+
 # Mutable strings of characters.
-class Buffer
-       super AbstractString
-       super Comparable
-       super StringCapable
+class FlatBuffer
+       super FlatText
+       super Buffer
 
-       redef type OTHER: String
+       redef type SELFVIEW: FlatBufferCharView
+       redef type SELFTYPE: FlatBuffer
 
-       redef var chars: BufferCharView = new FlatBufferCharView(self)
+       redef var chars: SELFVIEW = new FlatBufferCharView(self)
 
        var capacity: Int
 
-       # Modifies the char contained at pos `index`
-       #
-       # DEPRECATED : Use self.chars.[]= instead
-       fun []=(index: Int, item: Char)
+       redef fun []=(index, item)
        do
                if index == length then
                        add(item)
@@ -894,21 +987,18 @@ class Buffer
                items[index] = item
        end
 
-       # Adds a char `c` at the end of self
-       #
-       # DEPRECATED : Use self.chars.add instead
-       fun add(c: Char)
+       redef fun add(c)
        do
                if capacity <= length then enlarge(length + 5)
                items[length] = c
                length += 1
        end
 
-       # Clears the buffer
-       fun clear do length = 0
+       redef fun clear do length = 0
 
-       # Enlarges the subsequent array containing the chars of self
-       fun enlarge(cap: Int)
+       redef fun empty do return new FlatBuffer
+
+       redef fun enlarge(cap)
        do
                var c = capacity
                if cap <= c then return
@@ -932,40 +1022,28 @@ class Buffer
                return a.to_s_with_length(length)
        end
 
-       redef fun <(s)
-       do
-               var i = 0
-               var l1 = length
-               var l2 = s.length
-               while i < l1 and i < l2 do
-                       var c1 = self.chars[i].ascii
-                       var c2 = s.chars[i].ascii
-                       if c1 < c2 then
-                               return true
-                       else if c2 < c1 then
-                               return false
-                       end
-                       i += 1
-               end
-               if l1 < l2 then
-                       return true
-               else
-                       return false
-               end
-       end
-
        # Create a new empty string.
        init
        do
                with_capacity(5)
        end
 
-       init from(s: String)
+       init from(s: Text)
        do
                capacity = s.length + 1
                length = s.length
                items = calloc_string(capacity)
-               s.items.copy_to(items, length, s.index_from, 0)
+               if s isa FlatString then
+                       s.items.copy_to(items, length, s.index_from, 0)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(items, length, 0, 0)
+               else
+                       var curr_pos = 0
+                       for i in s.chars do
+                               items[curr_pos] = i
+                               curr_pos += 1
+                       end
+               end
        end
 
        # Create a new empty string with a given capacity.
@@ -978,28 +1056,22 @@ class Buffer
                length = 0
        end
 
-       # Adds the content of string `s` at the end of self
-       fun append(s: String)
+       redef fun append(s)
        do
                var sl = s.length
                if capacity < length + sl then enlarge(length + sl)
-               s.items.copy_to(items, sl, s.index_from, length)
-               length += sl
-       end
-
-       redef fun ==(o)
-       do
-               if not o isa Buffer then return false
-               var l = length
-               if o.length != l then return false
-               var i = 0
-               var it = items
-               var oit = o.items
-               while i < l do
-                       if it[i] != oit[i] then return false
-                       i += 1
+               if s isa FlatString then
+                       s.items.copy_to(items, sl, s.index_from, length)
+               else if s isa FlatBuffer then
+                       s.items.copy_to(items, sl, 0, length)
+               else
+                       var curr_pos = self.length
+                       for i in s.chars do
+                               items[curr_pos] = i
+                               curr_pos += 1
+                       end
                end
-               return true
+               length += sl
        end
 
        # Copies the content of self in `dest`
@@ -1019,28 +1091,45 @@ class Buffer
                if from < 0 then from = 0
                if count > length then count = length
                if from < count then
-                       var r = new Buffer.with_capacity(count - from)
+                       var r = new FlatBuffer.with_capacity(count - from)
                        while from < count do
                                r.chars.push(items[from])
                                from += 1
                        end
-                       return r.to_s
+                       return r
                else
-                       return ""
+                       return new FlatBuffer
                end
        end
+
+       # Returns a new FlatBuffer holding the chars of `self` followed by those of `other`
+       redef fun +(other)
+       do
+               # Reserve room for both operands up front
+               var total = self.length + other.length
+               var res = new FlatBuffer.with_capacity(total)
+               res.append(self)
+               res.append(other)
+               return res
+       end
+
+       # Returns a new FlatBuffer made of `repeats` copies of `self`
+       #
+       # `repeats` must not be negative, as in the immutable string
+       # implementation of `*`.
+       redef fun *(repeats)
+       do
+               # A negative count would otherwise request a negative capacity
+               assert repeats >= 0
+               var new_buf = new FlatBuffer.with_capacity(self.length * repeats)
+               for i in [0..repeats[ do
+                       new_buf.append(self)
+               end
+               return new_buf
+       end
 end
 
 private class FlatBufferReverseIterator
        super IndexedIterator[Char]
 
-       var target: Buffer
+       var target: FlatBuffer
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: Buffer, pos: Int)
+       init with_pos(tgt: FlatBuffer, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -1061,7 +1150,7 @@ private class FlatBufferCharView
        super BufferCharView
        super StringCapable
 
-       redef type SELFTYPE: Buffer
+       redef type SELFTYPE: FlatBuffer
 
        redef fun [](index) do return target.items[index]
 
@@ -1106,13 +1195,13 @@ end
 private class FlatBufferIterator
        super IndexedIterator[Char]
 
-       var target: Buffer
+       var target: FlatBuffer
 
        var target_items: NativeString
 
        var curr_pos: Int
 
-       init with_pos(tgt: Buffer, pos: Int)
+       init with_pos(tgt: FlatBuffer, pos: Int)
        do
                target = tgt
                target_items = tgt.items
@@ -1222,7 +1311,7 @@ redef class Int
        fun to_base(base: Int, signed: Bool): String
        do
                var l = digit_count(base)
-               var s = new Buffer.from(" " * l)
+               var s = new FlatBuffer.from(" " * l)
                fill_buffer(s, base, signed)
                return s.to_s
        end
@@ -1297,7 +1386,7 @@ redef class Char
        #     assert 'x'.to_s    == "x"
        redef fun to_s
        do
-               var s = new Buffer.with_capacity(1)
+               var s = new FlatBuffer.with_capacity(1)
                s.chars[0] = self
                return s.to_s
        end
@@ -1331,7 +1420,7 @@ redef class Collection[E]
        # Concatenate elements.
        redef fun to_s
        do
-               var s = new Buffer
+               var s = new FlatBuffer
                for e in self do if e != null then s.append(e.to_s)
                return s.to_s
        end
@@ -1344,7 +1433,7 @@ redef class Collection[E]
        do
                if is_empty then return ""
 
-               var s = new Buffer # Result
+               var s = new FlatBuffer # Result
 
                # Concat first item
                var i = iterator
@@ -1367,7 +1456,7 @@ redef class Array[E]
        # Fast implementation
        redef fun to_s
        do
-               var s = new Buffer
+               var s = new FlatBuffer
                var i = 0
                var l = length
                while i < l do
@@ -1392,7 +1481,7 @@ redef class Map[K,V]
        do
                if is_empty then return ""
 
-               var s = new Buffer # Result
+               var s = new FlatBuffer # Result
 
                # Concat first item
                var i = iterator
@@ -1440,18 +1529,18 @@ class NativeString
                return to_s_with_length(cstring_length)
        end
 
-       fun to_s_with_length(length: Int): String
+       fun to_s_with_length(length: Int): FlatString
        do
                assert length >= 0
-               return new String.with_infos(self, length, 0, length - 1)
+               return new FlatString.with_infos(self, length, 0, length - 1)
        end
 
-       fun to_s_with_copy: String
+       fun to_s_with_copy: FlatString
        do
                var length = cstring_length
                var new_self = calloc_string(length + 1)
                copy_to(new_self, length, 0, 0)
-               return new String.with_infos(new_self, length, 0, length - 1)
+               return new FlatString.with_infos(new_self, length, 0, length - 1)
        end
 
 end